diff --git a/README.md b/README.md index 41b91f0..2fbb2c6 100644 --- a/README.md +++ b/README.md @@ -5,14 +5,72 @@ CUDA Rasterizer **University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 4** -* (TODO) YOUR NAME HERE - * (TODO) [LinkedIn](), [personal website](), [twitter](), etc. -* Tested on: (TODO) Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab) +* Yuru Wang +* Tested on: Windows 10, i7-7700HQ @ 2.5GHz 128GB, GTX 1050 Ti 8GB (personal computer) +* Modified src/CMakeLists.txt: changed sm_20 to sm_61 inside cuda_add_library -### (TODO: Your README) +## Project Description ## +This project implements a simplified rasterization pipeline, similar to the OpenGL pipeline, which includes vertex shading, primitive assembly, rasterization, fragment shading, and a framebuffer. -*DO NOT* leave the README to the last minute! It is a crucial part of the -project, and we will not be able to grade you without a good README. +The features implemented in this project are: +* Vertex shading (`_vertexTransformAndAssembly` in rasterize.cu). +* Primitive assembly with support for triangles read from buffers of index and vertex data (`_primitiveAssembly` in rasterize.cu). +* Rasterization (**supports points, lines, and triangles**). +* A depth buffer for storing and depth testing fragments. +* Fragment-to-depth-buffer writing (uses CUDA atomicMin() to avoid races). +* Fragment shader with Lambert shading and a point light (`render` in rasterize.cu). +* **UV texture mapping with bilinear texture filtering and perspective-correct texture coordinates**. 
+ +## Results ## +**Screen recordings of the running program with Lambert shading and a point light** + +| duck.gltf | CesiumMilkTruck.gltf | +|------|------| +| ![](renders/demo.gif) |![](renders/demo_2.gif) | + +**Render depth and normals** + +| depth | normal | +|------|------| +| ![](renders/depth.png) | ![](renders/normals.png) | + +**Texture mapping and Lambert shading** + +| plain color | Lambert shading | +|------|------| +| ![](renders/duck_plain.png) | ![](renders/duck_lambert.png) | + +| plain color | Lambert shading | +|------|------| +| ![](renders/truck_plain.png) | ![](renders/truck_lambert.png) | + +**Render texture with and without bilinear texture filtering** + +| no bilinear filtering | with bilinear filtering | +|------|------| +| ![](renders/no_bilinear.png) | ![](renders/bilinear.png) | + +**Back face culling** + +| render front face | render back face | +|------|------| +| ![](renders/frontface.png) | ![](renders/backface.png) | + +**Render points and lines** + +| render points | render lines | +|------|------| +| ![](renders/points.png) | ![](renders/lines.png) | + +## Performance Analysis and Questions ## +![](renders/time.png) +The figure above shows the time spent in the different rendering pipeline stages for three models. The times were measured with Lambert shading, back-face culling, and bilinear filtering enabled. As expected, rasterization takes the most time in the graphics pipeline, because it is parallelized over primitives and each thread has to iterate over every fragment inside its primitive's bounding box, which is far more work than the other stages do. For duck.gltf, the time spent on fragment shading is almost the same as on rasterization. This is because, in the duck model, every fragment has a different normal after rasterization due to the curved surfaces, which results in different Lambert shading computation times across threads. + +![](renders/backface-culling.png) +This figure is for duck.gltf with Lambert shading. 
We can observe a performance improvement in the rasterization stage, which is expected: back-face culling eliminates primitives that are not visible to the camera, so some threads or warps can terminate early. + +![](renders/bilinear-filtering.png) +The figure above was obtained by rendering duck.gltf with back-face culling and texture mapping only (no Lambert shading). We can see a slight performance drop in the fragment shading stage when bilinear texture filtering is enabled. This is because bilinear filtering requires additional operations for every fragment (namely three more linear color interpolations), which slows the stage down slightly. ### Credits diff --git "a/renders/QQ\346\210\252\345\233\27620181013141433.png" "b/renders/QQ\346\210\252\345\233\27620181013141433.png" new file mode 100644 index 0000000..b900b0f Binary files /dev/null and "b/renders/QQ\346\210\252\345\233\27620181013141433.png" differ diff --git a/renders/backface-culling.png b/renders/backface-culling.png new file mode 100644 index 0000000..bd76afe Binary files /dev/null and b/renders/backface-culling.png differ diff --git a/renders/backface.png b/renders/backface.png new file mode 100644 index 0000000..47d69af Binary files /dev/null and b/renders/backface.png differ diff --git a/renders/bilinear-filtering.png b/renders/bilinear-filtering.png new file mode 100644 index 0000000..3fd1bfd Binary files /dev/null and b/renders/bilinear-filtering.png differ diff --git a/renders/bilinear.png b/renders/bilinear.png new file mode 100644 index 0000000..3dfaf05 Binary files /dev/null and b/renders/bilinear.png differ diff --git a/renders/demo.gif b/renders/demo.gif new file mode 100644 index 0000000..0ef6530 Binary files /dev/null and b/renders/demo.gif differ diff --git a/renders/demo_2.gif b/renders/demo_2.gif new file mode 100644 index 0000000..598dde6 Binary files /dev/null and b/renders/demo_2.gif differ diff --git a/renders/depth.png b/renders/depth.png 
new file mode 100644 index 0000000..523ed9e Binary files /dev/null and b/renders/depth.png differ diff --git a/renders/duck-diffuse-texture.png b/renders/duck-diffuse-texture.png deleted file mode 100644 index 26e500e..0000000 Binary files a/renders/duck-diffuse-texture.png and /dev/null differ diff --git a/renders/duck_lambert.png b/renders/duck_lambert.png new file mode 100644 index 0000000..60e9384 Binary files /dev/null and b/renders/duck_lambert.png differ diff --git a/renders/duck_plain.png b/renders/duck_plain.png new file mode 100644 index 0000000..ae349d8 Binary files /dev/null and b/renders/duck_plain.png differ diff --git a/renders/frontface.png b/renders/frontface.png new file mode 100644 index 0000000..5b87412 Binary files /dev/null and b/renders/frontface.png differ diff --git a/renders/lines.png b/renders/lines.png new file mode 100644 index 0000000..6551b4a Binary files /dev/null and b/renders/lines.png differ diff --git a/renders/no_bilinear.png b/renders/no_bilinear.png new file mode 100644 index 0000000..9cf8b2c Binary files /dev/null and b/renders/no_bilinear.png differ diff --git a/renders/normals.png b/renders/normals.png new file mode 100644 index 0000000..b141b31 Binary files /dev/null and b/renders/normals.png differ diff --git a/renders/points.png b/renders/points.png new file mode 100644 index 0000000..52c7966 Binary files /dev/null and b/renders/points.png differ diff --git a/renders/time.png b/renders/time.png new file mode 100644 index 0000000..0627311 Binary files /dev/null and b/renders/time.png differ diff --git a/renders/truck_lambert.png b/renders/truck_lambert.png new file mode 100644 index 0000000..afbc689 Binary files /dev/null and b/renders/truck_lambert.png differ diff --git a/renders/truck_plain.png b/renders/truck_plain.png new file mode 100644 index 0000000..181c8a0 Binary files /dev/null and b/renders/truck_plain.png differ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a57f69f..00edee0 100644 --- 
a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -6,5 +6,5 @@ set(SOURCE_FILES cuda_add_library(src ${SOURCE_FILES} - OPTIONS -arch=sm_20 + OPTIONS -arch=sm_61 ) diff --git a/src/main.cpp b/src/main.cpp index 7986959..a35ba8e 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -121,6 +121,7 @@ void runCuda() { cudaGLMapBufferObject((void **)&dptr, pbo); rasterize(dptr, MVP, MV, MV_normal); + cout << "elapsed time: " << timer().getGpuElapsedTimeForPreviousOperation() << endl; cudaGLUnmapBufferObject(pbo); frame++; diff --git a/src/rasterize.cu b/src/rasterize.cu index 1262a09..94c0c7f 100644 --- a/src/rasterize.cu +++ b/src/rasterize.cu @@ -18,6 +18,25 @@ #include #include +#define RENDER_NORMALS 0 +#define RENDER_DEPTH 0 +#define RENDER_TEXTURE 1 + +#define LAMBERT 0 +#define BILINEAR_FILTER 1 + +#define RENDER_POINT 0 +#define RENDER_LINE 0 +#define RENDER_TRIANGLE 1 + +#define BACKFACE_CULLING 1 + +PerformanceTimer& timer() +{ + static PerformanceTimer timer; + return timer; +} + namespace { typedef unsigned short VertexIndex; @@ -44,9 +63,9 @@ namespace { glm::vec3 eyePos; // eye space position used for shading glm::vec3 eyeNor; // eye space normal used for shading, cuz normal will go wrong after perspective transformation // glm::vec3 col; - glm::vec2 texcoord0; + VertexAttributeTexcoord texcoord0; TextureData* dev_diffuseTex = NULL; - // int texWidth, texHeight; + int texWidth, texHeight; // ... }; @@ -62,10 +81,12 @@ namespace { // The attributes listed below might be useful, // but always feel free to modify on your own - // glm::vec3 eyePos; // eye space position used for shading - // glm::vec3 eyeNor; - // VertexAttributeTexcoord texcoord0; - // TextureData* dev_diffuseTex; + glm::vec3 eyePos; // eye space position used for shading + glm::vec3 eyeNor; + VertexAttributeTexcoord texcoord0; + TextureData* dev_diffuseTex; + int texWidth, texHeight; + float z; // ... 
}; @@ -143,9 +164,88 @@ void render(int w, int h, Fragment *fragmentBuffer, glm::vec3 *framebuffer) { int index = x + (y * w); if (x < w && y < h) { - framebuffer[index] = fragmentBuffer[index].color; + glm::vec3 color = glm::vec3(0.0f, 0.0f, 0.0f); + Fragment fragment = fragmentBuffer[index]; // TODO: add your fragment shader code here +#if RENDER_TEXTURE + glm::vec3 diffuseCol; + if (fragment.dev_diffuseTex != NULL) { + TextureData *texture = fragment.dev_diffuseTex; + + #if BILINEAR_FILTER + glm::vec2 UV = glm::vec2(fragment.texcoord0.x * fragment.texWidth, fragment.texcoord0.y * fragment.texHeight); + glm::ivec2 UV_Int = glm::ivec2((int)UV.x, (int)UV.y); + + float delta_x = UV.x - UV_Int.x; + float delta_y = UV.y - UV_Int.y; + + int colIdx_01 = UV_Int.x + UV_Int.y * fragment.texWidth; + int colIdx_02 = UV_Int.x + 1 + UV_Int.y * fragment.texWidth; + int colIdx_03 = UV_Int.x + (UV_Int.y + 1) * fragment.texWidth; + int colIdx_04 = UV_Int.x + 1 + (UV_Int.y + 1) * fragment.texWidth; + + glm::vec3 diffuse_01 = glm::vec3(texture[colIdx_01 * 3], texture[colIdx_01 * 3 + 1], texture[colIdx_01 * 3 + 2]); + glm::vec3 diffuse_02 = glm::vec3(texture[colIdx_02 * 3], texture[colIdx_02 * 3 + 1], texture[colIdx_02 * 3 + 2]); + glm::vec3 diffuse_03 = glm::vec3(texture[colIdx_03 * 3], texture[colIdx_03 * 3 + 1], texture[colIdx_03 * 3 + 2]); + glm::vec3 diffuse_04 = glm::vec3(texture[colIdx_04 * 3], texture[colIdx_04 * 3 + 1], texture[colIdx_04 * 3 + 2]); + + glm::vec3 diffuse_interp_1 = glm::mix(diffuse_01, diffuse_02, delta_x); + glm::vec3 diffuse_interp_2 = glm::mix(diffuse_03, diffuse_04, delta_x); + diffuseCol = glm::mix(diffuse_interp_1, diffuse_interp_2, delta_y); + diffuseCol /= 255.0f; + + + #else + glm::ivec2 UV = glm::ivec2((int)(fragment.texcoord0.x * fragment.texWidth), (int)(fragment.texcoord0.y * fragment.texHeight)); + int colIdx = UV.x + UV.y * fragment.texWidth; + diffuseCol.r = texture[colIdx * 3]; + diffuseCol.g = texture[colIdx * 3 + 1]; + diffuseCol.b = 
texture[colIdx * 3 + 2]; + diffuseCol /= 255.0f; + #endif + + } + else { + diffuseCol = fragment.color; + } + + #if LAMBERT + glm::vec3 lightPos = glm::vec3(10.0f, 10.0f, 10.0f); + glm::vec3 lightVec = glm::normalize(lightPos - fragment.eyePos); // point light + //glm::vec3 lightVec = glm::vec3(1.0f, 1.0f, 1.0f); // directional light + float lambert_term = glm::dot(lightVec, fragment.eyeNor); + color = diffuseCol * lambert_term; + #else + color = diffuseCol; + #endif + + +#endif + +#if RENDER_POINT || RENDER_LINE + color = fragment.color; +#endif + +#if RENDER_NORMALS + color = glm::abs(fragment.eyeNor); + //color.r = (fragment.eyeNor.x + 1.0f) / 2.0f; + //color.g = (fragment.eyeNor.y + 1.0f) / 2.0f; + //color.b = (fragment.eyeNor.z + 1.0f) / 2.0f; + +#endif + +#if RENDER_DEPTH + float z = fragment.z; + z = (z + 1.0f) / 2.0f; + color = glm::vec3(z, z, z); + //color = glm::vec3(1.0f - depth, 1.0f - depth, 1.0f - depth); // depth as color +#endif + + + + framebuffer[index] = color; + } } @@ -636,12 +736,27 @@ void _vertexTransformAndAssembly( // TODO: Apply vertex transformation here // Multiply the MVP matrix for each vertex position, this will transform everything into clipping space + glm::vec4 pos = MVP * glm::vec4(primitive.dev_position[vid], 1.0f); + glm::vec4 eyePos = MV * glm::vec4(primitive.dev_position[vid], 1.0f); // Then divide the pos by its w element to transform into NDC space + pos /= pos.w; // Finally transform x and y to viewport space - + pos.x = (pos.x + 1.0f) * 0.5f * width; + pos.y = (1.0f - pos.y) * 0.5f * height; // TODO: Apply vertex assembly here // Assemble all attribute arraies into the primitive array + primitive.dev_verticesOut[vid].pos = pos; + primitive.dev_verticesOut[vid].eyePos = glm::vec3(eyePos); + primitive.dev_verticesOut[vid].eyeNor = glm::normalize(MV_normal * primitive.dev_normal[vid]); + + if (primitive.dev_diffuseTex != NULL) { + primitive.dev_verticesOut[vid].dev_diffuseTex = primitive.dev_diffuseTex; + 
primitive.dev_verticesOut[vid].texWidth = primitive.diffuseTexWidth; + primitive.dev_verticesOut[vid].texHeight = primitive.diffuseTexHeight; + primitive.dev_verticesOut[vid].texcoord0 = primitive.dev_texcoord0[vid]; + } + } } @@ -660,12 +775,12 @@ void _primitiveAssembly(int numIndices, int curPrimitiveBeginId, Primitive* dev_ // TODO: uncomment the following code for a start // This is primitive assembly for triangles - //int pid; // id for cur primitives vector - //if (primitive.primitiveMode == TINYGLTF_MODE_TRIANGLES) { - // pid = iid / (int)primitive.primitiveType; - // dev_primitives[pid + curPrimitiveBeginId].v[iid % (int)primitive.primitiveType] - // = primitive.dev_verticesOut[primitive.dev_indices[iid]]; - //} + int pid; // id for cur primitives vector + if (primitive.primitiveMode == TINYGLTF_MODE_TRIANGLES) { + pid = iid / (int)primitive.primitiveType; + dev_primitives[pid + curPrimitiveBeginId].v[iid % (int)primitive.primitiveType] + = primitive.dev_verticesOut[primitive.dev_indices[iid]]; + } // TODO: other primitive types (point, line) @@ -673,6 +788,134 @@ void _primitiveAssembly(int numIndices, int curPrimitiveBeginId, Primitive* dev_ } +__device__ +void rasterizePoint(glm::vec4 p, glm::vec3 color, Fragment *fragments, int width, int height) { + int x = (int)p.x; + int y = (int)p.y; + + x = x >= width ? width - 1 : x; + x = x < 0 ? 0 : x; + y = y >= height ? height - 1 : y; + y = y < 0 ? 
0 : y; + + int idx = x + y * width; + fragments[idx].color = color; + +} + +__device__ +void rasterizeLine(glm::vec4 p1, glm::vec4 p2, glm::vec3 color, Fragment *fragments, int width, int height) { + int x1 = glm::clamp(p1.x, 0.f, (float)(width - 1)); + int x2 = glm::clamp(p2.x, 0.f, (float)(width - 1)); + int y1 = glm::clamp(p1.y, 0.f, (float)(height - 1)); + int y2 = glm::clamp(p2.y, 0.f, (float)(height - 1)); + + if (x1 == x2) { + for (int y = y1; y < y2; ++y) { + int idx = x1 + y * width; + fragments[idx].color = color; + } + } else { + int delta_x = x2 - x1; + int delta_y = y2 - y1; + for (int x = x1; x < x2; ++x) { + float y = y1 + delta_y * (x - x1) / delta_x; + int idx = x + (int)y * width; + fragments[idx].color = color; + } + } + + +} + +__device__ +void rasterizeTriangle(glm::vec3 tri[3], glm::vec3 color, Primitive primitive, Fragment *fragments, int* depths, int width, int height) { + AABB boundingBox = getAABBForTriangle(tri); + if (boundingBox.min.x >= width || boundingBox.min.y >= height || boundingBox.max.x < 0 || boundingBox.max.y < 0) { + return; + } + for (int y = boundingBox.min.y; y < boundingBox.max.y; y++) { + for (int x = boundingBox.min.x; x < boundingBox.max.x; x++) { + if (y >= height || y < 0 || x >= width || x < 0) { + continue; + } + int idx = x + y * width; + glm::vec3 bc = calculateBarycentricCoordinate(tri, glm::vec2(x, y)); + if (isBarycentricCoordInBounds(bc)) { + float z = getZAtCoordinate(bc, tri); + int depth = (int)-100 * z; + atomicMin(&depths[idx], depth); + if (depth == depths[idx]) { + fragments[idx].z = z; + + fragments[idx].color = color; // plain white color + + fragments[idx].dev_diffuseTex = primitive.v[0].dev_diffuseTex; + fragments[idx].texHeight = primitive.v[0].texHeight; + fragments[idx].texWidth = primitive.v[0].texWidth; + + glm::vec3 eyePostions[3]; + eyePostions[0] = primitive.v[0].eyePos; + eyePostions[1] = primitive.v[1].eyePos; + eyePostions[2] = primitive.v[2].eyePos; + + glm::vec3 eyeNormals[3]; + 
eyeNormals[0] = primitive.v[0].eyeNor; + eyeNormals[1] = primitive.v[1].eyeNor; + eyeNormals[2] = primitive.v[2].eyeNor; + fragments[idx].eyePos = BCInterpVector(bc, eyePostions); + fragments[idx].eyeNor = BCInterpVector(bc, eyeNormals); + + glm::vec2 UVs[3]; + UVs[0] = primitive.v[0].texcoord0; + UVs[1] = primitive.v[1].texcoord0; + UVs[2] = primitive.v[2].texcoord0; + fragments[idx].texcoord0 = PCInterpUV(bc, eyePostions, UVs); + + } + } + } + } +} + +__global__ +void rasterizePrimitives(int totalNumPrimitives, Primitive *primitives, Fragment *fragments, int* depths, int width, int height) { + int pid = (blockIdx.x * blockDim.x) + threadIdx.x; + + if (pid >= totalNumPrimitives) { + return; + } + + glm::vec3 color = glm::vec3(1.0f, 1.0f, 1.0f); + Primitive primitive = primitives[pid]; + glm::vec3 tri[3]; + tri[0] = glm::vec3(primitive.v[0].pos); + tri[1] = glm::vec3(primitive.v[1].pos); + tri[2] = glm::vec3(primitive.v[2].pos); + +#if BACKFACE_CULLING + if (isBackface(tri)) { + return; + } +#endif + +#if RENDER_POINT + rasterizePoint(primitive.v[0].pos, color, fragments, width, height); + rasterizePoint(primitive.v[1].pos, color, fragments, width, height); + rasterizePoint(primitive.v[2].pos, color, fragments, width, height); + + +#elif RENDER_LINE + rasterizeLine(primitive.v[0].pos, primitive.v[1].pos, color, fragments, width, height); + rasterizeLine(primitive.v[1].pos, primitive.v[2].pos, color, fragments, width, height); + rasterizeLine(primitive.v[0].pos, primitive.v[2].pos, color, fragments, width, height); + +#elif RENDER_TRIANGLE + rasterizeTriangle(tri, color, primitive, fragments, depths, width, height); + +#endif + +} /** @@ -710,6 +953,7 @@ void rasterize(uchar4 *pbo, const glm::mat4 & MVP, const glm::mat4 & MV, const g curPrimitiveBeginId, dev_primitives, *p); + checkCUDAError("Primitive Assembly"); curPrimitiveBeginId += p->numPrimitives; @@ -723,11 +967,16 @@ void rasterize(uchar4 *pbo, const glm::mat4 & MVP, const glm::mat4 & MV, const g initDepth 
<< > >(width, height, dev_depth); // TODO: rasterize - - + dim3 blockSize(128); + dim3 numBlocksForPrimitives((totalNumPrimitives + blockSize.x - 1) / blockSize.x); + rasterizePrimitives << > > (totalNumPrimitives, dev_primitives, dev_fragmentBuffer, dev_depth, width, height); + checkCUDAError("rasterizer"); // Copy depthbuffer colors into framebuffer + timer().startGpuTimer(); render << > >(width, height, dev_fragmentBuffer, dev_framebuffer); + timer().endGpuTimer(); + checkCUDAError("fragment shader"); // Copy framebuffer into OpenGL buffer for OpenGL previewing sendImageToPBO<<>>(pbo, width, height, dev_framebuffer); diff --git a/src/rasterize.h b/src/rasterize.h index 560aae9..6cad206 100644 --- a/src/rasterize.h +++ b/src/rasterize.h @@ -11,6 +11,7 @@ #include #include #include +#include namespace tinygltf{ class Scene; @@ -22,3 +23,97 @@ void rasterizeSetBuffers(const tinygltf::Scene & scene); void rasterize(uchar4 *pbo, const glm::mat4 & MVP, const glm::mat4 & MV, const glm::mat3 MV_normal); void rasterizeFree(); + +class PerformanceTimer +{ + /** + * This class is used for timing the performance + * Uncopyable and unmovable + * + * Adapted from WindyDarian(https://github.com/WindyDarian) + */ +public: + PerformanceTimer() + { + cudaEventCreate(&event_start); + cudaEventCreate(&event_end); + } + + ~PerformanceTimer() + { + cudaEventDestroy(event_start); + cudaEventDestroy(event_end); + } + + void startCpuTimer() + { + if (cpu_timer_started) { throw std::runtime_error("CPU timer already started"); } + cpu_timer_started = true; + + time_start_cpu = std::chrono::high_resolution_clock::now(); + } + + void endCpuTimer() + { + time_end_cpu = std::chrono::high_resolution_clock::now(); + + if (!cpu_timer_started) { throw std::runtime_error("CPU timer not started"); } + + std::chrono::duration duro = time_end_cpu - time_start_cpu; + prev_elapsed_time_cpu_milliseconds = + static_cast(duro.count()); + + cpu_timer_started = false; + } + + void startGpuTimer() + { + if 
(gpu_timer_started) { throw std::runtime_error("GPU timer already started"); } + gpu_timer_started = true; + + cudaEventRecord(event_start); + } + + void endGpuTimer() + { + cudaEventRecord(event_end); + cudaEventSynchronize(event_end); + + if (!gpu_timer_started) { throw std::runtime_error("GPU timer not started"); } + + cudaEventElapsedTime(&prev_elapsed_time_gpu_milliseconds, event_start, event_end); + gpu_timer_started = false; + } + + float getCpuElapsedTimeForPreviousOperation() //noexcept //(damn I need VS 2015 + { + return prev_elapsed_time_cpu_milliseconds; + } + + float getGpuElapsedTimeForPreviousOperation() //noexcept + { + return prev_elapsed_time_gpu_milliseconds; + } + + // remove copy and move functions + PerformanceTimer(const PerformanceTimer&) = delete; + PerformanceTimer(PerformanceTimer&&) = delete; + PerformanceTimer& operator=(const PerformanceTimer&) = delete; + PerformanceTimer& operator=(PerformanceTimer&&) = delete; + +private: + cudaEvent_t event_start = nullptr; + cudaEvent_t event_end = nullptr; + + using time_point_t = std::chrono::high_resolution_clock::time_point; + time_point_t time_start_cpu; + time_point_t time_end_cpu; + + bool cpu_timer_started = false; + bool gpu_timer_started = false; + + float prev_elapsed_time_cpu_milliseconds = 0.f; + float prev_elapsed_time_gpu_milliseconds = 0.f; +}; + +PerformanceTimer& timer(); diff --git a/src/rasterizeTools.h b/src/rasterizeTools.h index 46c701e..796108a 100644 --- a/src/rasterizeTools.h +++ b/src/rasterizeTools.h @@ -99,3 +99,40 @@ float getZAtCoordinate(const glm::vec3 barycentricCoord, const glm::vec3 tri[3]) + barycentricCoord.y * tri[1].z + barycentricCoord.z * tri[2].z); } + +__host__ __device__ static +glm::vec3 BCInterpVector(glm::vec3 bc, glm::vec3 vectors[3]) { + /*glm::vec3 tri1[3] = { tri[0], tri[1], bc }; + glm::vec3 tri2[3] = { tri[1], tri[2], bc }; + glm::vec3 tri3[3] = { tri[0], tri[2], bc }; + float s = calculateSignedArea(tri); + float s1 = 
calculateSignedArea(tri1); + float s2 = calculateSignedArea(tri2); + float s3 = calculateSignedArea(tri3); + + return vectors[0] * s2 / s + vectors[1] * s3 / s + vectors[2] * s1 / s;*/ + + return glm::normalize(bc.x * vectors[0] + bc.y * vectors[1] + bc.z * vectors[2]); +} + +__host__ __device__ static +glm::vec2 PCInterpUV(glm::vec3 bc, glm::vec3 eyePositions[3], glm::vec2 UVs[3]) { + + + glm::vec2 tz = bc.x * UVs[0] / eyePositions[0].z + bc.y * UVs[1] / eyePositions[1].z + bc.z * UVs[2] / eyePositions[2].z; + float cz = bc.x / eyePositions[0].z + bc.y / eyePositions[1].z + bc.z / eyePositions[2].z; + return tz / cz; + +} + +__host__ __device__ static +bool isBackface(const glm::vec3 tri[3]) { + glm::vec3 v1 = tri[1] - tri[0]; + glm::vec3 v2 = tri[2] - tri[0]; + glm::vec3 nor = glm::cross(v1, v2); + glm::vec3 eyeDir = glm::vec3(0.0f, 0.0f, -1.0f); + if (glm::dot(nor, eyeDir) < 0) { + return true; + } + return false; +} \ No newline at end of file