From 64016384e55d9d6dd7bcfef23e947606466d0bd6 Mon Sep 17 00:00:00 2001 From: Shubham Sharma Date: Tue, 19 Oct 2021 20:01:10 -0400 Subject: [PATCH 01/14] normals Done --- src/pathtrace.cu | 21 +++++++++++++++++++-- src/sceneStructs.h | 1 + 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/src/pathtrace.cu b/src/pathtrace.cu index 23e5f90..ecb0258 100644 --- a/src/pathtrace.cu +++ b/src/pathtrace.cu @@ -73,7 +73,7 @@ __global__ void gbufferToPBO(uchar4* pbo, glm::ivec2 resolution, GBufferPixel* g if (x < resolution.x && y < resolution.y) { int index = x + (y * resolution.x); - float timeToIntersect = gBuffer[index].t * 256.0; + float timeToIntersect = gBuffer[index].t * 256.0f; pbo[index].w = 0; pbo[index].x = timeToIntersect; @@ -82,6 +82,21 @@ __global__ void gbufferToPBO(uchar4* pbo, glm::ivec2 resolution, GBufferPixel* g } } +__global__ void gbufferToPBO_Normals(uchar4* pbo, glm::ivec2 resolution, GBufferPixel* gBuffer) { + int x = (blockIdx.x * blockDim.x) + threadIdx.x; + int y = (blockIdx.y * blockDim.y) + threadIdx.y; + + if (x < resolution.x && y < resolution.y) { + int index = x + (y * resolution.x); + + glm::vec3 normal = glm::normalize(gBuffer[index].normals) * 255.0f; + pbo[index].w = 0; + pbo[index].x = normal.x; + pbo[index].y = normal.y; + pbo[index].z = normal.z; + } +} + static Scene * hst_scene = NULL; static glm::vec3 * dev_image = NULL; static Geom * dev_geoms = NULL; @@ -282,6 +297,7 @@ __global__ void generateGBuffer ( if (idx < num_paths) { gBuffer[idx].t = shadeableIntersections[idx].t; + gBuffer[idx].normals = shadeableIntersections[idx].surfaceNormal; } } @@ -418,7 +434,8 @@ void showGBuffer(uchar4* pbo) { (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y); // CHECKITOUT: process the gbuffer results and send them to OpenGL buffer for visualization - gbufferToPBO<<>>(pbo, cam.resolution, dev_gBuffer); + //gbufferToPBO<<>>(pbo, cam.resolution, dev_gBuffer); + gbufferToPBO_Normals<<>>(pbo, cam.resolution, dev_gBuffer); } void showImage(uchar4* pbo, int iter) { diff --git a/src/sceneStructs.h b/src/sceneStructs.h index da7e558..ecedf09 100644 --- a/src/sceneStructs.h +++ b/src/sceneStructs.h @@ -79,4 +79,5 @@ struct ShadeableIntersection { // What information might be helpful for guiding a denoising filter? struct GBufferPixel { float t; + glm::vec3 normals; }; From 0fc7844d76ba36dd2538b26da53fb82813b2d17b Mon Sep 17 00:00:00 2001 From: Shubham Sharma Date: Tue, 19 Oct 2021 21:01:58 -0400 Subject: [PATCH 02/14] GBufPosition Completed --- src/pathtrace.cu | 22 +++++++++++++++++++--- src/sceneStructs.h | 3 ++- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/src/pathtrace.cu b/src/pathtrace.cu index ecb0258..087000a 100644 --- a/src/pathtrace.cu +++ b/src/pathtrace.cu @@ -89,7 +89,7 @@ __global__ void gbufferToPBO_Normals(uchar4* pbo, glm::ivec2 resolution, GBuffer if (x < resolution.x && y < resolution.y) { int index = x + (y * resolution.x); - glm::vec3 normal = glm::normalize(gBuffer[index].normals) * 255.0f; + glm::vec3 normal = gBuffer[index].normal * 256.0f; pbo[index].w = 0; pbo[index].x = normal.x; pbo[index].y = normal.y; @@ -97,6 +97,21 @@ __global__ void gbufferToPBO_Normals(uchar4* pbo, glm::ivec2 resolution, GBuffer } } +__global__ void gbufferToPBO_Position(uchar4* pbo, glm::ivec2 resolution, GBufferPixel* gBuffer) { + int x = (blockIdx.x * blockDim.x) + threadIdx.x; + int y = (blockIdx.y * blockDim.y) + threadIdx.y; + + if (x < resolution.x && y < resolution.y) { + int index = x + (y * resolution.x); + + glm::vec3 position = glm::abs(gBuffer[index].position) * 20.0f; + pbo[index].w = 0; + pbo[index].x = position[0]; + pbo[index].y = position[1]; + pbo[index].z = position[2]; + } +} + static Scene * hst_scene = NULL; static glm::vec3 * dev_image = NULL; static Geom * dev_geoms = NULL; @@ -297,7 +312,8 @@ __global__ void generateGBuffer ( if (idx < num_paths) { gBuffer[idx].t = shadeableIntersections[idx].t; - gBuffer[idx].normals = shadeableIntersections[idx].surfaceNormal; + gBuffer[idx].normal = shadeableIntersections[idx].surfaceNormal; + gBuffer[idx].position = getPointOnRay(pathSegments[idx].ray, shadeableIntersections[idx].t); } } @@ -435,7 +451,7 @@ void showGBuffer(uchar4* pbo) { // CHECKITOUT: process the gbuffer results and send them to OpenGL buffer for visualization //gbufferToPBO<<>>(pbo, cam.resolution, dev_gBuffer); - gbufferToPBO_Normals<<>>(pbo, cam.resolution, dev_gBuffer); + gbufferToPBO_Position <<>>(pbo, cam.resolution, dev_gBuffer); } void showImage(uchar4* pbo, int iter) { diff --git a/src/sceneStructs.h b/src/sceneStructs.h index ecedf09..131f211 100644 --- a/src/sceneStructs.h +++ b/src/sceneStructs.h @@ -79,5 +79,6 @@ struct ShadeableIntersection { // What information might be helpful for guiding a denoising filter? struct GBufferPixel { float t; - glm::vec3 normals; + glm::vec3 normal; + glm::vec3 position; }; From cf49f402ae29db76ff45d9d6eb4c1d09fca27e82 Mon Sep 17 00:00:00 2001 From: Shubham Sharma Date: Wed, 20 Oct 2021 00:57:00 -0400 Subject: [PATCH 03/14] Update --- src/main.cpp | 1 + src/pathtrace.cu | 802 ++++++++++++++++++++++++++------------------- src/sceneStructs.h | 1 + 3 files changed, 464 insertions(+), 340 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index 4092ae4..1654770 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -45,6 +45,7 @@ int iteration; int width; int height; + //------------------------------- //-------------MAIN-------------- //------------------------------- diff --git a/src/pathtrace.cu b/src/pathtrace.cu index 087000a..2ae338c 100644 --- a/src/pathtrace.cu +++ b/src/pathtrace.cu @@ -18,146 +18,210 @@ #define FILENAME (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__) #define checkCUDAError(msg) checkCUDAErrorFn(msg, FILENAME, __LINE__) -void checkCUDAErrorFn(const char *msg, const char *file, int line) { +void checkCUDAErrorFn(const char* msg, const char* file, int line) { #if ERRORCHECK - cudaDeviceSynchronize(); - cudaError_t err = cudaGetLastError(); - if (cudaSuccess == err) { - return; - } - - fprintf(stderr, "CUDA error"); - if (file) { - fprintf(stderr, " (%s:%d)", file, line); - } - fprintf(stderr, ": %s: %s\n", msg, cudaGetErrorString(err)); + cudaDeviceSynchronize(); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess == err) { + return; + } + + fprintf(stderr, "CUDA error"); + if (file) { + fprintf(stderr, " (%s:%d)", file, line); + } + fprintf(stderr, ": %s: %s\n", msg, cudaGetErrorString(err)); # ifdef _WIN32 - getchar(); + getchar(); # endif - exit(EXIT_FAILURE); + exit(EXIT_FAILURE); #endif } + +float gaussianKernel[25] = { 0.003765, 0.015019, 0.023792, 0.015019, 0.003765, +0.015019, 0.059912, 0.094907, 0.059912, 0.015019, +0.023792, 0.094907, 0.150342, 0.094907, 0.023792, +0.015019, 0.059912, 0.094907, 0.059912, 0.015019, +0.003765, 0.015019, 0.023792, 0.015019, 0.003765, }; + + +glm::vec2 offsetKernel[25]; + __host__ __device__ thrust::default_random_engine makeSeededRandomEngine(int iter, int index, int depth) { - int h = utilhash((1 << 31) | (depth << 22) | iter) ^ utilhash(index); - return thrust::default_random_engine(h); + int h = utilhash((1 << 31) | (depth << 22) | iter) ^ utilhash(index); + return thrust::default_random_engine(h); } //Kernel that writes the image to the OpenGL PBO directly. __global__ void sendImageToPBO(uchar4* pbo, glm::ivec2 resolution, - int iter, glm::vec3* image) { - int x = (blockIdx.x * blockDim.x) + threadIdx.x; - int y = (blockIdx.y * blockDim.y) + threadIdx.y; - - if (x < resolution.x && y < resolution.y) { - int index = x + (y * resolution.x); - glm::vec3 pix = image[index]; - - glm::ivec3 color; - color.x = glm::clamp((int) (pix.x / iter * 255.0), 0, 255); - color.y = glm::clamp((int) (pix.y / iter * 255.0), 0, 255); - color.z = glm::clamp((int) (pix.z / iter * 255.0), 0, 255); - - // Each thread writes one pixel location in the texture (textel) - pbo[index].w = 0; - pbo[index].x = color.x; - pbo[index].y = color.y; - pbo[index].z = color.z; - } + int iter, glm::vec3* image) { + int x = (blockIdx.x * blockDim.x) + threadIdx.x; + int y = (blockIdx.y * blockDim.y) + threadIdx.y; + + if (x < resolution.x && y < resolution.y) { + int index = x + (y * resolution.x); + glm::vec3 pix = image[index]; + + glm::ivec3 color; + color.x = glm::clamp((int)((pix.x / iter) * 255.0), 0, 255); + color.y = glm::clamp((int)((pix.y / iter) * 255.0), 0, 255); + color.z = glm::clamp((int)((pix.z / iter) * 255.0), 0, 255); + + // Each thread writes one pixel location in the texture (textel) + pbo[index].w = 0; + pbo[index].x = color.x; + pbo[index].y = color.y; + pbo[index].z = color.z; + } } __global__ void gbufferToPBO(uchar4* pbo, glm::ivec2 resolution, GBufferPixel* gBuffer) { - int x = (blockIdx.x * blockDim.x) + threadIdx.x; - int y = (blockIdx.y * blockDim.y) + threadIdx.y; - - if (x < resolution.x && y < resolution.y) { - int index = x + (y * resolution.x); - float timeToIntersect = gBuffer[index].t * 256.0f; - - pbo[index].w = 0; - pbo[index].x = timeToIntersect; - pbo[index].y = timeToIntersect; - pbo[index].z = timeToIntersect; - } + int x = (blockIdx.x * blockDim.x) + threadIdx.x; + int y = (blockIdx.y * blockDim.y) + threadIdx.y; + + if (x < resolution.x && y < resolution.y) { + int index = x + (y * resolution.x); + float timeToIntersect = gBuffer[index].t * 256.0f; + + pbo[index].w = 0; + pbo[index].x = timeToIntersect; + pbo[index].y = timeToIntersect; + pbo[index].z = timeToIntersect; + } } __global__ void gbufferToPBO_Normals(uchar4* pbo, glm::ivec2 resolution, GBufferPixel* gBuffer) { - int x = (blockIdx.x * blockDim.x) + threadIdx.x; - int y = (blockIdx.y * blockDim.y) + threadIdx.y; - - if (x < resolution.x && y < resolution.y) { - int index = x + (y * resolution.x); - - glm::vec3 normal = gBuffer[index].normal * 256.0f; - pbo[index].w = 0; - pbo[index].x = normal.x; - pbo[index].y = normal.y; - pbo[index].z = normal.z; - } + int x = (blockIdx.x * blockDim.x) + threadIdx.x; + int y = (blockIdx.y * blockDim.y) + threadIdx.y; + + if (x < resolution.x && y < resolution.y) { + int index = x + (y * resolution.x); + + glm::vec3 normal = gBuffer[index].normal * 255.0f; + pbo[index].w = 0; + pbo[index].x = normal.x; + pbo[index].y = normal.y; + pbo[index].z = normal.z; + } } __global__ void gbufferToPBO_Position(uchar4* pbo, glm::ivec2 resolution, GBufferPixel* gBuffer) { - int x = (blockIdx.x * blockDim.x) + threadIdx.x; - int y = (blockIdx.y * blockDim.y) + threadIdx.y; - - if (x < resolution.x && y < resolution.y) { - int index = x + (y * resolution.x); - - glm::vec3 position = glm::abs(gBuffer[index].position) * 20.0f; - pbo[index].w = 0; - pbo[index].x = position[0]; - pbo[index].y = position[1]; - pbo[index].z = position[2]; - } + int x = (blockIdx.x * blockDim.x) + threadIdx.x; + int y = (blockIdx.y * blockDim.y) + threadIdx.y; + + if (x < resolution.x && y < resolution.y) { + int index = x + (y * resolution.x); + + glm::vec3 position = glm::abs(gBuffer[index].position) * 20.0f; + pbo[index].w = 0; + pbo[index].x = position[0]; + pbo[index].y = position[1]; + pbo[index].z = position[2]; + } +} + +__global__ void gbufferToPBO_Atrous(uchar4* pbo, glm::ivec2 resolution, GBufferPixel* gBuffer, glm::vec3* TrousImage) { + int x = (blockIdx.x * blockDim.x) + threadIdx.x; + int y = (blockIdx.y * blockDim.y) + threadIdx.y; + + if (x < resolution.x && y < resolution.y) { + int index = x + (y * resolution.x); + + + glm::vec3 pix = TrousImage[index]; + glm::ivec3 color; + + + color.x = glm::clamp((int)(pix.x * 255.0), 0, 255); + color.y = glm::clamp((int)(pix.y * 255.0), 0, 255); + color.z = glm::clamp((int)(pix.z * 255.0), 0, 255); +; + pbo[index].w = 0; + pbo[index].x = color.x; + pbo[index].y = color.y; + pbo[index].z = color.z; + } } -static Scene * hst_scene = NULL; -static glm::vec3 * dev_image = NULL; -static Geom * dev_geoms = NULL; -static Material * dev_materials = NULL; -static PathSegment * dev_paths = NULL; -static ShadeableIntersection * dev_intersections = NULL; +static Scene* hst_scene = NULL; +static glm::vec3* dev_image = NULL; +static Geom* dev_geoms = NULL; +static Material* dev_materials = NULL; +static PathSegment* dev_paths = NULL; +static ShadeableIntersection* dev_intersections = NULL; static GBufferPixel* dev_gBuffer = NULL; // TODO: static variables for device memory, any extra info you need, etc // ... +static float* dev_gausKernel = NULL; +static glm::vec2* dev_offsetKernel = NULL; +static glm::vec3* dev_TrousImage = NULL; +static glm::vec3* dev_IntermediaryImage = NULL; -void pathtraceInit(Scene *scene) { - hst_scene = scene; - const Camera &cam = hst_scene->state.camera; - const int pixelcount = cam.resolution.x * cam.resolution.y; +void generateOffsetKern() +{ + int index = 0; + for (int y = -2; y <= 2; y++) + { + for (int x = -2; x <= 2; x++) + { + offsetKernel[index] = glm::vec2(x, y); + index++; + } + } +} + +void pathtraceInit(Scene* scene) { + hst_scene = scene; + const Camera& cam = hst_scene->state.camera; + const int pixelcount = cam.resolution.x * cam.resolution.y; - cudaMalloc(&dev_image, pixelcount * sizeof(glm::vec3)); - cudaMemset(dev_image, 0, pixelcount * sizeof(glm::vec3)); + cudaMalloc(&dev_image, pixelcount * sizeof(glm::vec3)); + cudaMemset(dev_image, 0, pixelcount * sizeof(glm::vec3)); - cudaMalloc(&dev_paths, pixelcount * sizeof(PathSegment)); + cudaMalloc(&dev_paths, pixelcount * sizeof(PathSegment)); - cudaMalloc(&dev_geoms, scene->geoms.size() * sizeof(Geom)); - cudaMemcpy(dev_geoms, scene->geoms.data(), scene->geoms.size() * sizeof(Geom), cudaMemcpyHostToDevice); + cudaMalloc(&dev_geoms, scene->geoms.size() * sizeof(Geom)); + cudaMemcpy(dev_geoms, scene->geoms.data(), scene->geoms.size() * sizeof(Geom), cudaMemcpyHostToDevice); + + cudaMalloc(&dev_materials, scene->materials.size() * sizeof(Material)); + cudaMemcpy(dev_materials, scene->materials.data(), scene->materials.size() * sizeof(Material), cudaMemcpyHostToDevice); + + cudaMalloc(&dev_intersections, pixelcount * sizeof(ShadeableIntersection)); + cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection)); - cudaMalloc(&dev_materials, scene->materials.size() * sizeof(Material)); - cudaMemcpy(dev_materials, scene->materials.data(), scene->materials.size() * sizeof(Material), cudaMemcpyHostToDevice); + cudaMalloc(&dev_gBuffer, pixelcount * sizeof(GBufferPixel)); - cudaMalloc(&dev_intersections, pixelcount * sizeof(ShadeableIntersection)); - cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection)); + // TODO: initialize any extra device memeory you need - cudaMalloc(&dev_gBuffer, pixelcount * sizeof(GBufferPixel)); + cudaMalloc(&dev_gausKernel, 25 * sizeof(float)); + cudaMemcpy(dev_gausKernel, gaussianKernel, 25 * sizeof(float), cudaMemcpyHostToDevice); - // TODO: initialize any extra device memeory you need + generateOffsetKern(); + cudaMalloc(&dev_offsetKernel, 25 * sizeof(glm::vec2)); + cudaMemcpy(dev_offsetKernel, offsetKernel, 25 * sizeof(glm::vec2), cudaMemcpyHostToDevice); - checkCUDAError("pathtraceInit"); + cudaMalloc(&dev_TrousImage, pixelcount * sizeof(glm::vec3)); + cudaMalloc(&dev_IntermediaryImage, pixelcount * sizeof(glm::vec3)); + + checkCUDAError("pathtraceInit"); } void pathtraceFree() { - cudaFree(dev_image); // no-op if dev_image is null - cudaFree(dev_paths); - cudaFree(dev_geoms); - cudaFree(dev_materials); - cudaFree(dev_intersections); - cudaFree(dev_gBuffer); - // TODO: clean up any extra device memory you created - - checkCUDAError("pathtraceFree"); + cudaFree(dev_image); // no-op if dev_image is null + cudaFree(dev_paths); + cudaFree(dev_geoms); + cudaFree(dev_materials); + cudaFree(dev_intersections); + cudaFree(dev_gBuffer); + // TODO: clean up any extra device memory you created + + cudaFree(dev_gausKernel); + cudaFree(dev_offsetKernel); + cudaFree(dev_TrousImage); + cudaFree(dev_IntermediaryImage); + checkCUDAError("pathtraceFree"); } /** @@ -175,292 +239,350 @@ __global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, Path if (x < cam.resolution.x && y < cam.resolution.y) { int index = x + (y * cam.resolution.x); - PathSegment & segment = pathSegments[index]; + PathSegment& segment = pathSegments[index]; segment.ray.origin = cam.position; - segment.color = glm::vec3(1.0f, 1.0f, 1.0f); + segment.color = glm::vec3(1.0f, 1.0f, 1.0f); segment.ray.direction = glm::normalize(cam.view - cam.right * cam.pixelLength.x * ((float)x - (float)cam.resolution.x * 0.5f) - cam.up * cam.pixelLength.y * ((float)y - (float)cam.resolution.y * 0.5f) - ); + ); segment.pixelIndex = index; segment.remainingBounces = traceDepth; } } -__global__ void computeIntersections( - int depth - , int num_paths - , PathSegment * pathSegments - , Geom * geoms - , int geoms_size - , ShadeableIntersection * intersections - ) + +__global__ void CopyDataToInterImage( + int iter, int num_paths, + PathSegment* pathSegments, glm::vec3* dev_interImage) { int path_index = blockIdx.x * blockDim.x + threadIdx.x; if (path_index < num_paths) { - PathSegment pathSegment = pathSegments[path_index]; - float t; - glm::vec3 intersect_point; - glm::vec3 normal; - float t_min = FLT_MAX; - int hit_geom_index = -1; - bool outside = true; + PathSegment iterationPath = pathSegments[path_index]; + glm::vec3 currColor = iterationPath.color; + dev_interImage[iterationPath.pixelIndex] = iterationPath.color / (float)iter; + } +} + + __global__ void GenerateATrousImage( + int iter, int num_paths, + float* dev_gausKernel, glm::vec2 *dev_offsetKernel, + glm::vec3* dev_interImage, glm::vec3 *dev_TrousImage, + GBufferPixel * gbuf, const Camera cam + ) + { + + int index = blockIdx.x * blockDim.x + threadIdx.x; + + if (index < num_paths) + { + //glm::vec3 currColor =glm::vec3(0.0f); + glm::vec3 currColor = dev_interImage[index]; + /* for (int i = 0; i < 25; i++) + { + + float offsetX = dev_offsetKernel[i].x; + float offsetY = dev_offsetKernel[i].y; + float gausValue = dev_gausKernel[i]; + int offsetColorIdx = index + (offsetY * cam.resolution.x + offsetX); + if (offsetColorIdx >= 0 && offsetColorIdx < num_paths) + { + glm::vec3 newColor = dev_interImage[offsetColorIdx]; + currColor += newColor * dev_gausKernel[i]; + } + }*/ + dev_TrousImage[index] = currColor; + } - glm::vec3 tmp_intersect; - glm::vec3 tmp_normal; + } - // naive parse through global geoms - for (int i = 0; i < geoms_size; i++) + __global__ void computeIntersections( + int depth + , int num_paths + , PathSegment * pathSegments + , Geom * geoms + , int geoms_size + , ShadeableIntersection * intersections + ) + { + int path_index = blockIdx.x * blockDim.x + threadIdx.x; + + if (path_index < num_paths) { - Geom & geom = geoms[i]; + PathSegment pathSegment = pathSegments[path_index]; + + float t; + glm::vec3 intersect_point; + glm::vec3 normal; + float t_min = FLT_MAX; + int hit_geom_index = -1; + bool outside = true; - if (geom.type == CUBE) + glm::vec3 tmp_intersect; + glm::vec3 tmp_normal; + + // naive parse through global geoms + + for (int i = 0; i < geoms_size; i++) { - t = boxIntersectionTest(geom, pathSegment.ray, tmp_intersect, tmp_normal, outside); + Geom& geom = geoms[i]; + + if (geom.type == CUBE) + { + t = boxIntersectionTest(geom, pathSegment.ray, tmp_intersect, tmp_normal, outside); + } + else if (geom.type == SPHERE) + { + t = sphereIntersectionTest(geom, pathSegment.ray, tmp_intersect, tmp_normal, outside); + } + + // Compute the minimum t from the intersection tests to determine what + // scene geometry object was hit first. + if (t > 0.0f && t_min > t) + { + t_min = t; + hit_geom_index = i; + intersect_point = tmp_intersect; + normal = tmp_normal; + } } - else if (geom.type == SPHERE) + + if (hit_geom_index == -1) { - t = sphereIntersectionTest(geom, pathSegment.ray, tmp_intersect, tmp_normal, outside); + intersections[path_index].t = -1.0f; } - - // Compute the minimum t from the intersection tests to determine what - // scene geometry object was hit first. - if (t > 0.0f && t_min > t) + else { - t_min = t; - hit_geom_index = i; - intersect_point = tmp_intersect; - normal = tmp_normal; + //The ray hits something + intersections[path_index].t = t_min; + intersections[path_index].materialId = geoms[hit_geom_index].materialid; + intersections[path_index].surfaceNormal = normal; } } + } - if (hit_geom_index == -1) + __global__ void shadeSimpleMaterials( + int iter + , int num_paths + , ShadeableIntersection * shadeableIntersections + , PathSegment * pathSegments + , Material * materials + ) + { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < num_paths) { - intersections[path_index].t = -1.0f; + ShadeableIntersection intersection = shadeableIntersections[idx]; + PathSegment segment = pathSegments[idx]; + if (segment.remainingBounces == 0) { + return; + } + + if (intersection.t > 0.0f) { // if the intersection exists... + segment.remainingBounces--; + // Set up the RNG + thrust::default_random_engine rng = makeSeededRandomEngine(iter, idx, segment.remainingBounces); + + Material material = materials[intersection.materialId]; + glm::vec3 materialColor = material.color; + + // If the material indicates that the object was a light, "light" the ray + if (material.emittance > 0.0f) { + segment.color *= (materialColor * material.emittance); + segment.remainingBounces = 0; + } + else { + segment.color *= materialColor; + glm::vec3 intersectPos = intersection.t * segment.ray.direction + segment.ray.origin; + scatterRay(segment, intersectPos, intersection.surfaceNormal, material, rng); + } + // If there was no intersection, color the ray black. + // Lots of renderers use 4 channel color, RGBA, where A = alpha, often + // used for opacity, in which case they can indicate "no opacity". + // This can be useful for post-processing and image compositing. + } + else { + segment.color = glm::vec3(0.0f); + segment.remainingBounces = 0; + } + + pathSegments[idx] = segment; } - else + } + + __global__ void generateGBuffer( + int num_paths, + ShadeableIntersection * shadeableIntersections, + PathSegment * pathSegments, + GBufferPixel * gBuffer) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < num_paths) { - //The ray hits something - intersections[path_index].t = t_min; - intersections[path_index].materialId = geoms[hit_geom_index].materialid; - intersections[path_index].surfaceNormal = normal; + int pixelPosition = pathSegments[idx].pixelIndex; + gBuffer[pixelPosition].t = shadeableIntersections[idx].t; + gBuffer[pixelPosition].normal = shadeableIntersections[idx].surfaceNormal; + gBuffer[pixelPosition].position = getPointOnRay(pathSegments[idx].ray, shadeableIntersections[idx].t); } } -} - -__global__ void shadeSimpleMaterials ( - int iter - , int num_paths - , ShadeableIntersection * shadeableIntersections - , PathSegment * pathSegments - , Material * materials - ) -{ - int idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx < num_paths) - { - ShadeableIntersection intersection = shadeableIntersections[idx]; - PathSegment segment = pathSegments[idx]; - if (segment.remainingBounces == 0) { - return; - } - - if (intersection.t > 0.0f) { // if the intersection exists... - segment.remainingBounces--; - // Set up the RNG - thrust::default_random_engine rng = makeSeededRandomEngine(iter, idx, segment.remainingBounces); - - Material material = materials[intersection.materialId]; - glm::vec3 materialColor = material.color; - - // If the material indicates that the object was a light, "light" the ray - if (material.emittance > 0.0f) { - segment.color *= (materialColor * material.emittance); - segment.remainingBounces = 0; - } - else { - segment.color *= materialColor; - glm::vec3 intersectPos = intersection.t * segment.ray.direction + segment.ray.origin; - scatterRay(segment, intersectPos, intersection.surfaceNormal, material, rng); - } - // If there was no intersection, color the ray black. - // Lots of renderers use 4 channel color, RGBA, where A = alpha, often - // used for opacity, in which case they can indicate "no opacity". - // This can be useful for post-processing and image compositing. - } else { - segment.color = glm::vec3(0.0f); - segment.remainingBounces = 0; - } - - pathSegments[idx] = segment; - } -} - -__global__ void generateGBuffer ( - int num_paths, - ShadeableIntersection* shadeableIntersections, - PathSegment* pathSegments, - GBufferPixel* gBuffer) { - int idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx < num_paths) - { - gBuffer[idx].t = shadeableIntersections[idx].t; - gBuffer[idx].normal = shadeableIntersections[idx].surfaceNormal; - gBuffer[idx].position = getPointOnRay(pathSegments[idx].ray, shadeableIntersections[idx].t); - } -} -// Add the current iteration's output to the overall image -__global__ void finalGather(int nPaths, glm::vec3 * image, PathSegment * iterationPaths) -{ - int index = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (index < nPaths) + // Add the current iteration's output to the overall image + __global__ void finalGather(int nPaths, glm::vec3 * image, PathSegment * iterationPaths) { - PathSegment iterationPath = iterationPaths[index]; - image[iterationPath.pixelIndex] += iterationPath.color; + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + + if (index < nPaths) + { + PathSegment iterationPath = iterationPaths[index]; + image[iterationPath.pixelIndex] += iterationPath.color; + } } -} -/** - * Wrapper for the __global__ call that sets up the kernel calls and does a ton - * of memory management - */ -void pathtrace(int frame, int iter) { - const int traceDepth = hst_scene->state.traceDepth; - const Camera &cam = hst_scene->state.camera; - const int pixelcount = cam.resolution.x * cam.resolution.y; - - // 2D block for generating ray from camera - const dim3 blockSize2d(8, 8); - const dim3 blocksPerGrid2d( - (cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x, - (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y); - - // 1D block for path tracing - const int blockSize1d = 128; - - /////////////////////////////////////////////////////////////////////////// - - // Pathtracing Recap: - // * Initialize array of path rays (using rays that come out of the camera) - // * You can pass the Camera object to that kernel. - // * Each path ray must carry at minimum a (ray, color) pair, - // * where color starts as the multiplicative identity, white = (1, 1, 1). - // * This has already been done for you. - // * NEW: For the first depth, generate geometry buffers (gbuffers) - // * For each depth: - // * Compute an intersection in the scene for each path ray. - // A very naive version of this has been implemented for you, but feel - // free to add more primitives and/or a better algorithm. - // Currently, intersection distance is recorded as a parametric distance, - // t, or a "distance along the ray." t = -1.0 indicates no intersection. - // * Color is attenuated (multiplied) by reflections off of any object - // * Stream compact away all of the terminated paths. - // You may use either your implementation or `thrust::remove_if` or its - // cousins. - // * Note that you can't really use a 2D kernel launch any more - switch - // to 1D. - // * Shade the rays that intersected something or didn't bottom out. - // That is, color the ray by performing a color computation according - // to the shader, then generate a new ray to continue the ray path. - // We recommend just updating the ray's PathSegment in place. - // Note that this step may come before or after stream compaction, - // since some shaders you write may also cause a path to terminate. - // * Finally: - // * if not denoising, add this iteration's results to the image - // * TODO: if denoising, run kernels that take both the raw pathtraced result and the gbuffer, and put the result in the "pbo" from opengl - - generateRayFromCamera <<>>(cam, iter, traceDepth, dev_paths); - checkCUDAError("generate camera ray"); - - int depth = 0; - PathSegment* dev_path_end = dev_paths + pixelcount; - int num_paths = dev_path_end - dev_paths; - - // --- PathSegment Tracing Stage --- - // Shoot ray into scene, bounce between objects, push shading chunks - - // Empty gbuffer - cudaMemset(dev_gBuffer, 0, pixelcount * sizeof(GBufferPixel)); - - // clean shading chunks - cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection)); + /** + * Wrapper for the __global__ call that sets up the kernel calls and does a ton + * of memory management + */ + void pathtrace(int frame, int iter) { + const int traceDepth = hst_scene->state.traceDepth; + const Camera& cam = hst_scene->state.camera; + const int pixelcount = cam.resolution.x * cam.resolution.y; + + // 2D block for generating ray from camera + const dim3 blockSize2d(8, 8); + const dim3 blocksPerGrid2d( + (cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x, + (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y); + + // 1D block for path tracing + const int blockSize1d = 128; + + /////////////////////////////////////////////////////////////////////////// + + // Pathtracing Recap: + // * Initialize array of path rays (using rays that come out of the camera) + // * You can pass the Camera object to that kernel. + // * Each path ray must carry at minimum a (ray, color) pair, + // * where color starts as the multiplicative identity, white = (1, 1, 1). + // * This has already been done for you. + // * NEW: For the first depth, generate geometry buffers (gbuffers) + // * For each depth: + // * Compute an intersection in the scene for each path ray. + // A very naive version of this has been implemented for you, but feel + // free to add more primitives and/or a better algorithm. + // Currently, intersection distance is recorded as a parametric distance, + // t, or a "distance along the ray." t = -1.0 indicates no intersection. + // * Color is attenuated (multiplied) by reflections off of any object + // * Stream compact away all of the terminated paths. + // You may use either your implementation or `thrust::remove_if` or its + // cousins. + // * Note that you can't really use a 2D kernel launch any more - switch + // to 1D. + // * Shade the rays that intersected something or didn't bottom out. + // That is, color the ray by performing a color computation according + // to the shader, then generate a new ray to continue the ray path. + // We recommend just updating the ray's PathSegment in place. + // Note that this step may come before or after stream compaction, + // since some shaders you write may also cause a path to terminate. + // * Finally: + // * if not denoising, add this iteration's results to the image + // * TODO: if denoising, run kernels that take both the raw pathtraced result and the gbuffer, and put the result in the "pbo" from opengl + + generateRayFromCamera << > > (cam, iter, traceDepth, dev_paths); + checkCUDAError("generate camera ray"); + + int depth = 0; + PathSegment* dev_path_end = dev_paths + pixelcount; + int num_paths = dev_path_end - dev_paths; + + // --- PathSegment Tracing Stage --- + // Shoot ray into scene, bounce between objects, push shading chunks + + // Empty gbuffer + cudaMemset(dev_gBuffer, 0, pixelcount * sizeof(GBufferPixel)); + + // clean shading chunks + cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection)); + + bool iterationComplete = false; + while (!iterationComplete) { + + // tracing + dim3 numblocksPathSegmentTracing = (num_paths + blockSize1d - 1) / blockSize1d; + computeIntersections << > > ( + depth + , num_paths + , dev_paths + , dev_geoms + , hst_scene->geoms.size() + , dev_intersections + ); + checkCUDAError("trace one bounce"); + cudaDeviceSynchronize(); + + if (depth == 0) { + generateGBuffer << > > (num_paths, dev_intersections, dev_paths, dev_gBuffer); + } - bool iterationComplete = false; - while (!iterationComplete) { - - // tracing - dim3 numblocksPathSegmentTracing = (num_paths + blockSize1d - 1) / blockSize1d; - computeIntersections <<>> ( - depth - , num_paths - , dev_paths - , dev_geoms - , hst_scene->geoms.size() - , dev_intersections - ); - checkCUDAError("trace one bounce"); - cudaDeviceSynchronize(); + CopyDataToInterImage << > > (iter, num_paths, dev_paths, dev_IntermediaryImage); - if (depth == 0) { - generateGBuffer<<>>(num_paths, dev_intersections, dev_paths, dev_gBuffer); - } + GenerateATrousImage << > > (depth, num_paths, dev_gausKernel, dev_offsetKernel, + dev_IntermediaryImage, dev_TrousImage, dev_gBuffer, cam); - depth++; + depth++; + shadeSimpleMaterials << > > ( + iter, + num_paths, + dev_intersections, + dev_paths, + dev_materials + ); + iterationComplete = depth == traceDepth; + } - shadeSimpleMaterials<<>> ( - iter, - num_paths, - dev_intersections, - dev_paths, - dev_materials - ); - iterationComplete = depth == traceDepth; - } + // Assemble this iteration and apply it to the image + dim3 numBlocksPixels = (pixelcount + blockSize1d - 1) / blockSize1d; + finalGather << > > (num_paths, dev_image, dev_paths); - // Assemble this iteration and apply it to the image - dim3 numBlocksPixels = (pixelcount + blockSize1d - 1) / blockSize1d; - finalGather<<>>(num_paths, dev_image, dev_paths); + /////////////////////////////////////////////////////////////////////////// - /////////////////////////////////////////////////////////////////////////// + // CHECKITOUT: use dev_image as reference if you want to implement saving denoised images. + // Otherwise, screenshots are also acceptable. + // Retrieve image from GPU + cudaMemcpy(hst_scene->state.image.data(), dev_image, + pixelcount * sizeof(glm::vec3), cudaMemcpyDeviceToHost); - // CHECKITOUT: use dev_image as reference if you want to implement saving denoised images. - // Otherwise, screenshots are also acceptable. - // Retrieve image from GPU - cudaMemcpy(hst_scene->state.image.data(), dev_image, - pixelcount * sizeof(glm::vec3), cudaMemcpyDeviceToHost); + checkCUDAError("pathtrace"); + } - checkCUDAError("pathtrace"); -} + // CHECKITOUT: this kernel "post-processes" the gbuffer/gbuffers into something that you can visualize for debugging. + void showGBuffer(uchar4 * pbo) { + const Camera& cam = hst_scene->state.camera; + const dim3 blockSize2d(8, 8); + const dim3 blocksPerGrid2d( + (cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x, + (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y); -// CHECKITOUT: this kernel "post-processes" the gbuffer/gbuffers into something that you can visualize for debugging. -void showGBuffer(uchar4* pbo) { - const Camera &cam = hst_scene->state.camera; - const dim3 blockSize2d(8, 8); - const dim3 blocksPerGrid2d( - (cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x, - (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y); - - // CHECKITOUT: process the gbuffer results and send them to OpenGL buffer for visualization - //gbufferToPBO<<>>(pbo, cam.resolution, dev_gBuffer); - gbufferToPBO_Position <<>>(pbo, cam.resolution, dev_gBuffer); -} -void showImage(uchar4* pbo, int iter) { -const Camera &cam = hst_scene->state.camera; - const dim3 blockSize2d(8, 8); - const dim3 blocksPerGrid2d( - (cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x, - (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y); + // CHECKITOUT: process the gbuffer results and send them to OpenGL buffer for visualization + //gbufferToPBO<<>>(pbo, cam.resolution, dev_gBuffer); + //gbufferToPBO_Normals<<>>(pbo, cam.resolution, dev_gBuffer); + //gbufferToPBO_Position <<>>(pbo, cam.resolution, dev_gBuffer); + gbufferToPBO_Atrous << > > (pbo, cam.resolution, dev_gBuffer, dev_TrousImage); + } + + void showImage(uchar4 * pbo, int iter) { + const Camera& cam = hst_scene->state.camera; + const dim3 blockSize2d(8, 8); + const dim3 blocksPerGrid2d( + (cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x, + (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y); - // Send results to OpenGL buffer for rendering - sendImageToPBO<<>>(pbo, cam.resolution, iter, dev_image); -} + // Send results to OpenGL buffer for rendering + sendImageToPBO << > > (pbo, cam.resolution, iter, dev_image); + } diff --git a/src/sceneStructs.h b/src/sceneStructs.h index 131f211..d455543 100644 --- a/src/sceneStructs.h +++ b/src/sceneStructs.h @@ -81,4 +81,5 @@ struct GBufferPixel { float t; glm::vec3 normal; glm::vec3 position; + glm::vec3 pathColor; }; From 7800325ba0997ea99f39d94ae1d41369dae865c9 Mon Sep 17 00:00:00 2001 From: Shubham Sharma Date: Wed, 20 Oct 2021 13:46:21 -0400 Subject: [PATCH 04/14] normals Fixed --- src/pathtrace.cu | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/pathtrace.cu b/src/pathtrace.cu index 2ae338c..7775735 100644 --- a/src/pathtrace.cu +++ b/src/pathtrace.cu @@ -99,11 +99,16 @@ __global__ void gbufferToPBO_Normals(uchar4* pbo, glm::ivec2 resolution, GBuffer if (x < resolution.x && y < resolution.y) { int index = x + (y * resolution.x); - glm::vec3 normal = gBuffer[index].normal * 255.0f; + glm::vec3 normal = glm::abs(gBuffer[index].normal); + glm::ivec3 color; + color.x = glm::clamp((int)(normal.x * 255.0), 0, 255); + color.y = glm::clamp((int)(normal.y * 255.0), 0, 255); + color.z = glm::clamp((int)(normal.z * 255.0), 0, 255); + pbo[index].w = 0; - pbo[index].x = normal.x; - pbo[index].y = normal.y; - pbo[index].z = normal.z; + pbo[index].x = color.x; + pbo[index].y = color.y; + pbo[index].z = color.z; } } @@ -571,9 +576,9 @@ __global__ void CopyDataToInterImage( // CHECKITOUT: process the gbuffer results and send them to OpenGL buffer for visualization //gbufferToPBO<<>>(pbo, cam.resolution, dev_gBuffer); - //gbufferToPBO_Normals<<>>(pbo, cam.resolution, dev_gBuffer); + gbufferToPBO_Normals<<>>(pbo, cam.resolution, dev_gBuffer); //gbufferToPBO_Position <<>>(pbo, cam.resolution, dev_gBuffer); - gbufferToPBO_Atrous << > > (pbo, cam.resolution, dev_gBuffer, dev_TrousImage); + //gbufferToPBO_Atrous << > > (pbo, cam.resolution, dev_gBuffer, dev_TrousImage); } void showImage(uchar4 * pbo, int iter) { From bc0e250ab33170d69ed1dbf9d7070828b55cb343 Mon Sep 17 00:00:00 2001 From: Shubham Sharma Date: Wed, 20 Oct 2021 13:48:47 -0400 Subject: [PATCH 05/14] postition Updated --- src/pathtrace.cu | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/pathtrace.cu b/src/pathtrace.cu index 7775735..0860f76 100644 --- a/src/pathtrace.cu +++ b/src/pathtrace.cu @@ -119,11 +119,16 @@ __global__ void gbufferToPBO_Position(uchar4* pbo, glm::ivec2 resolution, GBuffe if (x < resolution.x && y < resolution.y) { int index = x + (y * resolution.x); - glm::vec3 position = glm::abs(gBuffer[index].position) * 20.0f; + glm::vec3 position = glm::abs(gBuffer[index].position) ; + glm::ivec3 color; + color.x = glm::clamp((int)(position.x * 20.0), 0, 255); + color.y = glm::clamp((int)(position.y * 20.0), 0, 255); + color.z = glm::clamp((int)(position.z * 20.0), 0, 255); + pbo[index].w = 0; - pbo[index].x = position[0]; - pbo[index].y = position[1]; - pbo[index].z = position[2]; + pbo[index].x = color.x; + pbo[index].y = color.y; + pbo[index].z = color.z; } } @@ -576,8 +581,8 @@ __global__ void CopyDataToInterImage( // CHECKITOUT: process the gbuffer results and send them to OpenGL buffer for visualization //gbufferToPBO<<>>(pbo, cam.resolution, dev_gBuffer); - gbufferToPBO_Normals<<>>(pbo, cam.resolution, dev_gBuffer); - //gbufferToPBO_Position <<>>(pbo, cam.resolution, dev_gBuffer); + //gbufferToPBO_Normals<<>>(pbo, cam.resolution, dev_gBuffer); + gbufferToPBO_Position <<>>(pbo, cam.resolution, dev_gBuffer); //gbufferToPBO_Atrous << > > (pbo, cam.resolution, dev_gBuffer, dev_TrousImage); } From 0d7a05e1524a668b8a98fc0145f3d6e44944a9dc Mon Sep 17 00:00:00 2001 From: Shubham Sharma Date: Wed, 20 Oct 2021 16:40:13 -0400 Subject: [PATCH 06/14] Gaussian BLur Fixed --- src/pathtrace.cu | 84 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 60 insertions(+), 24 deletions(-) diff --git a/src/pathtrace.cu b/src/pathtrace.cu index 0860f76..cfd0e67 100644 --- a/src/pathtrace.cu +++ b/src/pathtrace.cu @@ -144,9 +144,9 @@ __global__ void gbufferToPBO_Atrous(uchar4* pbo, glm::ivec2 resolution, GBufferP glm::ivec3 color; - color.x = glm::clamp((int)(pix.x * 255.0), 0, 255); - color.y = glm::clamp((int)(pix.y * 255.0), 0, 255); - color.z = glm::clamp((int)(pix.z * 255.0), 0, 255); + color.x = glm::clamp((int)(pix.x/10 * 255.0), 0, 255); + color.y = glm::clamp((int)(pix.y/10 * 255.0), 0, 255); + color.z = glm::clamp((int)(pix.z/10 * 255.0), 0, 255); ; pbo[index].w = 0; pbo[index].x = color.x; @@ -167,7 +167,7 @@ static GBufferPixel* dev_gBuffer = NULL; static float* dev_gausKernel = NULL; static glm::vec2* dev_offsetKernel = NULL; static glm::vec3* dev_TrousImage = NULL; -static glm::vec3* dev_IntermediaryImage = NULL; +//static glm::vec3* dev_IntermediaryImage = NULL; void generateOffsetKern() { @@ -213,7 +213,7 @@ void pathtraceInit(Scene* scene) { cudaMemcpy(dev_offsetKernel, offsetKernel, 25 * sizeof(glm::vec2), cudaMemcpyHostToDevice); cudaMalloc(&dev_TrousImage, pixelcount * sizeof(glm::vec3)); - cudaMalloc(&dev_IntermediaryImage, pixelcount * sizeof(glm::vec3)); + /*cudaMalloc(&dev_IntermediaryImage, pixelcount * sizeof(glm::vec3));*/ checkCUDAError("pathtraceInit"); } @@ -230,7 +230,7 @@ void pathtraceFree() { cudaFree(dev_gausKernel); cudaFree(dev_offsetKernel); cudaFree(dev_TrousImage); - cudaFree(dev_IntermediaryImage); + /*cudaFree(dev_IntermediaryImage);*/ checkCUDAError("pathtraceFree"); } @@ -275,16 +275,16 @@ __global__ void CopyDataToInterImage( { PathSegment iterationPath = pathSegments[path_index]; - glm::vec3 currColor = iterationPath.color; - dev_interImage[iterationPath.pixelIndex] = iterationPath.color / (float)iter; + glm::vec3 currColor = dev_interImage[iterationPath.pixelIndex] + iterationPath.color; + dev_interImage[iterationPath.pixelIndex] += iterationPath.color ; } } - __global__ void GenerateATrousImage( - int iter, int num_paths, + __global__ void GenerateGaussianBlur( + int num_paths, float* dev_gausKernel, glm::vec2 *dev_offsetKernel, - glm::vec3* dev_interImage, glm::vec3 *dev_TrousImage, - GBufferPixel * gbuf, const Camera cam + glm::vec3* dev_colorImage, glm::vec3 *dev_TrousImage, + const Camera cam ) { @@ -292,9 +292,9 @@ __global__ void CopyDataToInterImage( if (index < num_paths) { - //glm::vec3 currColor =glm::vec3(0.0f); - glm::vec3 currColor = dev_interImage[index]; - /* for (int i = 0; i < 25; i++) + glm::vec3 currColor =glm::vec3(0.0f); + //glm::vec3 currColor = dev_colorImage[index]; + for (int i = 0; i < 25; i++) { float offsetX = dev_offsetKernel[i].x; @@ -303,15 +303,47 @@ __global__ void CopyDataToInterImage( int offsetColorIdx = index + (offsetY * cam.resolution.x + offsetX); if (offsetColorIdx >= 0 && offsetColorIdx < num_paths) { - glm::vec3 newColor = dev_interImage[offsetColorIdx]; + glm::vec3 newColor = dev_colorImage[offsetColorIdx]; currColor += newColor * dev_gausKernel[i]; } - }*/ + } dev_TrousImage[index] = currColor; } } + //__global__ void GenerateGaussianBlur( + // int num_paths, + // float* dev_gausKernel, glm::vec2* dev_offsetKernel, + // glm::vec3* dev_colorImage, glm::vec3* dev_TrousImage, + // GBufferPixel* gbuf, const Camera cam + //) + //{ + + // int index = blockIdx.x * blockDim.x + threadIdx.x; + + // if (index < num_paths) + // { + // glm::vec3 currColor = glm::vec3(0.0f); + // //glm::vec3 currColor = dev_colorImage[index]; + // for (int i = 0; i < 25; i++) + // { + + // float offsetX = dev_offsetKernel[i].x; + // float offsetY = dev_offsetKernel[i].y; + // float gausValue = dev_gausKernel[i]; + // int offsetColorIdx = index + (offsetY * cam.resolution.x + offsetX); + // if (offsetColorIdx >= 0 && offsetColorIdx < num_paths) + // { + // glm::vec3 newColor = dev_colorImage[offsetColorIdx]; + // currColor += newColor * dev_gausKernel[i]; + // } + // } + // dev_TrousImage[index] = currColor; + // } + + //} + __global__ void computeIntersections( int depth @@ -538,11 +570,12 @@ __global__ void CopyDataToInterImage( if (depth == 0) { generateGBuffer << > > (num_paths, dev_intersections, dev_paths, dev_gBuffer); } + /* if (depth == traceDepth) { + CopyDataToInterImage << > > (iter, num_paths, dev_paths, dev_IntermediaryImage); - CopyDataToInterImage << > > (iter, num_paths, dev_paths, dev_IntermediaryImage); - - GenerateATrousImage << > > (depth, num_paths, dev_gausKernel, dev_offsetKernel, - dev_IntermediaryImage, dev_TrousImage, dev_gBuffer, cam); + GenerateATrousImage << > > (depth, num_paths, dev_gausKernel, dev_offsetKernel, + dev_IntermediaryImage, dev_TrousImage, dev_gBuffer, cam); + }*/ depth++; shadeSimpleMaterials << > > ( @@ -554,11 +587,14 @@ __global__ void CopyDataToInterImage( ); iterationComplete = depth == traceDepth; } - // Assemble this iteration and apply it to the image dim3 numBlocksPixels = (pixelcount + blockSize1d - 1) / blockSize1d; finalGather << > > (num_paths, dev_image, dev_paths); + GenerateGaussianBlur << > > (num_paths, dev_gausKernel, dev_offsetKernel, + dev_image, dev_TrousImage, cam); + //GenerateATrousImage << > > (depth, num_paths, dev_gausKernel, dev_offsetKernel, + // dev_image, dev_TrousImage, dev_gBuffer, cam); /////////////////////////////////////////////////////////////////////////// // CHECKITOUT: use dev_image as reference if you want to implement saving denoised images. @@ -582,8 +618,8 @@ __global__ void CopyDataToInterImage( // CHECKITOUT: process the gbuffer results and send them to OpenGL buffer for visualization //gbufferToPBO<<>>(pbo, cam.resolution, dev_gBuffer); //gbufferToPBO_Normals<<>>(pbo, cam.resolution, dev_gBuffer); - gbufferToPBO_Position <<>>(pbo, cam.resolution, dev_gBuffer); - //gbufferToPBO_Atrous << > > (pbo, cam.resolution, dev_gBuffer, dev_TrousImage); + //gbufferToPBO_Position <<>>(pbo, cam.resolution, dev_gBuffer); + gbufferToPBO_Atrous << > > (pbo, cam.resolution, dev_gBuffer, dev_TrousImage); } void showImage(uchar4 * pbo, int iter) { From 6a26cb493cec01d1e80054157aa6c36ff8e2ed9c Mon Sep 17 00:00:00 2001 From: Shubham Sharma Date: Wed, 20 Oct 2021 17:50:05 -0400 Subject: [PATCH 07/14] Gaussian Updated --- src/pathtrace.cu | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/src/pathtrace.cu b/src/pathtrace.cu index cfd0e67..2d572ef 100644 --- a/src/pathtrace.cu +++ b/src/pathtrace.cu @@ -258,7 +258,6 @@ __global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, Path - cam.right * cam.pixelLength.x * ((float)x - (float)cam.resolution.x * 0.5f) - cam.up * cam.pixelLength.y * ((float)y - (float)cam.resolution.y * 0.5f) ); - segment.pixelIndex = index; segment.remainingBounces = traceDepth; } @@ -296,15 +295,24 @@ __global__ void CopyDataToInterImage( //glm::vec3 currColor = dev_colorImage[index]; for (int i = 0; i < 25; i++) { + int index2D_y = index / cam.resolution.x; + int index2D_x = (int)(index % cam.resolution.x); + + int offsetX = dev_offsetKernel[i].x; + int offsetY = dev_offsetKernel[i].y; + + int finalValue_X = index2D_x + offsetX; + int finalValue_Y = index2D_y + offsetY; - float offsetX = dev_offsetKernel[i].x; - float offsetY = dev_offsetKernel[i].y; - float gausValue = dev_gausKernel[i]; - int offsetColorIdx = index + (offsetY * cam.resolution.x + offsetX); - if (offsetColorIdx >= 0 && offsetColorIdx < num_paths) + if (finalValue_X >= 0 && finalValue_X <= (cam.resolution.x - 1) && finalValue_Y >= 0 && finalValue_Y <= (cam.resolution.y - 1)) { - glm::vec3 newColor = dev_colorImage[offsetColorIdx]; - currColor += newColor * dev_gausKernel[i]; + float gausValue = dev_gausKernel[i]; + int offsetColorIdx = finalValue_Y * cam.resolution.x + finalValue_X; + if (offsetColorIdx >= 0 && offsetColorIdx < num_paths) + { + glm::vec3 newColor = dev_colorImage[offsetColorIdx]; + currColor += newColor * dev_gausKernel[i]; + } } } dev_TrousImage[index] = currColor; From 42c725df2e118cd0a0367cf662a980b6f1e07a83 Mon Sep 17 00:00:00 2001 From: Shubham Sharma Date: Thu, 21 Oct 2021 02:01:07 -0400 Subject: [PATCH 08/14] A lot of changes --- src/main.cpp | 31 ++++++- src/pathtrace.cu | 197 ++++++++++++++++++++++++++++++++------------- src/pathtrace.h | 2 +- src/sceneStructs.h | 1 - 4 files changed, 169 insertions(+), 62 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index 1654770..aa92675 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -45,6 +45,30 @@ int iteration; int width; int height; +void FilterCreation(int filter_size, float *kernel) +{ + // initialising standard deviation to 1.0 + double sigma = 1.0; + double r, s = 2.0 * sigma * sigma; + // sum is for normalization + double sum = 0.0; + int itr = 0; + // generating filter_sizexfilter_size kernel + for (int x = -filter_size/2; x <= filter_size/2; x++) { + for (int y = -filter_size/2; y <= filter_size/2; y++) { + r = sqrt(x * x + y * y); + kernel[itr] = (exp(-(r * r) / s)) / (PI * s); + sum += kernel[itr]; + itr++; + } + } + + // normalising the Kernel + for (int i = 0; i < filter_size * filter_size; ++i) + { + kernel[i] /= sum; + } +} //------------------------------- //-------------MAIN-------------- @@ -57,7 +81,6 @@ int main(int argc, char** argv) { printf("Usage: %s SCENEFILE.txt\n", argv[0]); return 1; } - const char *sceneFile = argv[1]; // Load scene file @@ -151,8 +174,12 @@ void runCuda() { // No data is moved (Win & Linux). When mapped to CUDA, OpenGL should not use this buffer if (iteration == 0) { + int filter_size = glm::sqrt(ui_filterSize); + filter_size = filter_size % 2 == 0 ? filter_size + 1 : filter_size; + float *gKernel = new float[filter_size * filter_size]; + FilterCreation(filter_size, gKernel); pathtraceFree(); - pathtraceInit(scene); + pathtraceInit(scene, ui_colorWeight, ui_normalWeight, ui_positionWeight, gKernel, filter_size); } uchar4 *pbo_dptr = NULL; diff --git a/src/pathtrace.cu b/src/pathtrace.cu index 2d572ef..02b7da6 100644 --- a/src/pathtrace.cu +++ b/src/pathtrace.cu @@ -39,14 +39,14 @@ void checkCUDAErrorFn(const char* msg, const char* file, int line) { } -float gaussianKernel[25] = { 0.003765, 0.015019, 0.023792, 0.015019, 0.003765, -0.015019, 0.059912, 0.094907, 0.059912, 0.015019, -0.023792, 0.094907, 0.150342, 0.094907, 0.023792, -0.015019, 0.059912, 0.094907, 0.059912, 0.015019, -0.003765, 0.015019, 0.023792, 0.015019, 0.003765, }; +//float gaussianKernel[25] = { 0.003765, 0.015019, 0.023792, 0.015019, 0.003765, +//0.015019, 0.059912, 0.094907, 0.059912, 0.015019, +//0.023792, 0.094907, 0.150342, 0.094907, 0.023792, +//0.015019, 0.059912, 0.094907, 0.059912, 0.015019, +//0.003765, 0.015019, 0.023792, 0.015019, 0.003765, }; -glm::vec2 offsetKernel[25]; +//glm::vec2 offsetKernel[25]; __host__ __device__ thrust::default_random_engine makeSeededRandomEngine(int iter, int index, int depth) { @@ -167,22 +167,28 @@ static GBufferPixel* dev_gBuffer = NULL; static float* dev_gausKernel = NULL; static glm::vec2* dev_offsetKernel = NULL; static glm::vec3* dev_TrousImage = NULL; + +static float *dev_ui_colorWeight; +static float *dev_ui_normalWeight ; +static float *dev_ui_positionWeight; +static float *dev_ui_filterSize; //static glm::vec3* dev_IntermediaryImage = NULL; -void generateOffsetKern() +void generateOffsetKern(int filterSize, vector &offsetKernel) { int index = 0; - for (int y = -2; y <= 2; y++) + filterSize = filterSize % 2 == 0 ? filterSize - 1 : filterSize; + for (int y = -filterSize/2; y <= filterSize/2; y++) { - for (int x = -2; x <= 2; x++) + for (int x = -filterSize/2; x <= filterSize/2; x++) { - offsetKernel[index] = glm::vec2(x, y); + offsetKernel.push_back(glm::vec2(x, y)); index++; } } } -void pathtraceInit(Scene* scene) { +void pathtraceInit(Scene* scene, float ui_colorWeight, float ui_normalWeight, float ui_positionWeight, float *gausKernel, float filterSize) { hst_scene = scene; const Camera& cam = hst_scene->state.camera; const int pixelcount = cam.resolution.x * cam.resolution.y; @@ -205,14 +211,39 @@ void pathtraceInit(Scene* scene) { // TODO: initialize any extra device memeory you need - cudaMalloc(&dev_gausKernel, 25 * sizeof(float)); - cudaMemcpy(dev_gausKernel, gaussianKernel, 25 * sizeof(float), cudaMemcpyHostToDevice); + cudaMalloc(&dev_gausKernel, filterSize * filterSize * sizeof(float)); + cudaMemcpy(dev_gausKernel, gausKernel, filterSize * filterSize * sizeof(float), cudaMemcpyHostToDevice); + + for (int i = 0; i < filterSize * filterSize; i++) + { + std::cout << gausKernel[i]; + } + + vector< glm::vec2> offKern; - generateOffsetKern(); - cudaMalloc(&dev_offsetKernel, 25 * sizeof(glm::vec2)); - cudaMemcpy(dev_offsetKernel, offsetKernel, 25 * sizeof(glm::vec2), cudaMemcpyHostToDevice); + generateOffsetKern(filterSize, offKern); + cudaMalloc(&dev_offsetKernel, filterSize * filterSize * sizeof(glm::vec2)); + cudaMemcpy(dev_offsetKernel, offKern.data(), filterSize * filterSize * sizeof(glm::vec2), cudaMemcpyHostToDevice); cudaMalloc(&dev_TrousImage, pixelcount * sizeof(glm::vec3)); + + cudaMalloc(&dev_ui_colorWeight, sizeof(float)); + cudaMalloc(&dev_ui_normalWeight, sizeof(float)); + cudaMalloc(&dev_ui_positionWeight, sizeof(float)); + cudaMalloc(&dev_ui_filterSize, sizeof(float)); + + + + //cudaMemset(dev_ui_colorWeight, ui_colorWeight, sizeof(float)); + //cudaMemset(dev_ui_normalWeight, ui_normalWeight, sizeof(float)); + //cudaMemset(dev_ui_positionWeight, ui_positionWeight, sizeof(float)); + + cudaMemcpy(dev_ui_colorWeight, &ui_colorWeight, sizeof(float), cudaMemcpyHostToDevice); + cudaMemcpy(dev_ui_normalWeight, &ui_normalWeight, sizeof(float), cudaMemcpyHostToDevice); + cudaMemcpy(dev_ui_positionWeight, &ui_normalWeight, sizeof(float), cudaMemcpyHostToDevice); + cudaMemcpy(dev_ui_filterSize, &filterSize, sizeof(float), cudaMemcpyHostToDevice); + + /*cudaMalloc(&dev_IntermediaryImage, pixelcount * sizeof(glm::vec3));*/ checkCUDAError("pathtraceInit"); @@ -230,7 +261,11 @@ void pathtraceFree() { cudaFree(dev_gausKernel); cudaFree(dev_offsetKernel); cudaFree(dev_TrousImage); - /*cudaFree(dev_IntermediaryImage);*/ + + cudaFree(dev_ui_colorWeight); + cudaFree(dev_ui_normalWeight); + cudaFree(dev_ui_positionWeight); + cudaFree(dev_ui_filterSize); checkCUDAError("pathtraceFree"); } @@ -320,37 +355,89 @@ __global__ void CopyDataToInterImage( } - //__global__ void GenerateGaussianBlur( - // int num_paths, - // float* dev_gausKernel, glm::vec2* dev_offsetKernel, - // glm::vec3* dev_colorImage, glm::vec3* dev_TrousImage, - // GBufferPixel* gbuf, const Camera cam - //) - //{ - - // int index = blockIdx.x * blockDim.x + threadIdx.x; - - // if (index < num_paths) - // { - // glm::vec3 currColor = glm::vec3(0.0f); - // //glm::vec3 currColor = dev_colorImage[index]; - // for (int i = 0; i < 25; i++) - // { - - // float offsetX = dev_offsetKernel[i].x; - // float offsetY = dev_offsetKernel[i].y; - // float gausValue = dev_gausKernel[i]; - // int offsetColorIdx = index + (offsetY * cam.resolution.x + offsetX); - // if (offsetColorIdx >= 0 && offsetColorIdx < num_paths) - // { - // glm::vec3 newColor = dev_colorImage[offsetColorIdx]; - // currColor += newColor * dev_gausKernel[i]; - // } - // } - // dev_TrousImage[index] = currColor; - // } - - //} + __global__ void GenerateAtrousImage( + int num_paths, int filterSize, + float* dev_gausKernel, glm::vec2* dev_offsetKernel, + glm::vec3* dev_colorImage, glm::vec3* dev_TrousImage, + GBufferPixel* gbuf, const Camera cam, float* dev_ui_colorWeight, + float* dev_ui_normalWeight,float* dev_ui_positionWeight + ) + { + + int index = blockIdx.x * blockDim.x + threadIdx.x; + + if (index < num_paths) + { + glm::vec3 sum = glm::vec3(0.0f); + glm::vec3 cval = dev_colorImage[index]; + glm::vec3 nval = gbuf[index].normal; + glm::vec3 pval = gbuf[index].position; + + float cphi = dev_ui_colorWeight[0] * dev_ui_colorWeight[0]; + float nphi = dev_ui_normalWeight[0] * dev_ui_normalWeight[0]; + float pphi = dev_ui_positionWeight[0] * dev_ui_positionWeight[0]; + + float cum_w = 0.0f; + for (int stepIter = 0; stepIter < 1; stepIter++) + { + for (int i = 0; i < 25; i++) + { + int stepWidth = 1 << stepIter; + // Calculate Offseted Index + int index2D_y = index / cam.resolution.x; + int index2D_x = (int)(index % cam.resolution.x); + + int offsetX = dev_offsetKernel[i].x; + int offsetY = dev_offsetKernel[i].y; + + int finalValue_X = index2D_x + offsetX * stepWidth; // Final Offset Values + int finalValue_Y = index2D_y + offsetY * stepWidth; // Final Offset Values + + if (finalValue_X >= 0 && finalValue_X <= (cam.resolution.x - 1) && finalValue_Y >= 0 && finalValue_Y <= (cam.resolution.y - 1)) + { + int offsetColorIdx = finalValue_Y * cam.resolution.x + finalValue_X; + if (offsetColorIdx >= 0 && offsetColorIdx < num_paths) + { + glm::vec3 ctmp = dev_colorImage[offsetColorIdx]; + glm::vec3 t = cval - ctmp; + float dist2 = glm::dot(t, t); + if (dist2 != 0.0f) + { + dist2 = dist2; + } + float newVal = glm::exp(-1 * (dist2) / cphi); + float c_w = glm::min(newVal, 1.0f); + + glm::vec3 ntmp = gbuf[offsetColorIdx].normal; + t = nval - ntmp; + dist2 = glm::max(glm::dot(t, t)/ (stepWidth * stepWidth), 0.0f); + newVal = glm::exp(-1 * (dist2) / nphi ); + float n_w = glm::min(newVal, 1.0f); + + glm::vec3 ptmp = gbuf[offsetColorIdx].position; + t = pval - ptmp; + dist2 = glm::dot(t, t); + newVal = glm::exp(-1 * (dist2) / pphi); + float p_w = glm::min(newVal, 1.0f); + float weight = c_w * n_w * p_w; + + + if (weight < 0.9f && weight >0.1f) + { + c_w = c_w; + } + + sum += ctmp * weight * dev_gausKernel[i]; + cum_w += weight * dev_gausKernel[i]; + + } + } + } + } + dev_TrousImage[index] = sum / cum_w; + } + + } __global__ void computeIntersections( @@ -578,12 +665,6 @@ __global__ void CopyDataToInterImage( if (depth == 0) { generateGBuffer << > > (num_paths, dev_intersections, dev_paths, dev_gBuffer); } - /* if (depth == traceDepth) { - CopyDataToInterImage << > > (iter, num_paths, dev_paths, dev_IntermediaryImage); - - GenerateATrousImage << > > (depth, num_paths, dev_gausKernel, dev_offsetKernel, - dev_IntermediaryImage, dev_TrousImage, dev_gBuffer, cam); - }*/ depth++; shadeSimpleMaterials << > > ( @@ -598,11 +679,11 @@ __global__ void CopyDataToInterImage( // Assemble this iteration and apply it to the image dim3 numBlocksPixels = (pixelcount + blockSize1d - 1) / blockSize1d; finalGather << > > (num_paths, dev_image, dev_paths); - GenerateGaussianBlur << > > (num_paths, dev_gausKernel, dev_offsetKernel, - dev_image, dev_TrousImage, cam); + //GenerateGaussianBlur << > > (num_paths, dev_gausKernel, dev_offsetKernel, + // dev_image, dev_TrousImage, cam); - //GenerateATrousImage << > > (depth, num_paths, dev_gausKernel, dev_offsetKernel, - // dev_image, dev_TrousImage, dev_gBuffer, cam); + GenerateAtrousImage << > > (num_paths, dev_ui_filterSize[0],dev_gausKernel, dev_offsetKernel, + dev_image, dev_TrousImage, dev_gBuffer, cam, dev_ui_colorWeight, dev_ui_normalWeight, dev_ui_positionWeight); /////////////////////////////////////////////////////////////////////////// // CHECKITOUT: use dev_image as reference if you want to implement saving denoised images. diff --git a/src/pathtrace.h b/src/pathtrace.h index 9e12f44..28721c7 100644 --- a/src/pathtrace.h +++ b/src/pathtrace.h @@ -3,7 +3,7 @@ #include #include "scene.h" -void pathtraceInit(Scene *scene); +void pathtraceInit(Scene *scene, float a, float b, float c, float* gausKernel, float filterSize); void pathtraceFree(); void pathtrace(int frame, int iteration); void showGBuffer(uchar4 *pbo); diff --git a/src/sceneStructs.h b/src/sceneStructs.h index d455543..131f211 100644 --- a/src/sceneStructs.h +++ b/src/sceneStructs.h @@ -81,5 +81,4 @@ struct GBufferPixel { float t; glm::vec3 normal; glm::vec3 position; - glm::vec3 pathColor; }; From 208126cf5a3e4cd925e7869eb283380d5acc0146 Mon Sep 17 00:00:00 2001 From: Shubham Sharma Date: Thu, 21 Oct 2021 02:34:47 -0400 Subject: [PATCH 09/14] A trou Implemented --- src/main.cpp | 4 +++- src/pathtrace.cu | 51 +++++++++++++++++++----------------------------- 2 files changed, 23 insertions(+), 32 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index aa92675..3561ee6 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -53,10 +53,12 @@ void FilterCreation(int filter_size, float *kernel) // sum is for normalization double sum = 0.0; int itr = 0; + + int center = filter_size / 2.0f; // generating filter_sizexfilter_size kernel for (int x = -filter_size/2; x <= filter_size/2; x++) { for (int y = -filter_size/2; y <= filter_size/2; y++) { - r = sqrt(x * x + y * y); + r = sqrt( x * x + y * y ); kernel[itr] = (exp(-(r * r) / s)) / (PI * s); sum += kernel[itr]; itr++; diff --git a/src/pathtrace.cu b/src/pathtrace.cu index 02b7da6..020cbfd 100644 --- a/src/pathtrace.cu +++ b/src/pathtrace.cu @@ -144,9 +144,9 @@ __global__ void gbufferToPBO_Atrous(uchar4* pbo, glm::ivec2 resolution, GBufferP glm::ivec3 color; - color.x = glm::clamp((int)(pix.x/10 * 255.0), 0, 255); - color.y = glm::clamp((int)(pix.y/10 * 255.0), 0, 255); - color.z = glm::clamp((int)(pix.z/10 * 255.0), 0, 255); + color.x = glm::clamp((int)(pix.x/2 * 255.0), 0, 255); + color.y = glm::clamp((int)(pix.y/2 * 255.0), 0, 255); + color.z = glm::clamp((int)(pix.z/2 * 255.0), 0, 255); ; pbo[index].w = 0; pbo[index].x = color.x; @@ -168,10 +168,10 @@ static float* dev_gausKernel = NULL; static glm::vec2* dev_offsetKernel = NULL; static glm::vec3* dev_TrousImage = NULL; -static float *dev_ui_colorWeight; -static float *dev_ui_normalWeight ; -static float *dev_ui_positionWeight; -static float *dev_ui_filterSize; +static float ui_colorWeight = 0.0f; +static float ui_normalWeight = 0.0f; +static float ui_positionWeight = 0.0f; +static float ui_filterSize = 0.0f; //static glm::vec3* dev_IntermediaryImage = NULL; void generateOffsetKern(int filterSize, vector &offsetKernel) @@ -188,7 +188,7 @@ void generateOffsetKern(int filterSize, vector &offsetKernel) } } -void pathtraceInit(Scene* scene, float ui_colorWeight, float ui_normalWeight, float ui_positionWeight, float *gausKernel, float filterSize) { +void pathtraceInit(Scene* scene, float a_ui_colorWeight, float a_ui_normalWeight, float a_ui_positionWeight, float *gausKernel, float filterSize) { hst_scene = scene; const Camera& cam = hst_scene->state.camera; const int pixelcount = cam.resolution.x * cam.resolution.y; @@ -227,21 +227,16 @@ void pathtraceInit(Scene* scene, float ui_colorWeight, float ui_normalWeight, fl cudaMalloc(&dev_TrousImage, pixelcount * sizeof(glm::vec3)); - cudaMalloc(&dev_ui_colorWeight, sizeof(float)); - cudaMalloc(&dev_ui_normalWeight, sizeof(float)); - cudaMalloc(&dev_ui_positionWeight, sizeof(float)); - cudaMalloc(&dev_ui_filterSize, sizeof(float)); - //cudaMemset(dev_ui_colorWeight, ui_colorWeight, sizeof(float)); //cudaMemset(dev_ui_normalWeight, ui_normalWeight, sizeof(float)); //cudaMemset(dev_ui_positionWeight, ui_positionWeight, sizeof(float)); - cudaMemcpy(dev_ui_colorWeight, &ui_colorWeight, sizeof(float), cudaMemcpyHostToDevice); - cudaMemcpy(dev_ui_normalWeight, &ui_normalWeight, sizeof(float), cudaMemcpyHostToDevice); - cudaMemcpy(dev_ui_positionWeight, &ui_normalWeight, sizeof(float), cudaMemcpyHostToDevice); - cudaMemcpy(dev_ui_filterSize, &filterSize, sizeof(float), cudaMemcpyHostToDevice); + ui_colorWeight = a_ui_colorWeight; + ui_normalWeight = a_ui_normalWeight; + ui_positionWeight = a_ui_positionWeight; + ui_filterSize = filterSize; /*cudaMalloc(&dev_IntermediaryImage, pixelcount * sizeof(glm::vec3));*/ @@ -261,11 +256,6 @@ void pathtraceFree() { cudaFree(dev_gausKernel); cudaFree(dev_offsetKernel); cudaFree(dev_TrousImage); - - cudaFree(dev_ui_colorWeight); - cudaFree(dev_ui_normalWeight); - cudaFree(dev_ui_positionWeight); - cudaFree(dev_ui_filterSize); checkCUDAError("pathtraceFree"); } @@ -359,8 +349,8 @@ __global__ void CopyDataToInterImage( int num_paths, int filterSize, float* dev_gausKernel, glm::vec2* dev_offsetKernel, glm::vec3* dev_colorImage, glm::vec3* dev_TrousImage, - GBufferPixel* gbuf, const Camera cam, float* dev_ui_colorWeight, - float* dev_ui_normalWeight,float* dev_ui_positionWeight + GBufferPixel* gbuf, const Camera cam, float ui_colorWeight, + float ui_normalWeight,float ui_positionWeight ) { @@ -373,12 +363,12 @@ __global__ void CopyDataToInterImage( glm::vec3 nval = gbuf[index].normal; glm::vec3 pval = gbuf[index].position; - float cphi = dev_ui_colorWeight[0] * dev_ui_colorWeight[0]; - float nphi = dev_ui_normalWeight[0] * dev_ui_normalWeight[0]; - float pphi = dev_ui_positionWeight[0] * dev_ui_positionWeight[0]; + float cphi = ui_colorWeight * ui_colorWeight; + float nphi = ui_normalWeight * ui_normalWeight; + float pphi = ui_positionWeight * ui_positionWeight; float cum_w = 0.0f; - for (int stepIter = 0; stepIter < 1; stepIter++) + for (int stepIter = 0; stepIter < 10; stepIter++) { for (int i = 0; i < 25; i++) { @@ -681,9 +671,8 @@ __global__ void CopyDataToInterImage( finalGather << > > (num_paths, dev_image, dev_paths); //GenerateGaussianBlur << > > (num_paths, dev_gausKernel, dev_offsetKernel, // dev_image, dev_TrousImage, cam); - - GenerateAtrousImage << > > (num_paths, dev_ui_filterSize[0],dev_gausKernel, dev_offsetKernel, - dev_image, dev_TrousImage, dev_gBuffer, cam, dev_ui_colorWeight, dev_ui_normalWeight, dev_ui_positionWeight); + GenerateAtrousImage << > > (num_paths, ui_filterSize ,dev_gausKernel, dev_offsetKernel, + dev_image, dev_TrousImage, dev_gBuffer, cam, ui_colorWeight, ui_normalWeight, ui_positionWeight); /////////////////////////////////////////////////////////////////////////// // CHECKITOUT: use dev_image as reference if you want to implement saving denoised images. From 06d27198c88022b06b866aa136ab173a0957b578 Mon Sep 17 00:00:00 2001 From: Shubham Sharma Date: Thu, 21 Oct 2021 11:52:45 -0400 Subject: [PATCH 10/14] Updates --- src/main.cpp | 20 +++-- src/pathtrace.cu | 188 +++++++++++++++++++++++++++++++++-------------- src/pathtrace.h | 2 + 3 files changed, 148 insertions(+), 62 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index 3561ee6..e6a49c5 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -48,18 +48,16 @@ int height; void FilterCreation(int filter_size, float *kernel) { // initialising standard deviation to 1.0 - double sigma = 1.0; - double r, s = 2.0 * sigma * sigma; + float sigma = 1.0; + float r, s = 2.0 * sigma * sigma; // sum is for normalization - double sum = 0.0; + float sum = 0.0; int itr = 0; - - int center = filter_size / 2.0f; // generating filter_sizexfilter_size kernel for (int x = -filter_size/2; x <= filter_size/2; x++) { for (int y = -filter_size/2; y <= filter_size/2; y++) { - r = sqrt( x * x + y * y ); - kernel[itr] = (exp(-(r * r) / s)) / (PI * s); + r = x * x + y * y ; + kernel[itr] = (glm::exp(-(r) / s)) / (PI * s); sum += kernel[itr]; itr++; } @@ -72,6 +70,7 @@ void FilterCreation(int filter_size, float *kernel) } } + //------------------------------- //-------------MAIN-------------- //------------------------------- @@ -197,7 +196,12 @@ void runCuda() { if (ui_showGbuffer) { showGBuffer(pbo_dptr); - } else { + } + else if (ui_denoise) + { + showDenoise(pbo_dptr, iteration); + } + else { showImage(pbo_dptr, iteration); } diff --git a/src/pathtrace.cu b/src/pathtrace.cu index 020cbfd..37a62fc 100644 --- a/src/pathtrace.cu +++ b/src/pathtrace.cu @@ -132,28 +132,6 @@ __global__ void gbufferToPBO_Position(uchar4* pbo, glm::ivec2 resolution, GBuffe } } -__global__ void gbufferToPBO_Atrous(uchar4* pbo, glm::ivec2 resolution, GBufferPixel* gBuffer, glm::vec3* TrousImage) { - int x = (blockIdx.x * blockDim.x) + threadIdx.x; - int y = (blockIdx.y * blockDim.y) + threadIdx.y; - - if (x < resolution.x && y < resolution.y) { - int index = x + (y * resolution.x); - - - glm::vec3 pix = TrousImage[index]; - glm::ivec3 color; - - - color.x = glm::clamp((int)(pix.x/2 * 255.0), 0, 255); - color.y = glm::clamp((int)(pix.y/2 * 255.0), 0, 255); - color.z = glm::clamp((int)(pix.z/2 * 255.0), 0, 255); -; - pbo[index].w = 0; - pbo[index].x = color.x; - pbo[index].y = color.y; - pbo[index].z = color.z; - } -} static Scene* hst_scene = NULL; static glm::vec3* dev_image = NULL; @@ -214,11 +192,6 @@ void pathtraceInit(Scene* scene, float a_ui_colorWeight, float a_ui_normalWeight cudaMalloc(&dev_gausKernel, filterSize * filterSize * sizeof(float)); cudaMemcpy(dev_gausKernel, gausKernel, filterSize * filterSize * sizeof(float), cudaMemcpyHostToDevice); - for (int i = 0; i < filterSize * filterSize; i++) - { - std::cout << gausKernel[i]; - } - vector< glm::vec2> offKern; generateOffsetKern(filterSize, offKern); @@ -289,23 +262,8 @@ __global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, Path } -__global__ void CopyDataToInterImage( - int iter, int num_paths, - PathSegment* pathSegments, glm::vec3* dev_interImage) -{ - int path_index = blockIdx.x * blockDim.x + threadIdx.x; - - if (path_index < num_paths) - { - - PathSegment iterationPath = pathSegments[path_index]; - glm::vec3 currColor = dev_interImage[iterationPath.pixelIndex] + iterationPath.color; - dev_interImage[iterationPath.pixelIndex] += iterationPath.color ; - } -} - __global__ void GenerateGaussianBlur( - int num_paths, + int num_paths, int filterSize, float* dev_gausKernel, glm::vec2 *dev_offsetKernel, glm::vec3* dev_colorImage, glm::vec3 *dev_TrousImage, const Camera cam @@ -318,7 +276,7 @@ __global__ void CopyDataToInterImage( { glm::vec3 currColor =glm::vec3(0.0f); //glm::vec3 currColor = dev_colorImage[index]; - for (int i = 0; i < 25; i++) + for (int i = 0; i < filterSize ; i++) { int index2D_y = index / cam.resolution.x; int index2D_x = (int)(index % cam.resolution.x); @@ -345,6 +303,47 @@ __global__ void CopyDataToInterImage( } + __global__ void GenerateGaussianBlur2( + int num_paths, int filterSize, + float* dev_gausKernel, glm::vec2* dev_offsetKernel, + glm::vec3* dev_colorImage, glm::vec3* dev_TrousImage, + const Camera cam + ) + { + + int x = (blockIdx.x * blockDim.x) + threadIdx.x; + int y = (blockIdx.y * blockDim.y) + threadIdx.y; + + if (x < cam.resolution.x && y < cam.resolution.y) + { + int index = x + (y * cam.resolution.x); + glm::vec3 currColor = glm::vec3(0.0f); + //glm::vec3 currColor = dev_colorImage[index]; + for (int i = 0; i < filterSize; i++) + { + + int offsetX = dev_offsetKernel[i].x; + int offsetY = dev_offsetKernel[i].y; + + int finalValue_X = x + offsetX; + int finalValue_Y = y + offsetY; + + finalValue_X = glm::clamp(finalValue_X, 0, cam.resolution.x - 1); + finalValue_Y = glm::clamp(finalValue_Y, 0, cam.resolution.y - 1); + float gausValue = dev_gausKernel[i]; + int offsetColorIdx = finalValue_Y * cam.resolution.x + finalValue_X; + /* if (offsetColorIdx >= 0 && offsetColorIdx < num_paths) + {*/ + glm::vec3 newColor = dev_colorImage[offsetColorIdx]; + currColor += newColor * dev_gausKernel[i]; + /*}*/ + + } + dev_TrousImage[index] = currColor; + } + + } + __global__ void GenerateAtrousImage( int num_paths, int filterSize, float* dev_gausKernel, glm::vec2* dev_offsetKernel, @@ -370,7 +369,7 @@ __global__ void CopyDataToInterImage( float cum_w = 0.0f; for (int stepIter = 0; stepIter < 10; stepIter++) { - for (int i = 0; i < 25; i++) + for (int i = 0; i < filterSize; i++) { int stepWidth = 1 << stepIter; // Calculate Offseted Index @@ -428,7 +427,77 @@ __global__ void CopyDataToInterImage( } } + + __global__ void GenerateAtrousImage2( + int num_paths, int filterSize, + float* dev_gausKernel, glm::vec2* dev_offsetKernel, + glm::vec3* dev_colorImage, glm::vec3* dev_TrousImage, + GBufferPixel* gbuf, const Camera cam, float ui_colorWeight, + float ui_normalWeight, float ui_positionWeight + ) + { + int x = (blockIdx.x * blockDim.x) + threadIdx.x; + int y = (blockIdx.y * blockDim.y) + threadIdx.y; + + if (x < cam.resolution.x && y < cam.resolution.y) + { + int index = x + (y * cam.resolution.x); + glm::vec3 sum = glm::vec3(0.0f); + glm::vec3 cval = dev_colorImage[index]; + glm::vec3 nval = gbuf[index].normal; + glm::vec3 pval = gbuf[index].position; + + float cphi = ui_colorWeight * ui_colorWeight; + float nphi = ui_normalWeight * ui_normalWeight; + float pphi = ui_positionWeight * ui_positionWeight; + + float cum_w = 0.0f; + for (int stepIter = 0; stepIter < 10; stepIter++) + { + for (int i = 0; i < 25; i++) + { + int stepWidth = 1 << stepIter; + // Calculate Offseted Index + + int offsetX = dev_offsetKernel[i].x; + int offsetY = dev_offsetKernel[i].y; + + int finalValue_X = glm::clamp ((float)x + (float)offsetX * stepWidth, 0.0f, (float)cam.resolution.x - 1.0f); + int finalValue_Y = glm::clamp((float)y + (float)offsetY * stepWidth, 0.0f, (float)cam.resolution.y - 1.0f); + + int offsetColorIdx = finalValue_Y * cam.resolution.x + finalValue_X; + if (offsetColorIdx >= 0 && offsetColorIdx < num_paths) + { + glm::vec3 ctmp = dev_colorImage[offsetColorIdx]; + glm::vec3 t = cval - ctmp; + float dist2 = glm::dot(t, t); + float newVal = glm::exp(-1 * (dist2) / cphi); + float c_w = glm::min(newVal, 1.0f); + + glm::vec3 ntmp = gbuf[offsetColorIdx].normal; + t = nval - ntmp; + dist2 = glm::max(glm::dot(t, t) / (stepWidth * stepWidth), 0.0f); + newVal = glm::exp(-1 * (dist2) / nphi); + float n_w = glm::min(newVal, 1.0f); + + glm::vec3 ptmp = gbuf[offsetColorIdx].position; + t = pval - ptmp; + dist2 = glm::dot(t, t); + newVal = glm::exp(-1 * (dist2) / pphi); + float p_w = glm::min(newVal, 1.0f); + float weight = c_w * n_w * p_w; + sum += ctmp * weight * dev_gausKernel[i]; + cum_w += weight * dev_gausKernel[i]; + + } + + } + } + dev_TrousImage[index] = sum / cum_w; + } + + } __global__ void computeIntersections( int depth @@ -553,9 +622,9 @@ __global__ void CopyDataToInterImage( if (idx < num_paths) { int pixelPosition = pathSegments[idx].pixelIndex; - gBuffer[pixelPosition].t = shadeableIntersections[idx].t; - gBuffer[pixelPosition].normal = shadeableIntersections[idx].surfaceNormal; - gBuffer[pixelPosition].position = getPointOnRay(pathSegments[idx].ray, shadeableIntersections[idx].t); + gBuffer[idx].t = shadeableIntersections[idx].t; + gBuffer[idx].normal = shadeableIntersections[idx].surfaceNormal; + gBuffer[idx].position = getPointOnRay(pathSegments[idx].ray, shadeableIntersections[idx].t); } } @@ -669,10 +738,10 @@ __global__ void CopyDataToInterImage( // Assemble this iteration and apply it to the image dim3 numBlocksPixels = (pixelcount + blockSize1d - 1) / blockSize1d; finalGather << > > (num_paths, dev_image, dev_paths); - //GenerateGaussianBlur << > > (num_paths, dev_gausKernel, dev_offsetKernel, - // dev_image, dev_TrousImage, cam); - GenerateAtrousImage << > > (num_paths, ui_filterSize ,dev_gausKernel, dev_offsetKernel, - dev_image, dev_TrousImage, dev_gBuffer, cam, ui_colorWeight, ui_normalWeight, ui_positionWeight); + GenerateGaussianBlur2 << > > (num_paths, ui_filterSize,dev_gausKernel, dev_offsetKernel, + dev_image, dev_TrousImage, cam); +// GenerateAtrousImage << > > (num_paths, ui_filterSize ,dev_gausKernel, dev_offsetKernel, +// dev_image, dev_TrousImage, dev_gBuffer, cam, ui_colorWeight, ui_normalWeight, ui_positionWeight); /////////////////////////////////////////////////////////////////////////// // CHECKITOUT: use dev_image as reference if you want to implement saving denoised images. @@ -696,8 +765,7 @@ __global__ void CopyDataToInterImage( // CHECKITOUT: process the gbuffer results and send them to OpenGL buffer for visualization //gbufferToPBO<<>>(pbo, cam.resolution, dev_gBuffer); //gbufferToPBO_Normals<<>>(pbo, cam.resolution, dev_gBuffer); - //gbufferToPBO_Position <<>>(pbo, cam.resolution, dev_gBuffer); - gbufferToPBO_Atrous << > > (pbo, cam.resolution, dev_gBuffer, dev_TrousImage); + gbufferToPBO_Position <<>>(pbo, cam.resolution, dev_gBuffer); } void showImage(uchar4 * pbo, int iter) { @@ -710,3 +778,15 @@ __global__ void CopyDataToInterImage( // Send results to OpenGL buffer for rendering sendImageToPBO << > > (pbo, cam.resolution, iter, dev_image); } + + + void showDenoise(uchar4* pbo, int iter) { + const Camera& cam = hst_scene->state.camera; + const dim3 blockSize2d(8, 8); + const dim3 blocksPerGrid2d( + (cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x, + (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y); + + // Send results to OpenGL buffer for rendering + sendImageToPBO << > > (pbo, cam.resolution, iter, dev_TrousImage); + } diff --git a/src/pathtrace.h b/src/pathtrace.h index 28721c7..5ae7dfc 100644 --- a/src/pathtrace.h +++ b/src/pathtrace.h @@ -8,3 +8,5 @@ void pathtraceFree(); void pathtrace(int frame, int iteration); void showGBuffer(uchar4 *pbo); void showImage(uchar4 *pbo, int iter); +void showDenoise(uchar4 *pbo, int iter); + From 03e87bc447e5175c3a1e9c2d22966df2b6700611 Mon Sep 17 00:00:00 2001 From: Shubham Sharma Date: Thu, 21 Oct 2021 18:06:39 -0400 Subject: [PATCH 11/14] Fixed Filter Size --- src/pathtrace.cu | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/src/pathtrace.cu b/src/pathtrace.cu index 37a62fc..2a07675 100644 --- a/src/pathtrace.cu +++ b/src/pathtrace.cu @@ -317,14 +317,13 @@ __global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, Path if (x < cam.resolution.x && y < cam.resolution.y) { int index = x + (y * cam.resolution.x); + glm::vec3 color = dev_colorImage[index]; glm::vec3 currColor = glm::vec3(0.0f); //glm::vec3 currColor = dev_colorImage[index]; - for (int i = 0; i < filterSize; i++) + for (int i = 0; i < filterSize * filterSize; i++) { - int offsetX = dev_offsetKernel[i].x; int offsetY = dev_offsetKernel[i].y; - int finalValue_X = x + offsetX; int finalValue_Y = y + offsetY; @@ -367,9 +366,9 @@ __global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, Path float pphi = ui_positionWeight * ui_positionWeight; float cum_w = 0.0f; - for (int stepIter = 0; stepIter < 10; stepIter++) + for (int stepIter = 0; stepIter < 1; stepIter++) { - for (int i = 0; i < filterSize; i++) + for (int i = 0; i < filterSize * filterSize; i++) { int stepWidth = 1 << stepIter; // Calculate Offseted Index @@ -453,9 +452,9 @@ __global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, Path float pphi = ui_positionWeight * ui_positionWeight; float cum_w = 0.0f; - for (int stepIter = 0; stepIter < 10; stepIter++) + for (int stepIter = 0; stepIter < 1; stepIter++) { - for (int i = 0; i < 25; i++) + for (int i = 0; i < filterSize *filterSize; i++) { int stepWidth = 1 << stepIter; // Calculate Offseted Index @@ -472,12 +471,12 @@ __global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, Path glm::vec3 ctmp = dev_colorImage[offsetColorIdx]; glm::vec3 t = cval - ctmp; float dist2 = glm::dot(t, t); - float newVal = glm::exp(-1 * (dist2) / cphi); - float c_w = glm::min(newVal, 1.0f); + float newVal = glm::exp(-(dist2) / cphi); + float c_w = glm::min(newVal, 1.f); glm::vec3 ntmp = gbuf[offsetColorIdx].normal; t = nval - ntmp; - dist2 = glm::max(glm::dot(t, t) / (stepWidth * stepWidth), 0.0f); + dist2 = glm::max(glm::dot(t, t) / (stepWidth * stepWidth), 0.f); newVal = glm::exp(-1 * (dist2) / nphi); float n_w = glm::min(newVal, 1.0f); @@ -485,7 +484,7 @@ __global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, Path t = pval - ptmp; dist2 = glm::dot(t, t); newVal = glm::exp(-1 * (dist2) / pphi); - float p_w = glm::min(newVal, 1.0f); + float p_w = glm::min(newVal, 1.f); float weight = c_w * n_w * p_w; sum += ctmp * weight * dev_gausKernel[i]; cum_w += weight * dev_gausKernel[i]; @@ -495,6 +494,7 @@ __global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, Path } } dev_TrousImage[index] = sum / cum_w; + //dev_TrousImage[index] = cval; } } @@ -738,10 +738,12 @@ __global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, Path // Assemble this iteration and apply it to the image dim3 numBlocksPixels = (pixelcount + blockSize1d - 1) / blockSize1d; finalGather << > > (num_paths, dev_image, dev_paths); - GenerateGaussianBlur2 << > > (num_paths, ui_filterSize,dev_gausKernel, dev_offsetKernel, - dev_image, dev_TrousImage, cam); -// GenerateAtrousImage << > > (num_paths, ui_filterSize ,dev_gausKernel, dev_offsetKernel, -// dev_image, dev_TrousImage, dev_gBuffer, cam, ui_colorWeight, ui_normalWeight, ui_positionWeight); +// GenerateGaussianBlur2 << > > (num_paths, ui_filterSize,dev_gausKernel, dev_offsetKernel, +// dev_image, dev_TrousImage, cam); + //GenerateAtrousImage << > > (num_paths, ui_filterSize ,dev_gausKernel, dev_offsetKernel, + // dev_image, dev_TrousImage, dev_gBuffer, cam, ui_colorWeight, ui_normalWeight, ui_positionWeight); + GenerateAtrousImage2 << > > (num_paths, ui_filterSize ,dev_gausKernel, dev_offsetKernel, + dev_image, dev_TrousImage, dev_gBuffer, cam, ui_colorWeight, ui_normalWeight, ui_positionWeight); /////////////////////////////////////////////////////////////////////////// // CHECKITOUT: use dev_image as reference if you want to implement saving denoised images. From e6449827032fbdd515ba014044bda527e3f6f12e Mon Sep 17 00:00:00 2001 From: Shubham Sharma Date: Fri, 22 Oct 2021 12:36:08 -0400 Subject: [PATCH 12/14] Fix Atrous Update --- src/main.cpp | 19 +++-- src/pathtrace.cu | 204 ++++++++++++++++++++++++++++------------------- src/pathtrace.h | 3 +- src/preview.cpp | 2 +- 4 files changed, 138 insertions(+), 90 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index e6a49c5..d25d5cb 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -29,6 +29,7 @@ float ui_colorWeight = 0.45f; float ui_normalWeight = 0.35f; float ui_positionWeight = 0.2f; bool ui_saveAndExit = false; +bool imageDenoised = false; static bool camchanged = true; static float dtheta = 0, dphi = 0; @@ -175,12 +176,11 @@ void runCuda() { // No data is moved (Win & Linux). When mapped to CUDA, OpenGL should not use this buffer if (iteration == 0) { - int filter_size = glm::sqrt(ui_filterSize); - filter_size = filter_size % 2 == 0 ? filter_size + 1 : filter_size; - float *gKernel = new float[filter_size * filter_size]; - FilterCreation(filter_size, gKernel); + float *gKernel = new float[5 * 5]; + FilterCreation(5, gKernel); pathtraceFree(); - pathtraceInit(scene, ui_colorWeight, ui_normalWeight, ui_positionWeight, gKernel, filter_size); + pathtraceInit(scene, gKernel); + imageDenoised = false; } uchar4 *pbo_dptr = NULL; @@ -191,14 +191,19 @@ void runCuda() { // execute the kernel int frame = 0; - pathtrace(frame, iteration); + pathtrace(frame, iteration); } if (ui_showGbuffer) { showGBuffer(pbo_dptr); } - else if (ui_denoise) + else if (ui_denoise && iteration == ui_iterations) { + if (!imageDenoised) + { + imageDenoised = DenoiseImage(renderState->camera.resolution.x, renderState->camera.resolution.y, iteration, ui_filterSize, + ui_colorWeight, ui_normalWeight, ui_positionWeight); + } showDenoise(pbo_dptr, iteration); } else { diff --git a/src/pathtrace.cu b/src/pathtrace.cu index 2a07675..57f4ecd 100644 --- a/src/pathtrace.cu +++ b/src/pathtrace.cu @@ -39,11 +39,11 @@ void checkCUDAErrorFn(const char* msg, const char* file, int line) { } -//float gaussianKernel[25] = { 0.003765, 0.015019, 0.023792, 0.015019, 0.003765, -//0.015019, 0.059912, 0.094907, 0.059912, 0.015019, -//0.023792, 0.094907, 0.150342, 0.094907, 0.023792, -//0.015019, 0.059912, 0.094907, 0.059912, 0.015019, -//0.003765, 0.015019, 0.023792, 0.015019, 0.003765, }; +float gaussianKernel[25] = { 0.003765, 0.015019, 0.023792, 0.015019, 0.003765, +0.015019, 0.059912, 0.094907, 0.059912, 0.015019, +0.023792, 0.094907, 0.150342, 0.094907, 0.023792, +0.015019, 0.059912, 0.094907, 0.059912, 0.015019, +0.003765, 0.015019, 0.023792, 0.015019, 0.003765, }; //glm::vec2 offsetKernel[25]; @@ -77,6 +77,29 @@ __global__ void sendImageToPBO(uchar4* pbo, glm::ivec2 resolution, } } +//Kernel that writes the image to the OpenGL PBO directly. +__global__ void sendImageToPBOCopy(uchar4* pbo, glm::ivec2 resolution, + int iter, glm::vec3* image) { + int x = (blockIdx.x * blockDim.x) + threadIdx.x; + int y = (blockIdx.y * blockDim.y) + threadIdx.y; + + if (x < resolution.x && y < resolution.y) { + int index = x + (y * resolution.x); + glm::vec3 pix = image[index]; + + glm::ivec3 color; + color.x = glm::clamp((int)((pix.x / iter) * 255.0), 0, 255); + color.y = glm::clamp((int)((pix.y / iter) * 255.0), 0, 255); + color.z = glm::clamp((int)((pix.z / iter) * 255.0), 0, 255); + + // Each thread writes one pixel location in the texture (textel) + pbo[index].w = 0; + pbo[index].x = color.x; + pbo[index].y = color.y; + pbo[index].z = color.z; + } +} + __global__ void gbufferToPBO(uchar4* pbo, glm::ivec2 resolution, GBufferPixel* gBuffer) { int x = (blockIdx.x * blockDim.x) + threadIdx.x; int y = (blockIdx.y * blockDim.y) + threadIdx.y; @@ -145,11 +168,8 @@ static GBufferPixel* dev_gBuffer = NULL; static float* dev_gausKernel = NULL; static glm::vec2* dev_offsetKernel = NULL; static glm::vec3* dev_TrousImage = NULL; +static glm::vec3* dev_pingPongImage = NULL; -static float ui_colorWeight = 0.0f; -static float ui_normalWeight = 0.0f; -static float ui_positionWeight = 0.0f; -static float ui_filterSize = 0.0f; //static glm::vec3* dev_IntermediaryImage = NULL; void generateOffsetKern(int filterSize, vector &offsetKernel) @@ -166,7 +186,7 @@ void generateOffsetKern(int filterSize, vector &offsetKernel) } } -void pathtraceInit(Scene* scene, float a_ui_colorWeight, float a_ui_normalWeight, float a_ui_positionWeight, float *gausKernel, float filterSize) { +void pathtraceInit(Scene* scene,float *gausKernel) { hst_scene = scene; const Camera& cam = hst_scene->state.camera; const int pixelcount = cam.resolution.x * cam.resolution.y; @@ -189,31 +209,19 @@ void pathtraceInit(Scene* scene, float a_ui_colorWeight, float a_ui_normalWeight // TODO: initialize any extra device memeory you need - cudaMalloc(&dev_gausKernel, filterSize * filterSize * sizeof(float)); - cudaMemcpy(dev_gausKernel, gausKernel, filterSize * filterSize * sizeof(float), cudaMemcpyHostToDevice); + cudaMalloc(&dev_gausKernel, 25 * sizeof(float)); + cudaMemcpy(dev_gausKernel, gaussianKernel, 25 * sizeof(float), cudaMemcpyHostToDevice); vector< glm::vec2> offKern; - generateOffsetKern(filterSize, offKern); - cudaMalloc(&dev_offsetKernel, filterSize * filterSize * sizeof(glm::vec2)); - cudaMemcpy(dev_offsetKernel, offKern.data(), filterSize * filterSize * sizeof(glm::vec2), cudaMemcpyHostToDevice); + generateOffsetKern(5, offKern); + cudaMalloc(&dev_offsetKernel, 25 * sizeof(glm::vec2)); + cudaMemcpy(dev_offsetKernel, offKern.data(), 25 * sizeof(glm::vec2), cudaMemcpyHostToDevice); cudaMalloc(&dev_TrousImage, pixelcount * sizeof(glm::vec3)); + cudaMalloc(&dev_pingPongImage, pixelcount * sizeof(glm::vec3)); - - //cudaMemset(dev_ui_colorWeight, ui_colorWeight, sizeof(float)); - //cudaMemset(dev_ui_normalWeight, ui_normalWeight, sizeof(float)); - //cudaMemset(dev_ui_positionWeight, ui_positionWeight, sizeof(float)); - - ui_colorWeight = a_ui_colorWeight; - ui_normalWeight = a_ui_normalWeight; - ui_positionWeight = a_ui_positionWeight; - ui_filterSize = filterSize; - - - /*cudaMalloc(&dev_IntermediaryImage, pixelcount * sizeof(glm::vec3));*/ - checkCUDAError("pathtraceInit"); } @@ -229,6 +237,7 @@ void pathtraceFree() { cudaFree(dev_gausKernel); cudaFree(dev_offsetKernel); cudaFree(dev_TrousImage); + cudaFree(dev_pingPongImage); checkCUDAError("pathtraceFree"); } @@ -344,17 +353,17 @@ __global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, Path } __global__ void GenerateAtrousImage( - int num_paths, int filterSize, + int pixelCount, int stepWidth, float* dev_gausKernel, glm::vec2* dev_offsetKernel, glm::vec3* dev_colorImage, glm::vec3* dev_TrousImage, - GBufferPixel* gbuf, const Camera cam, float ui_colorWeight, - float ui_normalWeight,float ui_positionWeight + GBufferPixel* gbuf, int resolutionX, int resolutionY, float ui_colorWeight, + float ui_normalWeight, float ui_positionWeight ) { int index = blockIdx.x * blockDim.x + threadIdx.x; - if (index < num_paths) + if (index < pixelCount) { glm::vec3 sum = glm::vec3(0.0f); glm::vec3 cval = dev_colorImage[index]; @@ -366,14 +375,11 @@ __global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, Path float pphi = ui_positionWeight * ui_positionWeight; float cum_w = 0.0f; - for (int stepIter = 0; stepIter < 1; stepIter++) - { - for (int i = 0; i < filterSize * filterSize; i++) + for (int i = 0; i < 25; i++) { - int stepWidth = 1 << stepIter; // Calculate Offseted Index - int index2D_y = index / cam.resolution.x; - int index2D_x = (int)(index % cam.resolution.x); + int index2D_y = index / resolutionX ; + int index2D_x = (int)(index % resolutionX); int offsetX = dev_offsetKernel[i].x; int offsetY = dev_offsetKernel[i].y; @@ -381,57 +387,50 @@ __global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, Path int finalValue_X = index2D_x + offsetX * stepWidth; // Final Offset Values int finalValue_Y = index2D_y + offsetY * stepWidth; // Final Offset Values - if (finalValue_X >= 0 && finalValue_X <= (cam.resolution.x - 1) && finalValue_Y >= 0 && finalValue_Y <= (cam.resolution.y - 1)) + //finalValue_X = glm::clamp((float)finalValue_X, 0.0f, (float)resolutionX - 1.0f); + //finalValue_Y = glm::clamp((float)finalValue_Y, 0.0f, (float)resolutionY - 1.0f); + + if (finalValue_X >= 0 && finalValue_X <= (resolutionX - 1) && finalValue_Y >= 0 && finalValue_Y <= (resolutionY - 1)) { - int offsetColorIdx = finalValue_Y * cam.resolution.x + finalValue_X; - if (offsetColorIdx >= 0 && offsetColorIdx < num_paths) + int offsetColorIdx = finalValue_Y * resolutionX + finalValue_X; + if (offsetColorIdx >= 0 && offsetColorIdx < pixelCount) { glm::vec3 ctmp = dev_colorImage[offsetColorIdx]; glm::vec3 t = cval - ctmp; float dist2 = glm::dot(t, t); - if (dist2 != 0.0f) - { - dist2 = dist2; - } float newVal = glm::exp(-1 * (dist2) / cphi); - float c_w = glm::min(newVal, 1.0f); + float c_w = glm::min(glm::exp(-(dist2) / cphi), 1.0f); glm::vec3 ntmp = gbuf[offsetColorIdx].normal; t = nval - ntmp; dist2 = glm::max(glm::dot(t, t)/ (stepWidth * stepWidth), 0.0f); newVal = glm::exp(-1 * (dist2) / nphi ); - float n_w = glm::min(newVal, 1.0f); + float n_w = glm::min(glm::exp(-(dist2) / cphi), 1.0f); glm::vec3 ptmp = gbuf[offsetColorIdx].position; t = pval - ptmp; dist2 = glm::dot(t, t); newVal = glm::exp(-1 * (dist2) / pphi); - float p_w = glm::min(newVal, 1.0f); + float p_w = glm::min(glm::exp(-(dist2) / cphi), 1.0f); float weight = c_w * n_w * p_w; - - if (weight < 0.9f && weight >0.1f) - { - c_w = c_w; - } - sum += ctmp * weight * dev_gausKernel[i]; cum_w += weight * dev_gausKernel[i]; } } } - } + dev_TrousImage[index] = sum / cum_w; } } __global__ void GenerateAtrousImage2( - int num_paths, int filterSize, + int pixelCount, int stepWidth, float* dev_gausKernel, glm::vec2* dev_offsetKernel, glm::vec3* dev_colorImage, glm::vec3* dev_TrousImage, - GBufferPixel* gbuf, const Camera cam, float ui_colorWeight, + GBufferPixel* gbuf, int resolutionX, int resolutionY, float ui_colorWeight, float ui_normalWeight, float ui_positionWeight ) { @@ -439,62 +438,64 @@ __global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, Path int x = (blockIdx.x * blockDim.x) + threadIdx.x; int y = (blockIdx.y * blockDim.y) + threadIdx.y; - if (x < cam.resolution.x && y < cam.resolution.y) + if (x < resolutionX && y < resolutionY) { - int index = x + (y * cam.resolution.x); + int index = x + (y * resolutionX); glm::vec3 sum = glm::vec3(0.0f); glm::vec3 cval = dev_colorImage[index]; glm::vec3 nval = gbuf[index].normal; glm::vec3 pval = gbuf[index].position; - float cphi = ui_colorWeight * ui_colorWeight; - float nphi = ui_normalWeight * ui_normalWeight; - float pphi = ui_positionWeight * ui_positionWeight; + float cphi = ui_colorWeight ; + float nphi = ui_normalWeight ; + float pphi = ui_positionWeight ; float cum_w = 0.0f; - for (int stepIter = 0; stepIter < 1; stepIter++) - { - for (int i = 0; i < filterSize *filterSize; i++) + for (int i = 0; i < 5 * 5; i++) { - int stepWidth = 1 << stepIter; // Calculate Offseted Index int offsetX = dev_offsetKernel[i].x; int offsetY = dev_offsetKernel[i].y; - int finalValue_X = glm::clamp ((float)x + (float)offsetX * stepWidth, 0.0f, (float)cam.resolution.x - 1.0f); - int finalValue_Y = glm::clamp((float)y + (float)offsetY * stepWidth, 0.0f, (float)cam.resolution.y - 1.0f); - - int offsetColorIdx = finalValue_Y * cam.resolution.x + finalValue_X; - if (offsetColorIdx >= 0 && offsetColorIdx < num_paths) + int finalValue_X = x + offsetX * stepWidth; + int finalValue_Y = y + offsetY * stepWidth; + //int finalValue_X = glm::clamp ((float)x + (float)offsetX * stepWidth, 0.0f, (float)resolutionX - 1.0f); + //int finalValue_Y = glm::clamp((float)y + (float)offsetY * stepWidth, 0.0f, (float)resolutionY - 1.0f); + if (finalValue_X >= 0 && finalValue_X <= (resolutionX - 1) && finalValue_Y >= 0 && finalValue_Y <= (resolutionY - 1)) + { + int offsetColorIdx = finalValue_Y * resolutionX + finalValue_X; + if (offsetColorIdx >= 0 && offsetColorIdx < pixelCount) { glm::vec3 ctmp = dev_colorImage[offsetColorIdx]; glm::vec3 t = cval - ctmp; - float dist2 = glm::dot(t, t); + float dist2 = glm::length(t) * glm::length(t); float newVal = glm::exp(-(dist2) / cphi); - float c_w = glm::min(newVal, 1.f); + float c_w = glm::min(newVal, 1.0f); glm::vec3 ntmp = gbuf[offsetColorIdx].normal; t = nval - ntmp; - dist2 = glm::max(glm::dot(t, t) / (stepWidth * stepWidth), 0.f); + dist2 = glm::max( (glm::length(t) * glm::length(t)) / (stepWidth * stepWidth), 0.f); newVal = glm::exp(-1 * (dist2) / nphi); float n_w = glm::min(newVal, 1.0f); glm::vec3 ptmp = gbuf[offsetColorIdx].position; t = pval - ptmp; - dist2 = glm::dot(t, t); + dist2 = glm::length(t) * glm::length(t); newVal = glm::exp(-1 * (dist2) / pphi); - float p_w = glm::min(newVal, 1.f); + float p_w = glm::min(newVal, 1.0f); + + float weight = c_w * n_w * p_w; sum += ctmp * weight * dev_gausKernel[i]; cum_w += weight * dev_gausKernel[i]; } + } } - } + dev_TrousImage[index] = sum / cum_w; - //dev_TrousImage[index] = cval; } } @@ -742,8 +743,8 @@ __global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, Path // dev_image, dev_TrousImage, cam); //GenerateAtrousImage << > > (num_paths, ui_filterSize ,dev_gausKernel, dev_offsetKernel, // dev_image, dev_TrousImage, dev_gBuffer, cam, ui_colorWeight, ui_normalWeight, ui_positionWeight); - GenerateAtrousImage2 << > > (num_paths, ui_filterSize ,dev_gausKernel, dev_offsetKernel, - dev_image, dev_TrousImage, dev_gBuffer, cam, ui_colorWeight, ui_normalWeight, ui_positionWeight); + // GenerateAtrousImage2 << > > (num_paths, ui_filterSize ,dev_gausKernel, dev_offsetKernel, + //dev_image, dev_TrousImage, dev_gBuffer, cam, ui_colorWeight, ui_normalWeight, ui_positionWeight); /////////////////////////////////////////////////////////////////////////// // CHECKITOUT: use dev_image as reference if you want to implement saving denoised images. @@ -770,6 +771,47 @@ __global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, Path gbufferToPBO_Position <<>>(pbo, cam.resolution, dev_gBuffer); } + __global__ void GeneratePingPongImage(int pixelCount, glm::vec3* devImage, glm::vec3* pingPongImage) + { + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + if (index < pixelCount) + { + pingPongImage[index] = devImage[index]; + } + } + + + bool DenoiseImage(int resolutionX, int resolutionY, int iteration, int filterSize, int colWeight, int norWeight, int posWeight) + { + int pixelCount = resolutionX * resolutionY; + const int blockSize1d = 128; + dim3 numblocksPathSegmentTracing = (pixelCount + blockSize1d - 1) / blockSize1d; + + // 2D block for generating ray from camera + const dim3 blockSize2d(8, 8); + const dim3 blocksPerGrid2d( + (resolutionX + blockSize2d.x - 1) / blockSize2d.x, + (resolutionY + blockSize2d.y - 1) / blockSize2d.y); + + int stepWidth = 1; + int blur_iterations = ceil(glm::log2((filterSize) / 4.f)); + GeneratePingPongImage << < numblocksPathSegmentTracing, blockSize1d >> > (pixelCount, dev_image, dev_pingPongImage); + for (int i = 1; i < blur_iterations; i ++) + { + + // 1D block for path tracing + GenerateAtrousImage << > > (pixelCount, stepWidth, dev_gausKernel, dev_offsetKernel, + dev_pingPongImage, dev_TrousImage, dev_gBuffer, resolutionX, resolutionY, colWeight, norWeight, posWeight); + stepWidth *= 2; + //GenerateAtrousImage2 << > > (pixelCount, i,dev_gausKernel, dev_offsetKernel, + // dev_pingPongImage, dev_TrousImage, dev_gBuffer, resolutionX, resolutionY, colWeight, norWeight, posWeight); + swap(dev_pingPongImage, dev_TrousImage); + } + swap(dev_pingPongImage, dev_TrousImage); // Getting the data back to dev_TrousImage + cudaDeviceSynchronize(); + return true; + } + void showImage(uchar4 * pbo, int iter) { const Camera& cam = hst_scene->state.camera; const dim3 blockSize2d(8, 8); @@ -790,5 +832,5 @@ __global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, Path (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y); // Send results to OpenGL buffer for rendering - sendImageToPBO << > > (pbo, cam.resolution, iter, dev_TrousImage); + sendImageToPBOCopy << > > (pbo, cam.resolution, iter, dev_TrousImage); } diff --git a/src/pathtrace.h b/src/pathtrace.h index 5ae7dfc..f9a7366 100644 --- a/src/pathtrace.h +++ b/src/pathtrace.h @@ -3,10 +3,11 @@ #include #include "scene.h" -void pathtraceInit(Scene *scene, float a, float b, float c, float* gausKernel, float filterSize); +void pathtraceInit(Scene *scene, float* gausKernel); void pathtraceFree(); void pathtrace(int frame, int iteration); void showGBuffer(uchar4 *pbo); void showImage(uchar4 *pbo, int iter); void showDenoise(uchar4 *pbo, int iter); +bool DenoiseImage(int resolutionX, int resolutionY, int iteration, int filterSize, int colWeight, int norWeight, int posWeight); \ No newline at end of file diff --git a/src/preview.cpp b/src/preview.cpp index 3ca2718..994f657 100644 --- a/src/preview.cpp +++ b/src/preview.cpp @@ -214,7 +214,7 @@ void drawGui(int windowWidth, int windowHeight) { ImGui::Checkbox("Denoise", &ui_denoise); - ImGui::SliderInt("Filter Size", &ui_filterSize, 0, 100); + ImGui::SliderInt("Filter Size", &ui_filterSize, 0, 300); ImGui::SliderFloat("Color Weight", &ui_colorWeight, 0.0f, 10.0f); ImGui::SliderFloat("Normal Weight", &ui_normalWeight, 0.0f, 10.0f); ImGui::SliderFloat("Position Weight", &ui_positionWeight, 0.0f, 10.0f); From b8163e48aeb0a5a607fb78b68fb54576f8a08aad Mon Sep 17 00:00:00 2001 From: Shubham Sharma Date: Fri, 22 Oct 2021 23:04:53 -0400 Subject: [PATCH 13/14] refactored code --- src/pathtrace.cu | 189 ++++++++++++++++++++++------------------------- src/pathtrace.h | 2 +- 2 files changed, 90 insertions(+), 101 deletions(-) diff --git a/src/pathtrace.cu b/src/pathtrace.cu index 57f4ecd..c295723 100644 --- a/src/pathtrace.cu +++ b/src/pathtrace.cu @@ -16,6 +16,14 @@ #define ERRORCHECK 1 + +//Flags for Denoising +#define SimpleGaussianDenoise 0 +#define ATrous1 1 +#define ATrous2 0 + + + #define FILENAME (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__) #define checkCUDAError(msg) checkCUDAErrorFn(msg, FILENAME, __LINE__) void checkCUDAErrorFn(const char* msg, const char* file, int line) { @@ -77,29 +85,6 @@ __global__ void sendImageToPBO(uchar4* pbo, glm::ivec2 resolution, } } -//Kernel that writes the image to the OpenGL PBO directly. -__global__ void sendImageToPBOCopy(uchar4* pbo, glm::ivec2 resolution, - int iter, glm::vec3* image) { - int x = (blockIdx.x * blockDim.x) + threadIdx.x; - int y = (blockIdx.y * blockDim.y) + threadIdx.y; - - if (x < resolution.x && y < resolution.y) { - int index = x + (y * resolution.x); - glm::vec3 pix = image[index]; - - glm::ivec3 color; - color.x = glm::clamp((int)((pix.x / iter) * 255.0), 0, 255); - color.y = glm::clamp((int)((pix.y / iter) * 255.0), 0, 255); - color.z = glm::clamp((int)((pix.z / iter) * 255.0), 0, 255); - - // Each thread writes one pixel location in the texture (textel) - pbo[index].w = 0; - pbo[index].x = color.x; - pbo[index].y = color.y; - pbo[index].z = color.z; - } -} - __global__ void gbufferToPBO(uchar4* pbo, glm::ivec2 resolution, GBufferPixel* gBuffer) { int x = (blockIdx.x * blockDim.x) + threadIdx.x; int y = (blockIdx.y * blockDim.y) + threadIdx.y; @@ -272,35 +257,35 @@ __global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, Path __global__ void GenerateGaussianBlur( - int num_paths, int filterSize, + int pixelCount, int stepWidth, float* dev_gausKernel, glm::vec2 *dev_offsetKernel, glm::vec3* dev_colorImage, glm::vec3 *dev_TrousImage, - const Camera cam + int resolutionX, int resolutionY ) { int index = blockIdx.x * blockDim.x + threadIdx.x; - if (index < num_paths) + if (index < pixelCount) { glm::vec3 currColor =glm::vec3(0.0f); //glm::vec3 currColor = dev_colorImage[index]; - for (int i = 0; i < filterSize ; i++) + for (int i = 0; i < 25 ; i++) { - int index2D_y = index / cam.resolution.x; - int index2D_x = (int)(index % cam.resolution.x); + int index2D_y = index / resolutionX; + int index2D_x = (int)(index % resolutionX); int offsetX = dev_offsetKernel[i].x; int offsetY = dev_offsetKernel[i].y; - int finalValue_X = index2D_x + offsetX; - int finalValue_Y = index2D_y + offsetY; + int finalValue_X = index2D_x + offsetX * stepWidth; + int finalValue_Y = index2D_y + offsetY * stepWidth; - if (finalValue_X >= 0 && finalValue_X <= (cam.resolution.x - 1) && finalValue_Y >= 0 && finalValue_Y <= (cam.resolution.y - 1)) + if (finalValue_X >= 0 && finalValue_X <= (resolutionX - 1) && finalValue_Y >= 0 && finalValue_Y <= (resolutionY - 1)) { float gausValue = dev_gausKernel[i]; - int offsetColorIdx = finalValue_Y * cam.resolution.x + finalValue_X; - if (offsetColorIdx >= 0 && offsetColorIdx < num_paths) + int offsetColorIdx = finalValue_Y * resolutionX + finalValue_X; + if (offsetColorIdx >= 0 && offsetColorIdx < pixelCount) { glm::vec3 newColor = dev_colorImage[offsetColorIdx]; currColor += newColor * dev_gausKernel[i]; @@ -312,46 +297,22 @@ __global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, Path } - __global__ void GenerateGaussianBlur2( - int num_paths, int filterSize, - float* dev_gausKernel, glm::vec2* dev_offsetKernel, - glm::vec3* dev_colorImage, glm::vec3* dev_TrousImage, - const Camera cam - ) - { - - int x = (blockIdx.x * blockDim.x) + threadIdx.x; - int y = (blockIdx.y * blockDim.y) + threadIdx.y; - - if (x < cam.resolution.x && y < cam.resolution.y) - { - int index = x + (y * cam.resolution.x); - glm::vec3 color = dev_colorImage[index]; - glm::vec3 currColor = glm::vec3(0.0f); - //glm::vec3 currColor = dev_colorImage[index]; - for (int i = 0; i < filterSize * filterSize; i++) - { - int offsetX = dev_offsetKernel[i].x; - int offsetY = dev_offsetKernel[i].y; - int finalValue_X = x + offsetX; - int finalValue_Y = y + offsetY; - - finalValue_X = glm::clamp(finalValue_X, 0, cam.resolution.x - 1); - finalValue_Y = glm::clamp(finalValue_Y, 0, cam.resolution.y - 1); - float gausValue = dev_gausKernel[i]; - int offsetColorIdx = finalValue_Y * cam.resolution.x + finalValue_X; - /* if (offsetColorIdx >= 0 && offsetColorIdx < num_paths) - {*/ - glm::vec3 newColor = dev_colorImage[offsetColorIdx]; - currColor += newColor * dev_gausKernel[i]; - /*}*/ - - } - dev_TrousImage[index] = currColor; - } - - } - + /// + /// This A Trous Kernel performs conversion from 1D index to 2D. + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// __global__ void GenerateAtrousImage( int pixelCount, int stepWidth, float* dev_gausKernel, glm::vec2* dev_offsetKernel, @@ -386,10 +347,6 @@ __global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, Path int finalValue_X = index2D_x + offsetX * stepWidth; // Final Offset Values int finalValue_Y = index2D_y + offsetY * stepWidth; // Final Offset Values - - //finalValue_X = glm::clamp((float)finalValue_X, 0.0f, (float)resolutionX - 1.0f); - //finalValue_Y = glm::clamp((float)finalValue_Y, 0.0f, (float)resolutionY - 1.0f); - if (finalValue_X >= 0 && finalValue_X <= (resolutionX - 1) && finalValue_Y >= 0 && finalValue_Y <= (resolutionY - 1)) { int offsetColorIdx = finalValue_Y * resolutionX + finalValue_X; @@ -398,19 +355,16 @@ __global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, Path glm::vec3 ctmp = dev_colorImage[offsetColorIdx]; glm::vec3 t = cval - ctmp; float dist2 = glm::dot(t, t); - float newVal = glm::exp(-1 * (dist2) / cphi); float c_w = glm::min(glm::exp(-(dist2) / cphi), 1.0f); glm::vec3 ntmp = gbuf[offsetColorIdx].normal; t = nval - ntmp; dist2 = glm::max(glm::dot(t, t)/ (stepWidth * stepWidth), 0.0f); - newVal = glm::exp(-1 * (dist2) / nphi ); float n_w = glm::min(glm::exp(-(dist2) / cphi), 1.0f); glm::vec3 ptmp = gbuf[offsetColorIdx].position; t = pval - ptmp; dist2 = glm::dot(t, t); - newVal = glm::exp(-1 * (dist2) / pphi); float p_w = glm::min(glm::exp(-(dist2) / cphi), 1.0f); float weight = c_w * n_w * p_w; @@ -420,13 +374,35 @@ __global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, Path } } } - + if (cum_w == 0.f) + { + dev_TrousImage[index] = cval; + return; + } + dev_TrousImage[index] = sum / cum_w; + } } - - __global__ void GenerateAtrousImage2( + + /// + /// This A Trous Kernel already starts out with 2D indexes + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + __global__ void GenerateAtrousImageGeneral( int pixelCount, int stepWidth, float* dev_gausKernel, glm::vec2* dev_offsetKernel, glm::vec3* dev_colorImage, glm::vec3* dev_TrousImage, @@ -460,8 +436,6 @@ __global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, Path int finalValue_X = x + offsetX * stepWidth; int finalValue_Y = y + offsetY * stepWidth; - //int finalValue_X = glm::clamp ((float)x + (float)offsetX * stepWidth, 0.0f, (float)resolutionX - 1.0f); - //int finalValue_Y = glm::clamp((float)y + (float)offsetY * stepWidth, 0.0f, (float)resolutionY - 1.0f); if (finalValue_X >= 0 && finalValue_X <= (resolutionX - 1) && finalValue_Y >= 0 && finalValue_Y <= (resolutionY - 1)) { int offsetColorIdx = finalValue_Y * resolutionX + finalValue_X; @@ -739,12 +713,6 @@ __global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, Path // Assemble this iteration and apply it to the image dim3 numBlocksPixels = (pixelcount + blockSize1d - 1) / blockSize1d; finalGather << > > (num_paths, dev_image, dev_paths); -// GenerateGaussianBlur2 << > > (num_paths, ui_filterSize,dev_gausKernel, dev_offsetKernel, -// dev_image, dev_TrousImage, cam); - //GenerateAtrousImage << > > (num_paths, ui_filterSize ,dev_gausKernel, dev_offsetKernel, - // dev_image, dev_TrousImage, dev_gBuffer, cam, ui_colorWeight, ui_normalWeight, ui_positionWeight); - // GenerateAtrousImage2 << > > (num_paths, ui_filterSize ,dev_gausKernel, dev_offsetKernel, - //dev_image, dev_TrousImage, dev_gBuffer, cam, ui_colorWeight, ui_normalWeight, ui_positionWeight); /////////////////////////////////////////////////////////////////////////// // CHECKITOUT: use dev_image as reference if you want to implement saving denoised images. @@ -771,7 +739,7 @@ __global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, Path gbufferToPBO_Position <<>>(pbo, cam.resolution, dev_gBuffer); } - __global__ void GeneratePingPongImage(int pixelCount, glm::vec3* devImage, glm::vec3* pingPongImage) + __global__ void GeneratePingPongImage(int pixelCount, glm::vec3* devImage, glm::vec3* pingPongImage, int iter) { int index = (blockIdx.x * blockDim.x) + threadIdx.x; if (index < pixelCount) @@ -781,30 +749,51 @@ __global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, Path } - bool DenoiseImage(int resolutionX, int resolutionY, int iteration, int filterSize, int colWeight, int norWeight, int posWeight) + bool DenoiseImage(int resolutionX, int resolutionY, int iteration, int filterSize, float colWeight, float norWeight, float posWeight) { int pixelCount = resolutionX * resolutionY; const int blockSize1d = 128; dim3 numblocksPathSegmentTracing = (pixelCount + blockSize1d - 1) / blockSize1d; - // 2D block for generating ray from camera + + const dim3 blockSize2d(8, 8); const dim3 blocksPerGrid2d( (resolutionX + blockSize2d.x - 1) / blockSize2d.x, (resolutionY + blockSize2d.y - 1) / blockSize2d.y); int stepWidth = 1; - int blur_iterations = ceil(glm::log2((filterSize) / 4.f)); - GeneratePingPongImage << < numblocksPathSegmentTracing, blockSize1d >> > (pixelCount, dev_image, dev_pingPongImage); + //int blur_iterations = ceil(glm::log2( (filterSize/25.f) * (filterSize / 25.f)) ); // This wavelet scalling is the correct option though + int blur_iterations = ceil(glm::log2( (filterSize* filterSize) / 25.f) ); // This wavelet scalling worked best for me + blur_iterations = blur_iterations <= 0 ? 1 : blur_iterations; + int colorWeight = colWeight; + + GeneratePingPongImage << < numblocksPathSegmentTracing, blockSize1d >> > (pixelCount, dev_image, dev_pingPongImage, iteration); for (int i = 1; i < blur_iterations; i ++) { +#if SimpleGaussianDenoise + + + GenerateGaussianBlur <<< numblocksPathSegmentTracing, blockSize1d >> > (pixelCount, stepWidth, dev_gausKernel, dev_offsetKernel, + dev_pingPongImage, dev_TrousImage, resolutionX, resolutionY); + +#endif + +#if ATrous1 // 1D block for path tracing GenerateAtrousImage << > > (pixelCount, stepWidth, dev_gausKernel, dev_offsetKernel, + dev_pingPongImage, dev_TrousImage, dev_gBuffer, resolutionX, resolutionY, colorWeight, norWeight, posWeight); +#endif + + +#if ATrous2 + GenerateAtrousImage2 << > > (pixelCount, i,dev_gausKernel, dev_offsetKernel, dev_pingPongImage, dev_TrousImage, dev_gBuffer, resolutionX, resolutionY, colWeight, norWeight, posWeight); +#endif + stepWidth *= 2; - //GenerateAtrousImage2 << > > (pixelCount, i,dev_gausKernel, dev_offsetKernel, - // dev_pingPongImage, dev_TrousImage, dev_gBuffer, resolutionX, resolutionY, colWeight, norWeight, posWeight); + colorWeight /= 2; swap(dev_pingPongImage, dev_TrousImage); } swap(dev_pingPongImage, dev_TrousImage); // Getting the data back to dev_TrousImage @@ -832,5 +821,5 @@ __global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, Path (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y); // Send results to OpenGL buffer for rendering - sendImageToPBOCopy << > > (pbo, cam.resolution, iter, dev_TrousImage); + sendImageToPBO << > > (pbo, cam.resolution, iter, dev_TrousImage); } diff --git a/src/pathtrace.h b/src/pathtrace.h index f9a7366..a2791df 100644 --- a/src/pathtrace.h +++ b/src/pathtrace.h @@ -10,4 +10,4 @@ void showGBuffer(uchar4 *pbo); void showImage(uchar4 *pbo, int iter); void showDenoise(uchar4 *pbo, int iter); -bool DenoiseImage(int resolutionX, int resolutionY, int iteration, int filterSize, int colWeight, int norWeight, int posWeight); \ No newline at end of file +bool DenoiseImage(int resolutionX, int resolutionY, int iteration, int filterSize, float colWeight, float norWeight, float posWeight); \ No newline at end of file From 2f5b4c9f5b317dd35ccf2e5ea4832e6e21aa0099 Mon Sep 17 00:00:00 2001 From: Shubham Sharma Date: Sun, 24 Oct 2021 01:55:28 -0400 Subject: [PATCH 14/14] Added TImer --- CMakeLists.txt | 2 + src/common.cu | 40 +++++++++++++++ src/common.h | 132 +++++++++++++++++++++++++++++++++++++++++++++++++ src/main.cpp | 44 ++++++++++++++++- 4 files changed, 217 insertions(+), 1 deletion(-) create mode 100644 src/common.cu create mode 100644 src/common.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 162568b..ca9eb84 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -73,6 +73,7 @@ set(headers src/sceneStructs.h src/preview.h src/utilities.h +src/common.h ) set(sources @@ -84,6 +85,7 @@ set(sources src/scene.cpp src/preview.cpp src/utilities.cpp +src/common.cu ) set(imgui diff --git a/src/common.cu b/src/common.cu new file mode 100644 index 0000000..7a83529 --- /dev/null +++ b/src/common.cu @@ -0,0 +1,40 @@ +#include "common.h" + + +namespace StreamCompaction { + namespace Common { + + /** + * Maps an array to an array of 0s and 1s for stream compaction. Elements + * which map to 0 will be removed, and elements which map to 1 will be kept. + */ + __global__ void kernMapToBoolean(int n, int* bools1, int* bools2, const int* idata) { + // TODO + int index = threadIdx.x + (blockIdx.x * blockDim.x); + if (index >= n) { + return; + } + + int result = idata[index] != 0; + bools1[index] = result; + bools2[index] = result; + } + + /** + * Performs scatter on an array. That is, for each element in idata, + * if bools[idx] == 1, it copies idata[idx] to odata[indices[idx]]. + */ + __global__ void kernScatter(int n, int* odata, + const int* idata, const int* bools, const int* indices) { + // TODO + int index = threadIdx.x + (blockIdx.x * blockDim.x); + if (index >= n) { + return; + } + + if (bools[index]) { + odata[indices[index]] = idata[index]; + } + } + } +} \ No newline at end of file diff --git a/src/common.h b/src/common.h new file mode 100644 index 0000000..d2c1fed --- /dev/null +++ b/src/common.h @@ -0,0 +1,132 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include + +#define FILENAME (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__) +#define checkCUDAError(msg) checkCUDAErrorFn(msg, FILENAME, __LINE__) + +/** + * Check for CUDA errors; print and exit if there was a problem. + */ +void checkCUDAErrorFn(const char *msg, const char *file = NULL, int line = -1); + +inline int ilog2(int x) { + int lg = 0; + while (x >>= 1) { + ++lg; + } + return lg; +} + +inline int ilog2ceil(int x) { + return x == 1 ? 0 : ilog2(x - 1) + 1; +} + +namespace StreamCompaction { + namespace Common { + __global__ void kernMapToBoolean(int n, int *bools, const int *idata); + + __global__ void kernScatter(int n, int *odata, + const int *idata, const int *bools, const int *indices); + + /** + * This class is used for timing the performance + * Uncopyable and unmovable + * + * Adapted from WindyDarian(https://github.com/WindyDarian) + */ + class PerformanceTimer + { + public: + PerformanceTimer() + { + cudaEventCreate(&event_start); + cudaEventCreate(&event_end); + } + + ~PerformanceTimer() + { + cudaEventDestroy(event_start); + cudaEventDestroy(event_end); + } + + void startCpuTimer() + { + if (cpu_timer_started) { throw std::runtime_error("CPU timer already started"); } + cpu_timer_started = true; + + time_start_cpu = std::chrono::high_resolution_clock::now(); + } + + void endCpuTimer() + { + time_end_cpu = std::chrono::high_resolution_clock::now(); + + if (!cpu_timer_started) { throw std::runtime_error("CPU timer not started"); } + + std::chrono::duration duro = time_end_cpu - time_start_cpu; + prev_elapsed_time_cpu_milliseconds = + static_cast(duro.count()); + + cpu_timer_started = false; + } + + void startGpuTimer() + { + if (gpu_timer_started) { throw std::runtime_error("GPU timer already started"); } + gpu_timer_started = true; + + cudaEventRecord(event_start); + } + + void endGpuTimer() + { + cudaEventRecord(event_end); + cudaEventSynchronize(event_end); + + if (!gpu_timer_started) { throw std::runtime_error("GPU timer not started"); } + + cudaEventElapsedTime(&prev_elapsed_time_gpu_milliseconds, event_start, event_end); + gpu_timer_started = false; + } + + float getCpuElapsedTimeForPreviousOperation() //noexcept //(damn I need VS 2015 + { + return prev_elapsed_time_cpu_milliseconds; + } + + float getGpuElapsedTimeForPreviousOperation() //noexcept + { + return prev_elapsed_time_gpu_milliseconds; + } + + // remove copy and move functions + PerformanceTimer(const PerformanceTimer&) = delete; + PerformanceTimer(PerformanceTimer&&) = delete; + PerformanceTimer& operator=(const PerformanceTimer&) = delete; + PerformanceTimer& operator=(PerformanceTimer&&) = delete; + + private: + cudaEvent_t event_start = nullptr; + cudaEvent_t event_end = nullptr; + + using time_point_t = std::chrono::high_resolution_clock::time_point; + time_point_t time_start_cpu; + time_point_t time_end_cpu; + + bool cpu_timer_started = false; + bool gpu_timer_started = false; + + float prev_elapsed_time_cpu_milliseconds = 0.f; + float prev_elapsed_time_gpu_milliseconds = 0.f; + }; + } +} diff --git a/src/main.cpp b/src/main.cpp index d25d5cb..ce6124c 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -5,7 +5,7 @@ #include "../imgui/imgui.h" #include "../imgui/imgui_impl_glfw.h" #include "../imgui/imgui_impl_opengl3.h" - +#include "common.h" static std::string startTimeString; // For camera controls @@ -46,6 +46,18 @@ int iteration; int width; int height; +static float timePT; +static float timeAT; +static bool hasPrinted; +using StreamCompaction::Common::PerformanceTimer; +#define TIMER 1 + +PerformanceTimer& timer() +{ + static PerformanceTimer timer; + return timer; +} + void FilterCreation(int filter_size, float *kernel) { // initialising standard deviation to 1.0 @@ -189,9 +201,26 @@ void runCuda() { if (iteration < ui_iterations) { iteration++; +#if TIMER + // Start Timer + if (iteration == 1) + { + timePT = 0.f; + } + timer().startCpuTimer(); +#endif // TIMER + // execute the kernel int frame = 0; pathtrace(frame, iteration); + +#if TIMER + timer().endCpuTimer(); + timePT += timer().getCpuElapsedTimeForPreviousOperation(); + if (iteration == ui_iterations) { + std::cout << "Path-trace time for " << iteration << " iterations: " << timePT << "ms" << std::endl; + } +#endif // TIMER } if (ui_showGbuffer) { @@ -201,8 +230,21 @@ void runCuda() { { if (!imageDenoised) { +#if TIMER + // Start Timer + timeAT = 0.f; + if (!hasPrinted) { + timer().startCpuTimer(); + } +#endif // TIMER imageDenoised = DenoiseImage(renderState->camera.resolution.x, renderState->camera.resolution.y, iteration, ui_filterSize, ui_colorWeight, ui_normalWeight, ui_positionWeight); + +#if TIMER + timer().endCpuTimer(); + timeAT += timer().getCpuElapsedTimeForPreviousOperation(); + std::cout << "Denoise time for " << iteration << " iterations: " << timeAT << "ms\n\n" << std::endl; +#endif // TIMER } showDenoise(pbo_dptr, iteration); }