diff --git a/CMakeLists.txt b/CMakeLists.txt index d3d976c..0c5e133 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -86,10 +86,4 @@ target_link_libraries(${CMAKE_PROJECT_NAME} ${CORELIBS} ) -add_custom_command( - TARGET ${CMAKE_PROJECT_NAME} - POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy_directory - ${CMAKE_SOURCE_DIR}/shaders - ${CMAKE_BINARY_DIR}/shaders - ) + diff --git a/README.md b/README.md index 110697c..2dcb27f 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,99 @@ CUDA Path Tracer ================ -**University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 3** +**University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 2** -* (TODO) YOUR NAME HERE -* Tested on: (TODO) Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab) +* Yu Sun +* [LinkedIn](https://www.linkedin.com/in/yusun3/) +* Tested on: Windows 10, i7-6700HQ CPU @ 2.60GHz × 8, GeForce GTX 960M/PCIe/SSE2, 7.7GB Memory (Personal Laptop) -### (TODO: Your README) +## Introduction -*DO NOT* leave the README to the last minute! It is a crucial part of the -project, and we will not be able to grade you without a good README. +In this project, a basic path tracer with static scene rendering and shading is implemented. The tracer can shade objects with +diffuse, specular and refractive surfaces lit by light sources in the scene. + +Below is a very simple scene demonstrating the effect. +![](img/scene.png) +![](img/closed.png) + +The techniques used to make the shading process faster include caching the first bounces, sorting the materials by id, and +stream compaction. Caching can help because the first bounce of each ray is almost always the same from one iteration to the next. Sorting the materials by id helps the +access pattern of the threads, and stream compaction reduces the number of computations that need to be done. + +Additional features include anti-aliasing, depth of field, and motion blur.
+ +***Anti-aliasing*** + +Anti-aliasing is a very simple technique: a small, uniformly distributed random sub-pixel offset is added to each camera ray when it is shot, so that object boundaries look more natural. + +An example of a scene with and without the anti-aliasing effect is shown below; look at how sharp the edges are in the scene without anti-aliasing. + +Scene with anti-aliasing | Scene without anti-aliasing +:-------------------------:|:-------------------------: +![](img/white.png) | ![](img/alias.png) + + +***Depth of Field*** + +Depth of field can be achieved by specifying a camera model with varying focal length and lens size. It uses a technique called concentric disk sampling, which +is explained in detail in the book Physically Based Rendering. The results are shown below: + +Focal Length = 10 | Focal Length = 15 +:-------------------------:|:-------------------------: +![](img/focal10.png) | ![](img/focal15.png) + +Lens Size = 0.2 | Lens Size = 0.5 +:-------------------------:|:-------------------------: +![](img/lensize.png) | ![](img/focal15.png) + + +***Motion Blur*** + +Motion blur can be achieved by sampling each moving object at different positions along its motion. The movement speed affects how the object is captured by the camera. + +Speed = 1 | Speed = 2 +:-------------------------:|:-------------------------: +![](img/speed1.png) | ![](img/speed2.png) + + + +***Refraction*** + +Refraction can be achieved by using Snell's law. It makes an object look more realistic by letting light pass through it. + +Without refraction | With Refraction +:-------------------------:|:-------------------------: +![](img/no_refract.png) | ![](img/scene.png) + + +## Analysis + +As mentioned briefly above, stream compaction helps reduce the amount of computation. Below is a more concrete analysis of how the number of rays is reduced by stream compaction. Notice the difference between the closed scene and the open scene.
+ +![](img/sc.png) + +This makes sense: in a closed scene the rays need to bounce more times before hitting a light source and terminating, while in an open scene they can terminate early by escaping the scene. + +It can also be seen that, in general, the algorithm runs faster on the open scene. + +![](img/time.png) + +It is also true that caching helps improve the speed, as shown below. ![](img/cache.png) + +However, when the camera rays are randomized, as they are for the anti-aliasing effect or motion blur, the first bounce changes from iteration to iteration, so this technique cannot be used anymore. + +On the other hand, we could still use sorting to help increase the speed. Although in theory sorting can improve performance, in practice +one needs to consider its overhead, as sorting does come with a price. + +Closed Scene | Open scene +:-------------------------:|:-------------------------: +![](img/sort_closed.png) | ![](img/sort_open.png) + +In general, it's more helpful to use sorting in the open scene, since in the closed scene the rays rarely terminate. Therefore, the gain from making the memory access contiguous +may not be that large. + +## References + +* [PBRT] Physically Based Rendering, Second Edition: From Theory To Implementation. Pharr, Matt and Humphreys, Greg. 2010.
+* Wikipedia diff --git a/img/alias.png b/img/alias.png new file mode 100644 index 0000000..95615a1 Binary files /dev/null and b/img/alias.png differ diff --git a/img/cache.png b/img/cache.png new file mode 100644 index 0000000..16ce36a Binary files /dev/null and b/img/cache.png differ diff --git a/img/closed.png b/img/closed.png new file mode 100644 index 0000000..0853521 Binary files /dev/null and b/img/closed.png differ diff --git a/img/cornell.2018-10-01_00-01-49z.195samp.png b/img/cornell.2018-10-01_00-01-49z.195samp.png new file mode 100644 index 0000000..33c33a3 Binary files /dev/null and b/img/cornell.2018-10-01_00-01-49z.195samp.png differ diff --git a/img/focal10.png b/img/focal10.png new file mode 100644 index 0000000..d5f5d0f Binary files /dev/null and b/img/focal10.png differ diff --git a/img/focal15.png b/img/focal15.png new file mode 100644 index 0000000..ef2f0ab Binary files /dev/null and b/img/focal15.png differ diff --git a/img/lensize.png b/img/lensize.png new file mode 100644 index 0000000..4fbac03 Binary files /dev/null and b/img/lensize.png differ diff --git a/img/no_refract.png b/img/no_refract.png new file mode 100644 index 0000000..1463cf1 Binary files /dev/null and b/img/no_refract.png differ diff --git a/img/purple.png b/img/purple.png new file mode 100644 index 0000000..7e3f8fb Binary files /dev/null and b/img/purple.png differ diff --git a/img/sc.png b/img/sc.png new file mode 100644 index 0000000..d98912f Binary files /dev/null and b/img/sc.png differ diff --git a/img/scene.png b/img/scene.png new file mode 100644 index 0000000..ac3a3c0 Binary files /dev/null and b/img/scene.png differ diff --git a/img/sort_closed.png b/img/sort_closed.png new file mode 100644 index 0000000..c238c29 Binary files /dev/null and b/img/sort_closed.png differ diff --git a/img/sort_open.png b/img/sort_open.png new file mode 100644 index 0000000..1c59ffc Binary files /dev/null and b/img/sort_open.png differ diff --git a/img/speed1.png b/img/speed1.png new 
file mode 100644 index 0000000..b707a36 Binary files /dev/null and b/img/speed1.png differ diff --git a/img/speed2.png b/img/speed2.png new file mode 100644 index 0000000..e82dc49 Binary files /dev/null and b/img/speed2.png differ diff --git a/img/time.png b/img/time.png new file mode 100644 index 0000000..bbea4c6 Binary files /dev/null and b/img/time.png differ diff --git a/img/white.png b/img/white.png new file mode 100644 index 0000000..ece997e Binary files /dev/null and b/img/white.png differ diff --git a/scenes/cornell.txt b/scenes/cornell.txt index 83ff820..5fdc76e 100644 --- a/scenes/cornell.txt +++ b/scenes/cornell.txt @@ -40,9 +40,9 @@ EMITTANCE 0 // Specular white MATERIAL 4 -RGB .98 .98 .98 +RGB .98 0.5 .98 SPECEX 0 -SPECRGB .98 .98 .98 +SPECRGB .98 .5 .98 REFL 1 REFR 0 REFRIOR 0 diff --git a/scenes/cornell_blur.txt b/scenes/cornell_blur.txt new file mode 100644 index 0000000..7bae0c8 --- /dev/null +++ b/scenes/cornell_blur.txt @@ -0,0 +1,171 @@ +// Emissive material (light) +MATERIAL 0 +RGB 1 1 1 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 5 + +// Diffuse white +MATERIAL 1 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse red +MATERIAL 2 +RGB .85 .35 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse green +MATERIAL 3 +RGB .35 .85 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Specular white +MATERIAL 4 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB .98 .98 .98 +REFL 1 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Camera +CAMERA +RES 800 800 +FOVY 45 +ITERATIONS 5000 +DEPTH 8 +FILE cornell +EYE 0.0 5 10.5 +LOOKAT 0 5 0 +UP 0 1 0 +LENSIZE 0.5 +FOCALLEN 15 + + +// Ceiling light +OBJECT 0 +cube +material 0 +TRANS 0 10 0 +ROTAT 0 0 0 +SCALE 3 .3 3 +SPEED 0 + +// Floor +OBJECT 1 +cube +material 1 +TRANS 0 0 0 +ROTAT 0 0 0 +SCALE 10 .01 10 +SPEED 0 + +// Ceiling +OBJECT 2 +cube +material 1 +TRANS 0 10 0 +ROTAT 0 0 90 +SCALE .01 10 10 +SPEED 0 + +// Back 
wall +OBJECT 3 +cube +material 1 +TRANS 0 5 -15 +ROTAT 0 90 0 +SCALE .01 10 10 +SPEED 0 + +// Left wall +OBJECT 4 +cube +material 2 +TRANS -5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 +SPEED 0 + +// Right wall +OBJECT 5 +cube +material 3 +TRANS 5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 +SPEED 0 + +// Sphere +OBJECT 6 +sphere +material 4 +TRANS -1 5 -2 +ROTAT 0 0 0 +SCALE 2 2 2 +SPEED 1.8 + +// Ceiling light +OBJECT 7 +cube +material 0 +TRANS 0 10 -10 +ROTAT 0 0 0 +SCALE 3 .3 3 +SPEED 0 + +// Floor +OBJECT 8 +cube +material 1 +TRANS 0 0 -10 +ROTAT 0 0 0 +SCALE 10 .01 10 +SPEED 0 + +// Ceiling +OBJECT 9 +cube +material 1 +TRANS 0 10 -10 +ROTAT 0 0 90 +SCALE .01 10 10 +SPEED 0 + +// Left wall +OBJECT 10 +cube +material 2 +TRANS -5 5 -10 +ROTAT 0 0 0 +SCALE .01 10 10 +SPEED 0 + +// Right wall +OBJECT 11 +cube +material 3 +TRANS 5 5 -10 +ROTAT 0 0 0 +SCALE .01 10 10 +SPEED 0 \ No newline at end of file diff --git a/scenes/cornell_closed.txt b/scenes/cornell_closed.txt new file mode 100644 index 0000000..5b79813 --- /dev/null +++ b/scenes/cornell_closed.txt @@ -0,0 +1,125 @@ +// Emissive material (light) +MATERIAL 0 +RGB 1 1 1 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 5 + +// Diffuse white +MATERIAL 1 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse red +MATERIAL 2 +RGB .85 .35 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse green +MATERIAL 3 +RGB .35 .85 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Specular white +MATERIAL 4 +RGB .98 0.5 .98 +SPECEX 0 +SPECRGB .98 .5 .98 +REFL 1 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Camera +CAMERA +RES 800 800 +FOVY 45 +ITERATIONS 5000 +DEPTH 8 +FILE cornell +EYE 0.0 5 0 +LOOKAT 0 5 -5 +UP 0 1 0 + + +// Ceiling light +OBJECT 0 +cube +material 0 +TRANS 0 10 0 +ROTAT 0 0 0 +SCALE 3 .3 3 + +// Floor +OBJECT 1 +cube +material 1 +TRANS 0 0 0 +ROTAT 0 0 0 +SCALE 10 .01 10 + +// Ceiling +OBJECT 2 +cube +material 1 +TRANS 0 10 
0 +ROTAT 0 0 90 +SCALE .01 10 10 + +// Back wall +OBJECT 3 +cube +material 1 +TRANS 0 5 -5 +ROTAT 0 90 0 +SCALE .01 10 10 + +// Left wall +OBJECT 4 +cube +material 2 +TRANS -5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +// Right wall +OBJECT 5 +cube +material 3 +TRANS 5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +// Sphere +OBJECT 6 +sphere +material 4 +TRANS -1 4 -5 +ROTAT 0 0 0 +SCALE 1 1 1 + +// Closed wall +OBJECT 7 +cube +material 1 +TRANS 0 5 5 +ROTAT 0 90 0 +SCALE .01 10 10 diff --git a/scenes/cornell_dof.txt b/scenes/cornell_dof.txt new file mode 100644 index 0000000..e708298 --- /dev/null +++ b/scenes/cornell_dof.txt @@ -0,0 +1,191 @@ +// Emissive material (light) +MATERIAL 0 +RGB 1 1 1 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 5 + +// Diffuse white +MATERIAL 1 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse red +MATERIAL 2 +RGB .85 .35 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse green +MATERIAL 3 +RGB .35 .85 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Specular white +MATERIAL 4 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB .98 .98 .98 +REFL 1 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Camera +CAMERA +RES 800 800 +FOVY 45 +ITERATIONS 5000 +DEPTH 8 +FILE cornell +EYE 0.0 5 10.5 +LOOKAT 0 5 0 +UP 0 1 0 +LENSIZE 0.5 +FOCALLEN 15 + + +// Ceiling light +OBJECT 0 +cube +material 0 +TRANS 0 10 0 +ROTAT 0 0 0 +SCALE 3 .3 3 + +// Floor +OBJECT 1 +cube +material 1 +TRANS 0 0 0 +ROTAT 0 0 0 +SCALE 10 .01 10 + +// Ceiling +OBJECT 2 +cube +material 1 +TRANS 0 10 0 +ROTAT 0 0 90 +SCALE .01 10 10 + +// Back wall +OBJECT 3 +cube +material 1 +TRANS 0 5 -15 +ROTAT 0 90 0 +SCALE .01 10 10 + +// Left wall +OBJECT 4 +cube +material 2 +TRANS -5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +// Right wall +OBJECT 5 +cube +material 3 +TRANS 5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +// Sphere +OBJECT 6 +sphere +material 4 +TRANS -1 1.5 -2 +ROTAT 0 0 0 +SCALE 2 2 2 + +// Ceiling light 
+OBJECT 7 +cube +material 0 +TRANS 0 10 -10 +ROTAT 0 0 0 +SCALE 3 .3 3 + +// Floor +OBJECT 8 +cube +material 1 +TRANS 0 0 -10 +ROTAT 0 0 0 +SCALE 10 .01 10 + +// Ceiling +OBJECT 9 +cube +material 1 +TRANS 0 10 -10 +ROTAT 0 0 90 +SCALE .01 10 10 + +// Left wall +OBJECT 10 +cube +material 2 +TRANS -5 5 -10 +ROTAT 0 0 0 +SCALE .01 10 10 + +// Right wall +OBJECT 11 +cube +material 3 +TRANS 5 5 -10 +ROTAT 0 0 0 +SCALE .01 10 10 + +// Sphere +OBJECT 12 +sphere +material 4 +TRANS -1 1.5 2 +ROTAT 0 0 0 +SCALE 2 2 2 + +// Sphere +OBJECT 13 +sphere +material 4 +TRANS -1 1.5 -6 +ROTAT 0 0 0 +SCALE 2 2 2 + +// Sphere +OBJECT 14 +sphere +material 4 +TRANS -1 1.5 -9 +ROTAT 0 0 0 +SCALE 2 2 2 + +// Sphere +OBJECT 15 +sphere +material 4 +TRANS -1 1.5 -12 +ROTAT 0 0 0 +SCALE 2 2 2 diff --git a/scenes/cornell_refract.txt b/scenes/cornell_refract.txt new file mode 100644 index 0000000..ca689d1 --- /dev/null +++ b/scenes/cornell_refract.txt @@ -0,0 +1,165 @@ +// Emissive material (light) +MATERIAL 0 +RGB 1 1 1 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 5 + +// Diffuse white +MATERIAL 1 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse red +MATERIAL 2 +RGB .85 .35 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse green +MATERIAL 3 +RGB .35 .85 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Specular white +MATERIAL 4 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB .98 .98 .98 +REFL 1 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Emissive material (light) +MATERIAL 5 +RGB 0.98 .65 0 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 5 + +// Refractive white +MATERIAL 6 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB .98 .98 .98 +REFL 0 +REFR 1 +REFRIOR 1.5 +EMITTANCE 0 + +// Emissive material (light) +MATERIAL 7 +RGB 0 0.7 0.98 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 2.5 + +// Camera +CAMERA +RES 800 800 +FOVY 45 +ITERATIONS 5000 +DEPTH 8 +FILE cornell +EYE 
0.0 5 10.5 +LOOKAT 0 5 0 +UP 0 1 0 +DOFX 0 +DOFY 10 + + +// Ceiling light +OBJECT 0 +cube +material 0 +TRANS 0 10 0 +ROTAT 0 0 0 +SCALE 3 .3 3 + +// Floor +OBJECT 1 +cube +material 1 +TRANS 0 0 0 +ROTAT 0 0 0 +SCALE 10 .01 10 + +// Ceiling +OBJECT 2 +cube +material 1 +TRANS 0 10 0 +ROTAT 0 0 90 +SCALE .01 10 10 + +// Back wall +OBJECT 3 +cube +material 1 +TRANS 0 5 -5 +ROTAT 0 90 0 +SCALE .01 10 10 + +// Left wall +OBJECT 4 +cube +material 2 +TRANS -5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +// Right wall +OBJECT 5 +cube +material 3 +TRANS 5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +// Sphere +OBJECT 6 +sphere +material 4 +TRANS -2 3 0 +ROTAT 0 0 0 +SCALE 1 1 1 + +// Floor light +OBJECT 7 +cube +material 5 +TRANS 0 0 0 +ROTAT 0 0 0 +SCALE 5 .3 5 + +// Sphere +OBJECT 8 +sphere +material 6 +TRANS 3 2 0 +ROTAT 0 0 0 +SCALE 3 3 3 \ No newline at end of file diff --git a/scenes/sphere.txt b/scenes/sphere.txt index a74b545..dfcc691 100644 --- a/scenes/sphere.txt +++ b/scenes/sphere.txt @@ -25,4 +25,4 @@ sphere material 0 TRANS 0 0 0 ROTAT 0 0 0 -SCALE 3 3 3 +SCALE 3 3 3 \ No newline at end of file diff --git a/src/interactions.h b/src/interactions.h index 5ce3628..28f0735 100644 --- a/src/interactions.h +++ b/src/interactions.h @@ -41,6 +41,11 @@ glm::vec3 calculateRandomDirectionInHemisphere( + sin(around) * over * perpendicularDirection2; } + +//__host__ __device__ +//glm::vec3 calculateRandomDirectionReflective( +// glm::vec3 normal, thrust::default_random_engine &rng) + /** * Scatter a ray with some probabilities according to the material properties. * For example, a diffuse surface scatters in a cosine-weighted hemisphere. @@ -76,4 +81,28 @@ void scatterRay( // TODO: implement this. // A basic implementation of pure-diffuse shading will just call the // calculateRandomDirectionInHemisphere defined above. 
+ thrust::uniform_real_distribution u01(0, 1); + + if (m.hasReflective > 0){ + pathSegment.ray.direction = glm::reflect(pathSegment.ray.direction, normal); + pathSegment.color *= m.specular.color; + }else if (m.hasRefractive > 0){ + float cos_theta = glm::dot(pathSegment.ray.direction, normal); + float R0 = powf((1 - m.indexOfRefraction) / (1 + m.indexOfRefraction), 2); + float R = R0 + (1 - R0) * powf((1 - fabs(cos_theta)), 5); + if (R > u01(rng)) pathSegment.ray.direction = glm::reflect(pathSegment.ray.direction, normal); + else { + if (cos_theta < 0) + pathSegment.ray.direction = glm::refract(pathSegment.ray.direction, normal, 1.0f / m.indexOfRefraction); + else + pathSegment.ray.direction = glm::refract(pathSegment.ray.direction, normal, m.indexOfRefraction); + pathSegment.color *= m.specular.color; + } + } + else + pathSegment.ray.direction = calculateRandomDirectionInHemisphere(normal, rng); + pathSegment.color *= m.color; + pathSegment.ray.origin = intersect + 0.005f * pathSegment.ray.direction; + + } diff --git a/src/pathtrace.cu b/src/pathtrace.cu index c1ec122..86392fc 100644 --- a/src/pathtrace.cu +++ b/src/pathtrace.cu @@ -9,62 +9,73 @@ #include "scene.h" #include "glm/glm.hpp" #include "glm/gtx/norm.hpp" +#include "glm/gtc/matrix_transform.hpp" +#include "glm/gtc/matrix_inverse.hpp" + #include "utilities.h" #include "pathtrace.h" #include "intersections.h" #include "interactions.h" +#include "stream_compaction/efficient_sm.h" + #define ERRORCHECK 1 +#define ANTI_ALIAS 0 +#define MOTION_BLUR 0 +#define DOF 0 +#define WORK_EFFICIENT 0 // do not make this to one, feature not tested +#define CACHING 0 + #define FILENAME (strrchr(__FILE__, '/') ? 
strrchr(__FILE__, '/') + 1 : __FILE__) #define checkCUDAError(msg) checkCUDAErrorFn(msg, FILENAME, __LINE__) void checkCUDAErrorFn(const char *msg, const char *file, int line) { #if ERRORCHECK - cudaDeviceSynchronize(); - cudaError_t err = cudaGetLastError(); - if (cudaSuccess == err) { - return; - } - - fprintf(stderr, "CUDA error"); - if (file) { - fprintf(stderr, " (%s:%d)", file, line); - } - fprintf(stderr, ": %s: %s\n", msg, cudaGetErrorString(err)); + cudaDeviceSynchronize(); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess == err) { + return; + } + + fprintf(stderr, "CUDA error"); + if (file) { + fprintf(stderr, " (%s:%d)", file, line); + } + fprintf(stderr, ": %s: %s\n", msg, cudaGetErrorString(err)); # ifdef _WIN32 - getchar(); + getchar(); # endif - exit(EXIT_FAILURE); + exit(EXIT_FAILURE); #endif } __host__ __device__ thrust::default_random_engine makeSeededRandomEngine(int iter, int index, int depth) { - int h = utilhash((1 << 31) | (depth << 22) | iter) ^ utilhash(index); - return thrust::default_random_engine(h); + int h = utilhash((1 << 31) | (depth << 22) | iter) ^ utilhash(index); + return thrust::default_random_engine(h); } //Kernel that writes the image to the OpenGL PBO directly. 
__global__ void sendImageToPBO(uchar4* pbo, glm::ivec2 resolution, - int iter, glm::vec3* image) { - int x = (blockIdx.x * blockDim.x) + threadIdx.x; - int y = (blockIdx.y * blockDim.y) + threadIdx.y; - - if (x < resolution.x && y < resolution.y) { - int index = x + (y * resolution.x); - glm::vec3 pix = image[index]; - - glm::ivec3 color; - color.x = glm::clamp((int) (pix.x / iter * 255.0), 0, 255); - color.y = glm::clamp((int) (pix.y / iter * 255.0), 0, 255); - color.z = glm::clamp((int) (pix.z / iter * 255.0), 0, 255); - - // Each thread writes one pixel location in the texture (textel) - pbo[index].w = 0; - pbo[index].x = color.x; - pbo[index].y = color.y; - pbo[index].z = color.z; - } + int iter, glm::vec3* image) { + int x = (blockIdx.x * blockDim.x) + threadIdx.x; + int y = (blockIdx.y * blockDim.y) + threadIdx.y; + + if (x < resolution.x && y < resolution.y) { + int index = x + (y * resolution.x); + glm::vec3 pix = image[index]; + + glm::ivec3 color; + color.x = glm::clamp((int) (pix.x / iter * 255.0), 0, 255); + color.y = glm::clamp((int) (pix.y / iter * 255.0), 0, 255); + color.z = glm::clamp((int) (pix.z / iter * 255.0), 0, 255); + + // Each thread writes one pixel location in the texture (textel) + pbo[index].w = 0; + pbo[index].x = color.x; + pbo[index].y = color.y; + pbo[index].z = color.z; + } } static Scene * hst_scene = NULL; @@ -74,43 +85,66 @@ static Material * dev_materials = NULL; static PathSegment * dev_paths = NULL; static ShadeableIntersection * dev_intersections = NULL; // TODO: static variables for device memory, any extra info you need, etc -// ... 
+static ShadeableIntersection * dev_intersections_cache = NULL; +static cudaEvent_t start, stop; void pathtraceInit(Scene *scene) { - hst_scene = scene; - const Camera &cam = hst_scene->state.camera; - const int pixelcount = cam.resolution.x * cam.resolution.y; + hst_scene = scene; + const Camera &cam = hst_scene->state.camera; + const int pixelcount = cam.resolution.x * cam.resolution.y; - cudaMalloc(&dev_image, pixelcount * sizeof(glm::vec3)); - cudaMemset(dev_image, 0, pixelcount * sizeof(glm::vec3)); + cudaMalloc(&dev_image, pixelcount * sizeof(glm::vec3)); + cudaMemset(dev_image, 0, pixelcount * sizeof(glm::vec3)); - cudaMalloc(&dev_paths, pixelcount * sizeof(PathSegment)); + cudaMalloc(&dev_paths, pixelcount * sizeof(PathSegment)); - cudaMalloc(&dev_geoms, scene->geoms.size() * sizeof(Geom)); - cudaMemcpy(dev_geoms, scene->geoms.data(), scene->geoms.size() * sizeof(Geom), cudaMemcpyHostToDevice); + cudaMalloc(&dev_geoms, scene->geoms.size() * sizeof(Geom)); + cudaMemcpy(dev_geoms, scene->geoms.data(), scene->geoms.size() * sizeof(Geom), cudaMemcpyHostToDevice); + + cudaMalloc(&dev_materials, scene->materials.size() * sizeof(Material)); + cudaMemcpy(dev_materials, scene->materials.data(), scene->materials.size() * sizeof(Material), cudaMemcpyHostToDevice); + + cudaMalloc(&dev_intersections, pixelcount * sizeof(ShadeableIntersection)); + cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection)); + + // TODO: initialize any extra device memeory you need + cudaMalloc(&dev_intersections_cache, pixelcount * sizeof(ShadeableIntersection)); + cudaMemset(dev_intersections_cache, 0, pixelcount * sizeof(ShadeableIntersection)); - cudaMalloc(&dev_materials, scene->materials.size() * sizeof(Material)); - cudaMemcpy(dev_materials, scene->materials.data(), scene->materials.size() * sizeof(Material), cudaMemcpyHostToDevice); - cudaMalloc(&dev_intersections, pixelcount * sizeof(ShadeableIntersection)); - cudaMemset(dev_intersections, 0, pixelcount * 
sizeof(ShadeableIntersection)); - // TODO: initialize any extra device memeory you need - checkCUDAError("pathtraceInit"); + checkCUDAError("pathtraceInit"); } void pathtraceFree() { - cudaFree(dev_image); // no-op if dev_image is null - cudaFree(dev_paths); - cudaFree(dev_geoms); - cudaFree(dev_materials); - cudaFree(dev_intersections); - // TODO: clean up any extra device memory you created - - checkCUDAError("pathtraceFree"); + cudaFree(dev_image); // no-op if dev_image is null + cudaFree(dev_paths); + cudaFree(dev_geoms); + cudaFree(dev_materials); + cudaFree(dev_intersections); + cudaFree(dev_intersections_cache); + // TODO: clean up any extra device memory you created + + checkCUDAError("pathtraceFree"); } +__device__ glm::vec2 ConcentricSampleDisk(const glm::vec2 & u){ /* Concentric mapping of a 2D sample u to a point on the unit disk; generateRayFromCamera passes two u01 draws, i.e. values in [0, 1) */ + glm::vec2 uoffset = 2.f * u - glm::vec2(1.0, 1.0); /* remap the sample from [0, 1] to [-1, 1] on both axes */ + if (uoffset == glm::vec2(0, 0)) return uoffset; /* the centre maps to the centre; returning early also rules out 0/0 in the ratios below */ + float theta, r; + if (fabsf(uoffset.x) > fabsf(uoffset.y)){ + r = uoffset.x; + theta = PI/4 * (uoffset.y / uoffset.x); + }else{ + r = uoffset.y; + theta = PI/2 - PI/4 * (uoffset.x / uoffset.y); /* ratio x/y (was a difference), mirroring the branch above; y is non-zero here because |y| >= |x| and (0, 0) returned early */ + } + + return r * glm::vec2(cos(theta), sin(theta)); +} + + /** * Generate PathSegments with rays from the camera through the screen into the * scene, which is the first bounce of rays.
@@ -129,31 +163,57 @@ __global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, Path PathSegment & segment = pathSegments[index]; segment.ray.origin = cam.position; - segment.color = glm::vec3(1.0f, 1.0f, 1.0f); + segment.color = glm::vec3(1.0f, 1.0f, 1.0f); // TODO: implement antialiasing by jittering the ray + thrust::default_random_engine rng = makeSeededRandomEngine(iter, index, 0); + thrust::uniform_real_distribution u01(0, 1); + +#if ANTI_ALIAS segment.ray.direction = glm::normalize(cam.view - - cam.right * cam.pixelLength.x * ((float)x - (float)cam.resolution.x * 0.5f) - - cam.up * cam.pixelLength.y * ((float)y - (float)cam.resolution.y * 0.5f) - ); + - cam.right * cam.pixelLength.x * ((float)(x + u01(rng)) - (float)cam.resolution.x * 0.5f) + - cam.up * cam.pixelLength.y * ((float)(y + u01(rng)) - (float)cam.resolution.y * 0.5f) + ); +#else + segment.ray.direction = glm::normalize(cam.view + - cam.right * cam.pixelLength.x * ((float)x - (float)cam.resolution.x * 0.5f) + - cam.up * cam.pixelLength.y * ((float)y - (float)cam.resolution.y * 0.5f) + ); +#endif + +#if MOTION_BLUR + segment.time_diff = u01(rng); +#endif + +#if DOF + glm::vec2 u(u01(rng), u01(rng)); + glm::vec2 pLens = cam.lensSize * ConcentricSampleDisk(u); + glm::vec3 pFocus = segment.ray.origin + glm::abs(cam.focalLength / segment.ray.direction.z) * segment.ray.direction; + segment.ray.origin += pLens.x * cam.right + pLens.y * cam.up; + segment.ray.direction = glm::normalize(pFocus - segment.ray.origin); + +#endif segment.pixelIndex = index; segment.remainingBounces = traceDepth; + } } + + // TODO: // computeIntersections handles generating ray intersections ONLY. // Generating new rays is handled in your shader(s). // Feel free to modify the code below. 
__global__ void computeIntersections( - int depth - , int num_paths - , PathSegment * pathSegments - , Geom * geoms - , int geoms_size - , ShadeableIntersection * intersections - ) + int depth + , int num_paths + , PathSegment * pathSegments + , Geom * geoms + , int geoms_size + , ShadeableIntersection * intersections +) { int path_index = blockIdx.x * blockDim.x + threadIdx.x; @@ -177,6 +237,18 @@ __global__ void computeIntersections( { Geom & geom = geoms[i]; +#if MOTION_BLUR + glm::vec3 interpolate_pos = (pathSegment.time_diff * geom.speed + 1) * geom.translation; + geom.transform = glm::translate(glm::mat4(), interpolate_pos) * + glm::rotate(glm::mat4(), geom.rotation.x * PI / 180, glm::vec3(1, 0, 0)) * + glm::rotate(glm::mat4(), geom.rotation.y * PI / 180, glm::vec3(0, 1, 0)) * + glm::rotate(glm::mat4(), geom.rotation.z * PI / 180, glm::vec3(0, 0, 1)) * + glm::scale(glm::mat4(), geom.scale); + + geom.inverseTransform = glm::inverse(geom.transform); + geom.invTranspose = glm::inverseTranspose(geom.transform); +#endif + if (geom.type == CUBE) { t = boxIntersectionTest(geom, pathSegment.ray, tmp_intersect, tmp_normal, outside); @@ -208,6 +280,7 @@ __global__ void computeIntersections( intersections[path_index].t = t_min; intersections[path_index].materialId = geoms[hit_geom_index].materialid; intersections[path_index].surfaceNormal = normal; + intersections[path_index].point = intersect_point; } } } @@ -221,48 +294,50 @@ __global__ void computeIntersections( // Note that this shader does NOT do a BSDF evaluation! // Your shaders should handle that - this can allow techniques such as // bump mapping. 
-__global__ void shadeFakeMaterial ( - int iter - , int num_paths - , ShadeableIntersection * shadeableIntersections - , PathSegment * pathSegments - , Material * materials - ) +__global__ void shadeMaterialNaive ( + int iter + , int num_paths + , ShadeableIntersection * shadeableIntersections + , PathSegment * pathSegments + , Material * materials +) { - int idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx < num_paths) - { - ShadeableIntersection intersection = shadeableIntersections[idx]; - if (intersection.t > 0.0f) { // if the intersection exists... - // Set up the RNG - // LOOK: this is how you use thrust's RNG! Please look at - // makeSeededRandomEngine as well. - thrust::default_random_engine rng = makeSeededRandomEngine(iter, idx, 0); - thrust::uniform_real_distribution u01(0, 1); - - Material material = materials[intersection.materialId]; - glm::vec3 materialColor = material.color; - - // If the material indicates that the object was a light, "light" the ray - if (material.emittance > 0.0f) { - pathSegments[idx].color *= (materialColor * material.emittance); - } - // Otherwise, do some pseudo-lighting computation. This is actually more - // like what you would expect from shading in a rasterizer like OpenGL. - // TODO: replace this! you should be able to start with basically a one-liner - else { - float lightTerm = glm::dot(intersection.surfaceNormal, glm::vec3(0.0f, 1.0f, 0.0f)); - pathSegments[idx].color *= (materialColor * lightTerm) * 0.3f + ((1.0f - intersection.t * 0.02f) * materialColor) * 0.7f; - pathSegments[idx].color *= u01(rng); // apply some noise because why not - } - // If there was no intersection, color the ray black. - // Lots of renderers use 4 channel color, RGBA, where A = alpha, often - // used for opacity, in which case they can indicate "no opacity". - // This can be useful for post-processing and image compositing. 
- } else { - pathSegments[idx].color = glm::vec3(0.0f); - } - } + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < num_paths) + { + ShadeableIntersection intersection = shadeableIntersections[idx]; + if (intersection.t > 0.0f) { // if the intersection exists... + // Set up the RNG + // LOOK: this is how you use thrust's RNG! Please look at + // makeSeededRandomEngine as well. + thrust::default_random_engine rng = makeSeededRandomEngine(iter, idx, pathSegments[idx].remainingBounces); + thrust::uniform_real_distribution u01(0, 1); + + Material material = materials[intersection.materialId]; + glm::vec3 materialColor = material.color; + + // If the material indicates that the object was a light, "light" the ray + if (material.emittance > 0.0f) { + pathSegments[idx].color *= (materialColor * material.emittance); + pathSegments[idx].remainingBounces = 0; + } + // Otherwise, do some pseudo-lighting computation. This is actually more + // like what you would expect from shading in a rasterizer like OpenGL. + // TODO: replace this! you should be able to start with basically a one-liner + else { + scatterRay(pathSegments[idx], intersection.point, intersection.surfaceNormal, material, rng); + pathSegments[idx].remainingBounces--; + + } + // If there was no intersection, color the ray black. + // Lots of renderers use 4 channel color, RGBA, where A = alpha, often + // used for opacity, in which case they can indicate "no opacity". + // This can be useful for post-processing and image compositing. 
+ } else { + pathSegments[idx].color = glm::vec3(0.0f); + pathSegments[idx].remainingBounces = 0; + } + } } // Add the current iteration's output to the overall image @@ -282,49 +357,64 @@ __global__ void finalGather(int nPaths, glm::vec3 * image, PathSegment * iterati * of memory management */ void pathtrace(uchar4 *pbo, int frame, int iter) { - const int traceDepth = hst_scene->state.traceDepth; - const Camera &cam = hst_scene->state.camera; - const int pixelcount = cam.resolution.x * cam.resolution.y; + cudaEventCreate(&start); + cudaEventCreate(&stop); + const int traceDepth = hst_scene->state.traceDepth; + const Camera &cam = hst_scene->state.camera; + const int pixelcount = cam.resolution.x * cam.resolution.y; // 2D block for generating ray from camera - const dim3 blockSize2d(8, 8); - const dim3 blocksPerGrid2d( - (cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x, - (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y); + const dim3 blockSize2d(8, 8); + const dim3 blocksPerGrid2d( + (cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x, + (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y); // 1D block for path tracing const int blockSize1d = 128; - /////////////////////////////////////////////////////////////////////////// - - // Recap: - // * Initialize array of path rays (using rays that come out of the camera) - // * You can pass the Camera object to that kernel. - // * Each path ray must carry at minimum a (ray, color) pair, - // * where color starts as the multiplicative identity, white = (1, 1, 1). - // * This has already been done for you. - // * For each depth: - // * Compute an intersection in the scene for each path ray. - // A very naive version of this has been implemented for you, but feel - // free to add more primitives and/or a better algorithm. - // Currently, intersection distance is recorded as a parametric distance, - // t, or a "distance along the ray." t = -1.0 indicates no intersection. 
- // * Color is attenuated (multiplied) by reflections off of any object - // * TODO: Stream compact away all of the terminated paths. - // You may use either your implementation or `thrust::remove_if` or its - // cousins. - // * Note that you can't really use a 2D kernel launch any more - switch - // to 1D. - // * TODO: Shade the rays that intersected something or didn't bottom out. - // That is, color the ray by performing a color computation according - // to the shader, then generate a new ray to continue the ray path. - // We recommend just updating the ray's PathSegment in place. - // Note that this step may come before or after stream compaction, - // since some shaders you write may also cause a path to terminate. - // * Finally, add this iteration's results to the image. This has been done - // for you. - - // TODO: perform one iteration of path tracing + + // motion blur, can be done in cpu as we don't have much objects + #if MOTION_BLUR + Geom * geoms = &(hst_scene->geoms)[0]; + for (int i = 0; i < hst_scene->geoms.size(); i++){ + if (iter > 1 && geoms[i].speed != 0.0f){ + if (geoms[i].speed > 0 ) geoms[i].speed -= 0.001f; + else geoms[i].speed = 0.0f; + } + } + cudaMemcpy(dev_geoms, geoms, hst_scene->geoms.size()* sizeof(Geom), cudaMemcpyHostToDevice); + #endif + + /////////////////////////////////////////////////////////////////////////// + + // Recap: + // * Initialize array of path rays (using rays that come out of the camera) + // * You can pass the Camera object to that kernel. + // * Each path ray must carry at minimum a (ray, color) pair, + // * where color starts as the multiplicative identity, white = (1, 1, 1). + // * This has already been done for you. + // * For each depth: + // * Compute an intersection in the scene for each path ray. + // A very naive version of this has been implemented for you, but feel + // free to add more primitives and/or a better algorithm. 
+ // Currently, intersection distance is recorded as a parametric distance, + // t, or a "distance along the ray." t = -1.0 indicates no intersection. + // * Color is attenuated (multiplied) by reflections off of any object + // * TODO: Stream compact away all of the terminated paths. + // You may use either your implementation or `thrust::remove_if` or its + // cousins. + // * Note that you can't really use a 2D kernel launch any more - switch + // to 1D. + // * TODO: Shade the rays that intersected something or didn't bottom out. + // That is, color the ray by performing a color computation according + // to the shader, then generate a new ray to continue the ray path. + // We recommend just updating the ray's PathSegment in place. + // Note that this step may come before or after stream compaction, + // since some shaders you write may also cause a path to terminate. + // * Finally, add this iteration's results to the image. This has been done + // for you. + + // TODO: perform one iteration of path tracing generateRayFromCamera <<>>(cam, iter, traceDepth, dev_paths); checkCUDAError("generate camera ray"); @@ -336,58 +426,124 @@ void pathtrace(uchar4 *pbo, int frame, int iter) { // --- PathSegment Tracing Stage --- // Shoot ray into scene, bounce between objects, push shading chunks - bool iterationComplete = false; + bool iterationComplete = false; while (!iterationComplete) { - // clean shading chunks - cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection)); - - // tracing - dim3 numblocksPathSegmentTracing = (num_paths + blockSize1d - 1) / blockSize1d; - computeIntersections <<>> ( - depth - , num_paths - , dev_paths - , dev_geoms - , hst_scene->geoms.size() - , dev_intersections + // clean shading chunks + cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection)); + + // tracing + dim3 numblocksPathSegmentTracing = (num_paths + blockSize1d - 1) / blockSize1d; + + // compute the intersections and put them into cache + 
cudaEventRecord(start); + #if CACHING + if (depth == 0){ + if (iter == 1){ + computeIntersections <<>> ( + depth + , num_paths + , dev_paths + , dev_geoms + , hst_scene->geoms.size() + , dev_intersections + ); + cudaMemcpy(dev_intersections_cache, dev_intersections, pixelcount * sizeof(ShadeableIntersection), cudaMemcpyDeviceToDevice); + } + else { + cudaMemcpy(dev_intersections, dev_intersections_cache, pixelcount * sizeof(ShadeableIntersection), cudaMemcpyDeviceToDevice); + } + }else{ + computeIntersections <<>> ( + depth + , num_paths + , dev_paths + , dev_geoms + , hst_scene->geoms.size() + , dev_intersections + ); + } + #else + computeIntersections <<>> ( + depth + , num_paths + , dev_paths + , dev_geoms + , hst_scene->geoms.size() + , dev_intersections + ); + #endif + + checkCUDAError("trace one bounce"); + cudaDeviceSynchronize(); + cudaEventRecord(stop); + cudaEventSynchronize(stop); + float miliseconds = 0; + cudaEventElapsedTime(&miliseconds, start, stop); + if (iter % 10 == 0) cout << "Computing intersections " << miliseconds << endl; + + // sort by the materials + cudaEventRecord(start); + thrust::sort_by_key(thrust::device, dev_intersections, dev_intersections + num_paths, dev_paths, cmp_material()); + cudaEventRecord(stop); + cudaEventSynchronize(stop); + cudaEventElapsedTime(&miliseconds, start, stop); + if (iter % 10 == 0) cout << "Sorting " << miliseconds << endl; + // TODO: + // --- Shading Stage --- + // Shade path segments based on intersections and generate new rays by + // evaluating the BSDF. + // Start off with just a big kernel that handles all the different + // materials you have in the scenefile. + // TODO: compare between directly shading the path segments and shading + // path segments that have been reshuffled to be contiguous in memory. 
+ cudaEventRecord(start); + shadeMaterialNaive<<>> ( + iter, + num_paths, + dev_intersections, + dev_paths, + dev_materials ); - checkCUDAError("trace one bounce"); - cudaDeviceSynchronize(); - depth++; - - - // TODO: - // --- Shading Stage --- - // Shade path segments based on intersections and generate new rays by - // evaluating the BSDF. - // Start off with just a big kernel that handles all the different - // materials you have in the scenefile. - // TODO: compare between directly shading the path segments and shading - // path segments that have been reshuffled to be contiguous in memory. - - shadeFakeMaterial<<>> ( - iter, - num_paths, - dev_intersections, - dev_paths, - dev_materials - ); - iterationComplete = true; // TODO: should be based off stream compaction results. + cudaEventRecord(stop); + cudaEventSynchronize(stop); + cudaEventElapsedTime(&miliseconds, start, stop); + if (iter % 10 == 0) cout << "Shading " << miliseconds << endl; + + cudaEventRecord(start); + + #if WORK_EFFICIENT + int * indices_buff = new int [pixelcount]; + num_paths = StreamCompaction::EfficientSM::compact(pixelcount, dev_paths, dev_paths, indices_buff); + delete[] indices_buff; + #else + // stream compaction with thrust + PathSegment* new_end = thrust::partition(thrust::device, dev_paths, dev_paths + num_paths, terminate_ray()); + // TODO::this might be wrong, debug and check + // actually might need just stream compaction + num_paths = new_end - dev_paths; + #endif + cudaEventRecord(stop); + cudaEventSynchronize(stop); + cudaEventElapsedTime(&miliseconds, start, stop); + if (iter % 10 == 0) cout << "stream compaction " << miliseconds << endl; + + depth++; + iterationComplete = (num_paths <= 0) || (depth > traceDepth); } - // Assemble this iteration and apply it to the image - dim3 numBlocksPixels = (pixelcount + blockSize1d - 1) / blockSize1d; - finalGather<<>>(num_paths, dev_image, dev_paths); + // Assemble this iteration and apply it to the image + dim3 numBlocksPixels = 
(pixelcount + blockSize1d - 1) / blockSize1d; + finalGather<<>>(pixelcount, dev_image, dev_paths); - /////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////// - // Send results to OpenGL buffer for rendering - sendImageToPBO<<>>(pbo, cam.resolution, iter, dev_image); + // Send results to OpenGL buffer for rendering + sendImageToPBO<<>>(pbo, cam.resolution, iter, dev_image); - // Retrieve image from GPU - cudaMemcpy(hst_scene->state.image.data(), dev_image, - pixelcount * sizeof(glm::vec3), cudaMemcpyDeviceToHost); + // Retrieve image from GPU + cudaMemcpy(hst_scene->state.image.data(), dev_image, + pixelcount * sizeof(glm::vec3), cudaMemcpyDeviceToHost); - checkCUDAError("pathtrace"); + checkCUDAError("pathtrace"); } diff --git a/src/preview.cpp b/src/preview.cpp index 4eb0bc1..47f48bc 100644 --- a/src/preview.cpp +++ b/src/preview.cpp @@ -169,12 +169,28 @@ bool init() { } void mainLoop() { + double fps = 0; + double timebase = 0; + int frame = 0; while (!glfwWindowShouldClose(window)) { glfwPollEvents(); + frame ++; + double time = glfwGetTime(); + + if (time - timebase > 1.0){ + fps = frame / (time - timebase); + timebase = time; + frame = 0; + } + runCuda(); - string title = "CIS565 Path Tracer | " + utilityCore::convertIntToString(iteration) + " Iterations"; - glfwSetWindowTitle(window, title.c_str()); + std::ostringstream title; + title << "CIS565 Path Tracer | " + utilityCore::convertIntToString(iteration) + " Iterations ["; + title.precision(1); + title << std::fixed << fps; + title << " fps]"; + glfwSetWindowTitle(window, title.str().c_str()); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo); glBindTexture(GL_TEXTURE_2D, displayImage); diff --git a/src/scene.cpp b/src/scene.cpp index cbae043..06c789c 100644 --- a/src/scene.cpp +++ b/src/scene.cpp @@ -74,7 +74,9 @@ int Scene::loadGeom(string objectid) { newGeom.rotation = glm::vec3(atof(tokens[1].c_str()), 
atof(tokens[2].c_str()), atof(tokens[3].c_str())); } else if (strcmp(tokens[0].c_str(), "SCALE") == 0) { newGeom.scale = glm::vec3(atof(tokens[1].c_str()), atof(tokens[2].c_str()), atof(tokens[3].c_str())); - } + } else if (strcmp(tokens[0].c_str(), "SPEED") == 0) { + newGeom.speed = atof(tokens[1].c_str()); + } utilityCore::safeGetline(fp_in, line); } @@ -124,6 +126,10 @@ int Scene::loadCamera() { camera.lookAt = glm::vec3(atof(tokens[1].c_str()), atof(tokens[2].c_str()), atof(tokens[3].c_str())); } else if (strcmp(tokens[0].c_str(), "UP") == 0) { camera.up = glm::vec3(atof(tokens[1].c_str()), atof(tokens[2].c_str()), atof(tokens[3].c_str())); + } else if (strcmp(tokens[0].c_str(), "LENSIZE") == 0) { + camera.lensSize = atof(tokens[1].c_str()); + }else if (strcmp(tokens[0].c_str(), "FOCALLEN") == 0) { + camera.focalLength = atof(tokens[1].c_str()); } utilityCore::safeGetline(fp_in, line); diff --git a/src/sceneStructs.h b/src/sceneStructs.h index b38b820..20cd6bc 100644 --- a/src/sceneStructs.h +++ b/src/sceneStructs.h @@ -26,6 +26,7 @@ struct Geom { glm::mat4 transform; glm::mat4 inverseTransform; glm::mat4 invTranspose; + float speed; }; struct Material { @@ -49,6 +50,8 @@ struct Camera { glm::vec3 right; glm::vec2 fov; glm::vec2 pixelLength; + float lensSize; + float focalLength; }; struct RenderState { @@ -64,6 +67,7 @@ struct PathSegment { glm::vec3 color; int pixelIndex; int remainingBounces; + float time_diff; }; // Use with a corresponding PathSegment to do: @@ -72,5 +76,20 @@ struct PathSegment { struct ShadeableIntersection { float t; glm::vec3 surfaceNormal; + glm::vec3 point; int materialId; }; + + +struct cmp_material{ + __host__ __device__ bool operator()(const ShadeableIntersection& a, const ShadeableIntersection& b) { + return a.materialId > b.materialId; + } +}; + + + struct terminate_ray{ + __host__ __device__ bool operator()(const PathSegment& path){ + return path.remainingBounces > 0; + } + }; \ No newline at end of file diff --git 
a/stream_compaction/CMakeLists.txt b/stream_compaction/CMakeLists.txt index ac358c9..c4aebf5 100644 --- a/stream_compaction/CMakeLists.txt +++ b/stream_compaction/CMakeLists.txt @@ -1,4 +1,8 @@ set(SOURCE_FILES + "common.cu" + "common.h" + "efficient_sm.cu" + "efficient_sm.h" ) cuda_add_library(stream_compaction diff --git a/stream_compaction/common.cu b/stream_compaction/common.cu new file mode 100644 index 0000000..d570d59 --- /dev/null +++ b/stream_compaction/common.cu @@ -0,0 +1,31 @@ +#include "common.h" + + +namespace StreamCompaction { + namespace Common { + + /** + * Maps an array to an array of 0s and 1s for stream compaction. Elements + * which map to 0 will be removed, and elements which map to 1 will be kept. + */ + __global__ void kernMapToBoolean(int n, int *bools, const PathSegment *idata) { + int index = threadIdx.x + (blockIdx.x * blockDim.x); + if (index < n){ + bools[index] = (idata[index].remainingBounces != 0); + } + } + + /** + * Performs scatter on an array. That is, for each element in idata, + * if bools[idx] == 1, it copies idata[idx] to odata[indices[idx]]. + */ + __global__ void kernScatter(int n, PathSegment *odata, + const PathSegment *idata, const int *bools, const int *indices) { + int index = threadIdx.x + (blockIdx.x * blockDim.x); + if (index < n && bools[index]){ + odata[indices[index]] = idata[index]; + } + } + + } +} diff --git a/stream_compaction/common.h b/stream_compaction/common.h new file mode 100644 index 0000000..207a169 --- /dev/null +++ b/stream_compaction/common.h @@ -0,0 +1,135 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#define blockSize 128 + +#define FILENAME (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__) +#define checkCUDAError(msg) checkCUDAErrorFn(msg, FILENAME, __LINE__) + +/** + * Check for CUDA errors; print and exit if there was a problem. 
+ */ + +inline int ilog2(int x) { + int lg = 0; + while (x >>= 1) { + ++lg; + } + return lg; +} + +inline int ilog2ceil(int x) { + return x == 1 ? 0 : ilog2(x - 1) + 1; +} + +namespace StreamCompaction { + namespace Common { + __global__ void kernMapToBoolean(int n, int *bools, const PathSegment *idata); + + __global__ void kernScatter(int n, PathSegment *odata, + const PathSegment *idata, const int *bools, const int *indices); + + /** + * This class is used for timing the performance + * Uncopyable and unmovable + * + * Adapted from WindyDarian(https://github.com/WindyDarian) + */ + class PerformanceTimer + { + public: + PerformanceTimer() + { + cudaEventCreate(&event_start); + cudaEventCreate(&event_end); + } + + ~PerformanceTimer() + { + cudaEventDestroy(event_start); + cudaEventDestroy(event_end); + } + + void startCpuTimer() + { + if (cpu_timer_started) { throw std::runtime_error("CPU timer already started"); } + cpu_timer_started = true; + + time_start_cpu = std::chrono::high_resolution_clock::now(); + } + + void endCpuTimer() + { + time_end_cpu = std::chrono::high_resolution_clock::now(); + + if (!cpu_timer_started) { throw std::runtime_error("CPU timer not started"); } + + std::chrono::duration duro = time_end_cpu - time_start_cpu; + prev_elapsed_time_cpu_milliseconds = + static_cast(duro.count()); + + cpu_timer_started = false; + } + + void startGpuTimer() + { + if (gpu_timer_started) { throw std::runtime_error("GPU timer already started"); } + gpu_timer_started = true; + + cudaEventRecord(event_start); + } + + void endGpuTimer() + { + cudaEventRecord(event_end); + cudaEventSynchronize(event_end); + + if (!gpu_timer_started) { throw std::runtime_error("GPU timer not started"); } + + cudaEventElapsedTime(&prev_elapsed_time_gpu_milliseconds, event_start, event_end); + gpu_timer_started = false; + } + + float getCpuElapsedTimeForPreviousOperation() //noexcept //(damn I need VS 2015 + { + return prev_elapsed_time_cpu_milliseconds; + } + + float 
getGpuElapsedTimeForPreviousOperation() //noexcept + { + return prev_elapsed_time_gpu_milliseconds; + } + + // remove copy and move functions + PerformanceTimer(const PerformanceTimer&) = delete; + PerformanceTimer(PerformanceTimer&&) = delete; + PerformanceTimer& operator=(const PerformanceTimer&) = delete; + PerformanceTimer& operator=(PerformanceTimer&&) = delete; + + private: + cudaEvent_t event_start = nullptr; + cudaEvent_t event_end = nullptr; + + using time_point_t = std::chrono::high_resolution_clock::time_point; + time_point_t time_start_cpu; + time_point_t time_end_cpu; + + bool cpu_timer_started = false; + bool gpu_timer_started = false; + + float prev_elapsed_time_cpu_milliseconds = 0.f; + float prev_elapsed_time_gpu_milliseconds = 0.f; + }; + } +} diff --git a/stream_compaction/efficient_sm.cu b/stream_compaction/efficient_sm.cu new file mode 100644 index 0000000..0b14318 --- /dev/null +++ b/stream_compaction/efficient_sm.cu @@ -0,0 +1,135 @@ +#include +#include +#include "common.h" +#include "efficient_sm.h" + +namespace StreamCompaction { + namespace EfficientSM { + using StreamCompaction::Common::PerformanceTimer; + PerformanceTimer& timer() + { + static PerformanceTimer timer; + return timer; + } + + + __global__ void kernEfficientScan(int N, int *odata, int *idata){ + extern __shared__ int tmp[]; + int index = threadIdx.x; + if (index >= N) return; + + int offset = 1; + tmp[2 * index] = idata[2 * index]; + tmp[2 * index + 1] = idata[2 * index + 1]; + // up sweep + for (int d = (N >> 1); d > 0; d >>= 1){ + __syncthreads(); + if (index < d) tmp[offset * (2 * index + 2) - 1] += tmp[offset * (2 * index + 1) - 1]; + offset <<= 1; + } + // clear last digit + if (index == 0) tmp[N - 1] = 0; + // down sweep + for (int d = 1; d < N; d <<= 1){ + offset >>= 1; + __syncthreads(); + if (index < d){ + int t = tmp[offset * (2 * index + 1) - 1]; + tmp[offset * (2 * index + 1) - 1] = tmp[offset * (2 * index + 2) - 1]; + tmp[offset * (2 * index + 2) - 1] += t; + } 
+ } + __syncthreads(); + + odata[2 * index] = tmp[2 * index]; + odata[2 * index + 1] = tmp[2 * index + 1]; + } + + /** + * Performs prefix-sum (aka scan) on idata, storing the result into odata. + */ + void scan(int n, int *odata, const int *idata) { + int N = 1 << ilog2ceil(n); + dim3 fullBlockPerGrid((N + blockSize - 1) / blockSize); + int* dev_in, *dev_out; + + cudaMalloc((void**) &dev_in, N * sizeof(int)); + // checkCUDAError("cudaMalloc dev_in failed"); + + cudaMalloc((void**) &dev_out, N * sizeof(int)); + // checkCUDAError("cudaMalloc dev_out failed"); + + cudaMemset(dev_out, 0, sizeof(int) * N); + // checkCUDAError("cuda Memset failed"); + + cudaMemcpy(dev_in, idata, n * sizeof(int), cudaMemcpyHostToDevice); + // checkCUDAError("cudaMemcpy HostToDevice failed"); + + timer().startGpuTimer(); + + kernEfficientScan <<< fullBlockPerGrid, blockSize, 2 * N * sizeof(int) >>> (N, dev_out, dev_in); + // checkCUDAError("kernNaiveScan dev_in failed"); + + + timer().endGpuTimer(); + + cudaMemcpy(odata, dev_out, n * sizeof(int), cudaMemcpyDeviceToHost); + // checkCUDAError("cudaMemcpy DeviceToHost failed"); + + cudaFree(dev_in); + cudaFree(dev_out); + + } + + int compact(int n, PathSegment *odata, const PathSegment *idata, int* indices_buff) { + + dim3 fullBlockPerGrid((n + blockSize - 1) / blockSize); + int* bools, *indices; + + PathSegment *dev_in, *dev_out; + int num_element; + + cudaMalloc((void**)&bools, sizeof(int) * n); + // checkCUDAError("cudaMalloc bools failed"); + cudaMalloc((void**)&indices, sizeof(int) * n); + // checkCUDAError("cudaMalloc indices failed"); + cudaMalloc((void**)&dev_out, sizeof(PathSegment) * n); + // checkCUDAError("cudaMalloc dev_out failed"); + cudaMalloc((void**)&dev_in, sizeof(PathSegment) * n); + // checkCUDAError("cudaMalloc dev_in failed"); + + // lots of memcpy... 
+ + cudaMemcpy(dev_in, idata, sizeof(PathSegment) * n, cudaMemcpyHostToDevice); + // checkCUDAError("cudaMemcpyHostToDevice failed"); + + timer().startGpuTimer(); + StreamCompaction::Common:: kernMapToBoolean<<>>(n, bools, dev_in); + // checkCUDAError("kernMapToBoolean failed"); + + cudaMemcpy(indices_buff, bools, sizeof(int) * n, cudaMemcpyDeviceToHost); + num_element = indices_buff[n - 1]; + // checkCUDAError("cudaMemcpyDeviceToHost failed"); + + scan(n, indices_buff, indices_buff); + num_element += indices_buff[n - 1]; + + cudaMemcpy(indices, indices_buff, sizeof(int) * n, cudaMemcpyHostToDevice); + // checkCUDAError("cudaMemcpyHostToDevice failed"); + + StreamCompaction::Common::kernScatter<<>>(n, dev_out, dev_in, bools, indices); + + timer().endGpuTimer(); + + cudaMemcpy(odata, dev_out, sizeof(PathSegment) * n, cudaMemcpyDeviceToHost); + // checkCUDAError("cudaMemcpyDeviceToHost failed"); + + cudaFree(bools); + cudaFree(indices); + cudaFree(dev_in); + cudaFree(dev_out); + + return num_element; + } + } +} diff --git a/stream_compaction/efficient_sm.h b/stream_compaction/efficient_sm.h new file mode 100644 index 0000000..2148d78 --- /dev/null +++ b/stream_compaction/efficient_sm.h @@ -0,0 +1,13 @@ +#pragma once + +#include "common.h" + + +namespace StreamCompaction { + namespace EfficientSM { + StreamCompaction::Common::PerformanceTimer& timer(); + + void scan(int n, int *odata, const int *idata); + int compact(int n, PathSegment *odata, const PathSegment *idata, int* indices_buff); + } +} \ No newline at end of file