diff --git a/CMakeLists.txt b/CMakeLists.txt
index 162568b..1aa258e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -73,6 +73,7 @@ set(headers
src/sceneStructs.h
src/preview.h
src/utilities.h
+ stream_compaction/common.h
)
set(sources
@@ -84,6 +85,7 @@ set(sources
src/scene.cpp
src/preview.cpp
src/utilities.cpp
+ stream_compaction/common.cu
)
set(imgui
diff --git a/README.md b/README.md
index f044c82..aecf29c 100644
--- a/README.md
+++ b/README.md
@@ -1,13 +1,240 @@
-CUDA Denoiser For CUDA Path Tracer
-==================================
+Project 4 CUDA A Trous Denoiser
+======================
**University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 4**
-* (TODO) YOUR NAME HERE
-* Tested on: (TODO) Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab)
+* Raymond Yang
+ * [LinkedIn](https://www.linkedin.com/in/raymond-yang-b85b19168)
+ * Tested on:
+ * 10/22/2021
+ * Windows 10
+ * NVIDIA GeForce GTX 1080 Ti.
+ * Submitted on: 10/22/2021
-### (TODO: Your README)
-*DO NOT* leave the README to the last minute! It is a crucial part of the
-project, and we will not be able to grade you without a good README.
+## Introduction
+The objective of this project was to implement A Trous denoiser detailed in this [paper](https://jo.dreggn.org/home/2010_atrous.pdf). To construct the denoised image, we modulate a noisy path traced image (represented as a color buffer) with a geometry buffer/G buffer (represented as an array of positions and an array of normals). Both elements of the G buffer are cahced on first bounce and only done once per render.
+### Components of A Trous Denoiser
+Scene Colors:
+
+
+
+
+Scene Positions (relative to camera origin):
+
+
+
+
+Scene Normals:
+
+
+
+
+Scene Time to Intersect (unused):
+
+
+
+
+### Denoised Output
+
+
+
+
+## Performance Analysis
+In this section, we compare the runtime costs of A Trous denoising, recorded in microseconds. For Simple Scene, we will use `scene/cornell_ceiling_light.txt`. For Slightly Less Simple Scene, we will use `scene/demo.txt`. For the purposes of consistency between runs, the following values will be hardcoded to the window unless otherwise specified:
+* `Filter Size = 80`
+* `Color Weight = 8.f`
+* `Normal Weight = 1.f`
+* `Position Weight = 1.5f`
+
+### Simple Scene
+Baseline image for reference at 1000 iterations:
+
+
+
+Path trace time for 1000 iterations: 33679.50 ms
+Denoise time for 1000 iterations: 62.97 ms
+
+Path traced image after 10 iterations:
+
+
+
+
+Path traced image after 10 iterations with A Trous denoiser:
+
+
+
+
+#### Cost of Running Denoiser vs Path Tracer
+| Trial | 1 | 2 | 3 | 4 | 5 |
+|-----------------|----------|----------|----------|----------|----------|
+| Path Trace Time | 326.05 | 338.88 | 336.49 | 345.53 | 338.01 |
+| Denoise Time | 62.95 | 64.02 | 62.62 | 64.47 | 64.56 |
+
+Lower is better:
+
+
+
+
+#### Cost of Running Denoiser vs Resolution
+| Resolution | 256x256 | 512x512 | 1024x1024 |
+|------------|-----------|-----------|-----------|
+| Time (ms) | 4.76 | 20.41 | 116.61 |
+
+Lower is better:
+
+
+
+
+Lower is better:
+
+
+
+* The increase in time is proportional to the increase in pixelcount for smaller resolutions.
+
+#### Cost of Running Denoiser vs Filter Size
+| Filter Size | 25 | 50 | 100 |
+|-------------|-------|-------|-------|
+| Time (ms) | 45.66 | 60.41 | 65.53 |
+
+Lower is better:
+
+
+
+
+* Note that the increase in time is not substanctial indicating that I am likely calculating the number of iterations required for A Trous denoise incorrectly.
+
+### Slightly Less Simple Scene
+Baseline image for reference at 1000 iterations:
+
+
+
+Path trace time for 1000 iterations: 237650.00 ms
+Denoise time for 1000 iterations: 86.21 ms
+
+Path traced image after 10 iterations:
+
+
+
+
+Path traced image after 10 iterations with A Trous denoiser:
+
+
+
+
+#### Cost of Running Denoiser vs Path Tracer
+| Trial | 1 | 2 | 3 | 4 | 5 |
+|-----------------|-----------|-----------|-----------|-----------|-----------|
+| Path Trace Time | 2431.57 | 2365.28 | 2374.86 | 2480.54 | 2387.57 |
+| Denoise Time | 86.39 | 87.75 | 87.64 | 87.86 | 85.97 |
+
+Lower is better:
+
+
+
+
+#### Cost of Running Denoiser vs Resolution
+| Resolution | 256x256 | 512x512 | 1024x1024 |
+|------------|-----------|-----------|-----------|
+| Time (ms) | 6.61 | 30.41 | 134.44 |
+
+Lower is better:
+
+
+
+
+Lower is better:
+
+
+
+* The increase in time is proportional to the increase in pixelcount for tested resolutions.
+
+#### Cost of Running Denoiser vs Filter Size
+| Filter Size | 25 | 50 | 100 |
+|-------------|-------|-------|-------|
+| Time (ms) | 64.50 | 82.70 | 91.55 |
+
+Lower is better:
+
+
+
+
+* Note that the increase in time is not substanctial indicating that I am likely calculating the number of iterations required for A Trous denoise incorrectly.
+
+## Visual Analysis
+In this section, we compare visual effects of A Trous denoising.
+
+### Visual Results vs Filter Size
+Filter size is the two dimensional area around a pixel. That is, for a filter size of 25 and a pixel located at `(i,j)` of the image, the surrounding `5x5` grid centered around the pixel are factored into the weighting A Trous denoiser.
+* Since I am using a gaussian weighting spread of `5x5`, filter sizes below 25 are not considered because the area is smaller than the gaussian spread.
+* Filter sizes in `[25, 81)`, are mapped `1:1` to the gaussian spread. Filter sizes in `[81, 100]` are mapped 2:1 to the gaussian spread. That is, the gaussian weight at `(-2, -2)` relative to the pixel will actually be the pixel located at `(-4, -4)` relative to the pixel. Likewise, `(1, 1)` is mapped to `(2, 2)`.
+* NOTE: that this assumes I understand and implemented filter size correction. Given the observations above (runtime for a filter size of 50 is greater than runtime for a filter size of 25), it is very likely I have not implemented filter size correctly.
+
+`filterSize = 25`:
+
+
+
+
+`filterSize = 50`:
+
+
+
+
+`filterSize = 75`:
+
+
+
+
+`filterSize = 100`:
+
+
+
+
+The only substantive change seems to be between `filterSize = 25` and `filterSize = 50`.
+
+### Visual Results vs Material
+From left to right, the spheres are of properties reflective, refractive (with index of 1.5), and diffuse. Scene can be found in file `/scenes/cornell_ceiling_light_plus.txt`.
+
+1000 iterations:
+
+
+
+
+10 iterations:
+
+
+
+
+10 iterations with A Trous diffuse:
+
+
+
+
+With my implementation, A Trous denoiser fails to adequately simulate refractive properties. It is moderately capable of simulating diffuse properties. It is capable of simulating reflective properties.
+
+### Visual Results vs Lighting
+Small light uses scene: `/scenes/cornell.txt`
+Large light uses scene: `/scenes/cornell_ceiling_light.txt`
+
+Cornell Box with small ceiling light:
+
+
+
+
+Cornell Box with large ceiling light:
+
+
+
+
+Cornell Box with small ceiling light and A Trous denoise:
+
+
+
+
+Cornell Box with large ceiling light and A Trous denoise:
+
+
+
+
+Given poorer lighting conditions, more "holes" are left in the image. As the A Trous denoiser interpolates neighboring colors for each pixel, it emphasizes darker colors leading to poorer results.
\ No newline at end of file
diff --git a/img/a0.PNG b/img/a0.PNG
new file mode 100644
index 0000000..84c4ddd
Binary files /dev/null and b/img/a0.PNG differ
diff --git a/img/a1.PNG b/img/a1.PNG
new file mode 100644
index 0000000..bb2e760
Binary files /dev/null and b/img/a1.PNG differ
diff --git a/img/a2.PNG b/img/a2.PNG
new file mode 100644
index 0000000..3190dbf
Binary files /dev/null and b/img/a2.PNG differ
diff --git a/img/a3.PNG b/img/a3.PNG
new file mode 100644
index 0000000..81df20b
Binary files /dev/null and b/img/a3.PNG differ
diff --git a/img/a4.PNG b/img/a4.PNG
new file mode 100644
index 0000000..f2f05c9
Binary files /dev/null and b/img/a4.PNG differ
diff --git a/img/b0.PNG b/img/b0.PNG
new file mode 100644
index 0000000..4c55315
Binary files /dev/null and b/img/b0.PNG differ
diff --git a/img/b1.PNG b/img/b1.PNG
new file mode 100644
index 0000000..ff9cf86
Binary files /dev/null and b/img/b1.PNG differ
diff --git a/img/b2.PNG b/img/b2.PNG
new file mode 100644
index 0000000..4638966
Binary files /dev/null and b/img/b2.PNG differ
diff --git a/img/c0.PNG b/img/c0.PNG
new file mode 100644
index 0000000..be94eec
Binary files /dev/null and b/img/c0.PNG differ
diff --git a/img/c1.PNG b/img/c1.PNG
new file mode 100644
index 0000000..a142f83
Binary files /dev/null and b/img/c1.PNG differ
diff --git a/img/c2.PNG b/img/c2.PNG
new file mode 100644
index 0000000..76fc9f6
Binary files /dev/null and b/img/c2.PNG differ
diff --git a/img/c3.PNG b/img/c3.PNG
new file mode 100644
index 0000000..ed9b7d1
Binary files /dev/null and b/img/c3.PNG differ
diff --git a/img/d0.PNG b/img/d0.PNG
new file mode 100644
index 0000000..4022ab2
Binary files /dev/null and b/img/d0.PNG differ
diff --git a/img/d1.PNG b/img/d1.PNG
new file mode 100644
index 0000000..dc9d94c
Binary files /dev/null and b/img/d1.PNG differ
diff --git a/img/d2.PNG b/img/d2.PNG
new file mode 100644
index 0000000..79a2eb6
Binary files /dev/null and b/img/d2.PNG differ
diff --git a/img/e0.PNG b/img/e0.PNG
new file mode 100644
index 0000000..128a81e
Binary files /dev/null and b/img/e0.PNG differ
diff --git a/img/e1.PNG b/img/e1.PNG
new file mode 100644
index 0000000..fb17cfc
Binary files /dev/null and b/img/e1.PNG differ
diff --git a/img/e2.PNG b/img/e2.PNG
new file mode 100644
index 0000000..7e122f5
Binary files /dev/null and b/img/e2.PNG differ
diff --git a/img/e3.PNG b/img/e3.PNG
new file mode 100644
index 0000000..0dd71e8
Binary files /dev/null and b/img/e3.PNG differ
diff --git a/img/f0.PNG b/img/f0.PNG
new file mode 100644
index 0000000..a2b7913
Binary files /dev/null and b/img/f0.PNG differ
diff --git a/img/f1.PNG b/img/f1.PNG
new file mode 100644
index 0000000..a35d9f5
Binary files /dev/null and b/img/f1.PNG differ
diff --git a/img/f2.PNG b/img/f2.PNG
new file mode 100644
index 0000000..4b0c1bb
Binary files /dev/null and b/img/f2.PNG differ
diff --git a/img/f3.PNG b/img/f3.PNG
new file mode 100644
index 0000000..da4cc69
Binary files /dev/null and b/img/f3.PNG differ
diff --git a/img/g0.PNG b/img/g0.PNG
new file mode 100644
index 0000000..ed94eb8
Binary files /dev/null and b/img/g0.PNG differ
diff --git a/img/g1.PNG b/img/g1.PNG
new file mode 100644
index 0000000..b75bfe4
Binary files /dev/null and b/img/g1.PNG differ
diff --git a/img/g2.PNG b/img/g2.PNG
new file mode 100644
index 0000000..095929f
Binary files /dev/null and b/img/g2.PNG differ
diff --git a/img/h0.PNG b/img/h0.PNG
new file mode 100644
index 0000000..4542536
Binary files /dev/null and b/img/h0.PNG differ
diff --git a/img/h1.PNG b/img/h1.PNG
new file mode 100644
index 0000000..4274c97
Binary files /dev/null and b/img/h1.PNG differ
diff --git a/img/h2.PNG b/img/h2.PNG
new file mode 100644
index 0000000..78d2d80
Binary files /dev/null and b/img/h2.PNG differ
diff --git a/img/h3.PNG b/img/h3.PNG
new file mode 100644
index 0000000..7214bcb
Binary files /dev/null and b/img/h3.PNG differ
diff --git a/scenes/cornell.txt b/scenes/cornell.txt
index 83ff820..77ad551 100644
--- a/scenes/cornell.txt
+++ b/scenes/cornell.txt
@@ -52,7 +52,7 @@ EMITTANCE 0
CAMERA
RES 800 800
FOVY 45
-ITERATIONS 5000
+ITERATIONS 10
DEPTH 8
FILE cornell
EYE 0.0 5 10.5
diff --git a/scenes/cornell_ceiling_light_plus.txt b/scenes/cornell_ceiling_light_plus.txt
new file mode 100644
index 0000000..54a596b
--- /dev/null
+++ b/scenes/cornell_ceiling_light_plus.txt
@@ -0,0 +1,153 @@
+// Emissive material (light)
+MATERIAL 0
+RGB 1 1 1
+SPECEX 0
+SPECRGB 0 0 0
+REFL 0
+REFR 0
+REFRIOR 0
+EMITTANCE 1
+
+// Diffuse white
+MATERIAL 1
+RGB .98 .98 .98
+SPECEX 0
+SPECRGB 0 0 0
+REFL 0
+REFR 0
+REFRIOR 0
+EMITTANCE 0
+
+// Diffuse red
+MATERIAL 2
+RGB .85 .35 .35
+SPECEX 0
+SPECRGB 0 0 0
+REFL 0
+REFR 0
+REFRIOR 0
+EMITTANCE 0
+
+// Diffuse green
+MATERIAL 3
+RGB .35 .85 .35
+SPECEX 0
+SPECRGB 0 0 0
+REFL 0
+REFR 0
+REFRIOR 0
+EMITTANCE 0
+
+// Specular white
+MATERIAL 4
+RGB .98 .98 .98
+SPECEX 0
+SPECRGB .98 .98 .98
+REFL 1
+REFR 0
+REFRIOR 0
+EMITTANCE 0
+
+// Refractive white
+MATERIAL 5
+RGB .98 .98 .98
+SPECEX 0
+SPECRGB .98 .98 .98
+REFL 0
+REFR 1
+REFRIOR 1.5
+EMITTANCE 0
+
+// Diffuse white
+MATERIAL 6
+RGB .98 .98 .98
+SPECEX 0
+SPECRGB .98 .98 .98
+REFL 0
+REFR 0
+REFRIOR 0
+EMITTANCE 0
+
+// Camera
+CAMERA
+RES 800 800
+FOVY 45
+ITERATIONS 10
+DEPTH 8
+FILE cornell
+EYE 0.0 5 10.5
+LOOKAT 0 5 0
+UP 0 1 0
+
+
+// Ceiling light
+OBJECT 0
+cube
+material 0
+TRANS 0 10 0
+ROTAT 0 0 0
+SCALE 10 .3 10
+
+// Floor
+OBJECT 1
+cube
+material 1
+TRANS 0 0 0
+ROTAT 0 0 0
+SCALE 10 .01 10
+
+// Ceiling
+OBJECT 2
+cube
+material 1
+TRANS 0 10 0
+ROTAT 0 0 90
+SCALE .01 10 10
+
+// Back wall
+OBJECT 3
+cube
+material 1
+TRANS 0 5 -5
+ROTAT 0 90 0
+SCALE .01 10 10
+
+// Left wall
+OBJECT 4
+cube
+material 2
+TRANS -5 5 0
+ROTAT 0 0 0
+SCALE .01 10 10
+
+// Right wall
+OBJECT 5
+cube
+material 3
+TRANS 5 5 0
+ROTAT 0 0 0
+SCALE .01 10 10
+
+// Sphere
+OBJECT 6
+sphere
+material 4
+TRANS -4 4 -1
+ROTAT 0 0 0
+SCALE 3 3 3
+
+// Sphere
+OBJECT 7
+sphere
+material 5
+TRANS 0 4 -1
+ROTAT 0 0 0
+SCALE 3 3 3
+
+// Sphere
+OBJECT 8
+sphere
+material 6
+TRANS 4 4 -1
+ROTAT 0 0 0
+SCALE 3 3 3
diff --git a/scenes/demo.txt b/scenes/demo.txt
new file mode 100644
index 0000000..5534bba
--- /dev/null
+++ b/scenes/demo.txt
@@ -0,0 +1,528 @@
+/////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////
+
+// Light White
+MATERIAL 0
+RGB 1 1 1
+SPECEX 0
+SPECRGB 0 0 0
+REFL 0
+REFR 0
+REFRIOR 0
+EMITTANCE 7
+
+// Reflective
+MATERIAL 1
+RGB 1 1 1
+SPECEX 0
+SPECRGB 0 0 0
+REFL 1
+REFR 0
+REFRIOR 0
+EMITTANCE 0
+
+// Diffuse
+MATERIAL 2
+RGB 0.35 0.85 0.85
+SPECEX 0
+SPECRGB 0 0 0
+REFL 0
+REFR 0
+REFRIOR 0
+EMITTANCE 0
+
+// Diffuse
+MATERIAL 3
+RGB 0.85 0.35 0.85
+SPECEX 0
+SPECRGB 0 0 0
+REFL 0
+REFR 0
+REFRIOR 0
+EMITTANCE 0
+
+// Diffuse
+MATERIAL 4
+RGB 0.85 0.85 0.35
+SPECEX 0
+SPECRGB 0 0 0
+REFL 0
+REFR 0
+REFRIOR 0
+EMITTANCE 0
+
+// Diffuse
+MATERIAL 5
+RGB 0.35 0.85 0.35
+SPECEX 0
+SPECRGB 0 0 0
+REFL 0
+REFR 0
+REFRIOR 0
+EMITTANCE 0
+
+// Diffuse
+MATERIAL 6
+RGB 0.35 0.35 0.85
+SPECEX 0
+SPECRGB 0 0 0
+REFL 0
+REFR 0
+REFRIOR 0
+EMITTANCE 0
+
+// Refractive
+MATERIAL 7
+RGB 1 1 1
+SPECEX 0
+SPECRGB 0 0 0
+REFL 0
+REFR 1
+REFRIOR 1.5
+EMITTANCE 0
+
+// Diffuse
+MATERIAL 8
+RGB 0.98 0.98 0.98
+SPECEX 0
+SPECRGB 0 0 0
+REFL 0
+REFR 0
+REFRIOR 0
+EMITTANCE 0
+
+// Diffuse
+MATERIAL 9
+RGB 0.43 0.34 0.24
+SPECEX 0
+SPECRGB 0 0 0
+REFL 0
+REFR 0
+REFRIOR 0
+EMITTANCE 0
+
+// Diffuse
+MATERIAL 10
+RGB 0.5 0.5 0.5
+SPECEX 0
+SPECRGB 0 0 0
+REFL 0
+REFR 0
+REFRIOR 0
+EMITTANCE 0
+
+// Light White
+MATERIAL 11
+RGB 1 0.1 1
+SPECEX 0
+SPECRGB 0 0 0
+REFL 0
+REFR 0
+REFRIOR 0
+EMITTANCE 7
+
+// Light White
+MATERIAL 12
+RGB 1 0.1 0.1
+SPECEX 0
+SPECRGB 0 0 0
+REFL 0
+REFR 0
+REFRIOR 0
+EMITTANCE 7
+
+// Light White
+MATERIAL 13
+RGB 0.1 0.1 1
+SPECEX 0
+SPECRGB 0 0 0
+REFL 0
+REFR 0
+REFRIOR 0
+EMITTANCE 7
+
+// Light White
+MATERIAL 14
+RGB 0.1 1 0.1
+SPECEX 0
+SPECRGB 0 0 0
+REFL 0
+REFR 0
+REFRIOR 0
+EMITTANCE 7
+
+// Diffuse Reflective
+MATERIAL 15
+RGB 0.35 0.75 0.35
+SPECEX 0
+SPECRGB 0 0 0
+REFL 0.4
+REFR 0
+REFRIOR 0
+EMITTANCE 0
+
+/////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////
+
+// Camera
+CAMERA
+RES 800 800
+FOVY 45
+ITERATIONS 10
+DEPTH 12
+FILE cornell
+EYE 0 -10 50
+LOOKAT 0 30 0
+UP 0 1 0
+
+/////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////
+
+// Ceiling light
+OBJECT 0
+cube
+material 11
+TRANS -20 80 0
+ROTAT 0 0 0
+SCALE 10 1 10
+
+// Ceiling light
+OBJECT 1
+cube
+material 11
+TRANS 20 80 0
+ROTAT 0 0 0
+SCALE 10 1 10
+
+// Ceiling light
+OBJECT 2
+cube
+material 0
+TRANS 0 80 0
+ROTAT 0 0 0
+SCALE 10 1 10
+
+// Ceiling light
+OBJECT 3
+cube
+material 12
+TRANS -20 80 -20
+ROTAT 0 0 0
+SCALE 10 1 10
+
+// Ceiling light
+OBJECT 4
+cube
+material 13
+TRANS 20 80 -20
+ROTAT 0 0 0
+SCALE 10 1 10
+
+// Ceiling light
+OBJECT 5
+cube
+material 14
+TRANS 0 80 -20
+ROTAT 0 0 0
+SCALE 10 1 10
+
+// Ceiling light
+OBJECT 6
+cube
+material 13
+TRANS -20 80 20
+ROTAT 0 0 0
+SCALE 10 1 10
+
+// Ceiling light
+OBJECT 7
+cube
+material 12
+TRANS 20 80 20
+ROTAT 0 0 0
+SCALE 10 1 10
+
+// Ceiling light
+OBJECT 8
+cube
+material 14
+TRANS 0 80 20
+ROTAT 0 0 0
+SCALE 10 1 10
+
+/////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////
+
+// Cube
+OBJECT 8
+cube
+material 7
+TRANS -12 38.960193405 0
+ROTAT 4 60 53
+SCALE 3.375 3.375 3.375
+
+// Cube
+OBJECT 9
+cube
+material 1
+TRANS -12 32.9939799387 0
+ROTAT 42 25 28
+SCALE 5.0625 5.0625 5.0625
+
+// Cube
+OBJECT 10
+cube
+material 7
+TRANS -12 24.0446597393 0
+ROTAT 19 63 72
+SCALE 7.59375 7.59375 7.59375
+
+// Cube
+OBJECT 11
+cube
+material 7
+TRANS -12 15.0953395399 0
+ROTAT 32 0 58
+SCALE 5.0625 5.0625 5.0625
+
+// Cube
+OBJECT 12
+cube
+material 7
+TRANS -12 9.12912607362 0
+ROTAT 80 14 27
+SCALE 3.375 3.375 3.375
+
+// Cube
+OBJECT 13
+cube
+material 1
+TRANS -12 5.15165042945 0
+ROTAT 67 69 47
+SCALE 2.25 2.25 2.25
+
+// Cube
+OBJECT 14
+cube
+material 7
+TRANS -12 2.5 0
+ROTAT 73 3 57
+SCALE 1.5 1.5 1.5
+
+/////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////
+
+// Cube
+OBJECT 15
+cube
+material 1
+TRANS 12 47.6954830935 0
+ROTAT 65 29 48
+SCALE 3.375 3.375 3.375
+
+// Cube
+OBJECT 16
+cube
+material 1
+TRANS 12 41.7292696272 0
+ROTAT 44 16 24
+SCALE 5.0625 5.0625 5.0625
+
+// Cube
+OBJECT 17
+cube
+material 7
+TRANS 12 35.7630561609 0
+ROTAT 34 41 88
+SCALE 3.375 3.375 3.375
+
+// Cube
+OBJECT 18
+cube
+material 7
+TRANS 12 31.7855805167 0
+ROTAT 52 59 49
+SCALE 2.25 2.25 2.25
+
+// Cube
+OBJECT 19
+cube
+material 7
+TRANS 12 29.1339300873 0
+ROTAT 84 70 54
+SCALE 1.5 1.5 1.5
+
+/////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////
+
+// Cube
+OBJECT 20
+cube
+material 7
+TRANS 0 43.3278382492 12
+ROTAT 18 21 68
+SCALE 7.529536 7.529536 7.529536
+
+// Cube
+OBJECT 21
+cube
+material 7
+TRANS 0 35.6914949854 12
+ROTAT 71 41 16
+SCALE 5.37824 5.37824 5.37824
+
+// Cube
+OBJECT 22
+cube
+material 7
+TRANS 0 30.2369640827 12
+ROTAT 36 20 1
+SCALE 3.8416 3.8416 3.8416
+
+// Cube
+OBJECT 23
+cube
+material 1
+TRANS 0 26.3408705807 12
+ROTAT 87 76 24
+SCALE 2.744 2.744 2.744
+
+// Cube
+OBJECT 24
+cube
+material 1
+TRANS 0 23.5579466507 12
+ROTAT 34 54 6
+SCALE 1.96 1.96 1.96
+
+// Cube
+OBJECT 25
+cube
+material 7
+TRANS 0 21.5701438436 12
+ROTAT 62 74 44
+SCALE 1.4 1.4 1.4
+
+/////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////
+
+// Cube
+OBJECT 26
+cube
+material 7
+TRANS 0 43.3278382492 -12
+ROTAT 55 26 0
+SCALE 7.529536 7.529536 7.529536
+
+// Cube
+OBJECT 27
+cube
+material 1
+TRANS 0 35.6914949854 -12
+ROTAT 29 75 49
+SCALE 5.37824 5.37824 5.37824
+
+// Cube
+OBJECT 28
+cube
+material 7
+TRANS 0 30.2369640827 -12
+ROTAT 13 40 53
+SCALE 3.8416 3.8416 3.8416
+
+// Cube
+OBJECT 29
+cube
+material 7
+TRANS 0 26.3408705807 -12
+ROTAT 82 83 8
+SCALE 2.744 2.744 2.744
+
+// Cube
+OBJECT 30
+cube
+material 7
+TRANS 0 23.5579466507 -12
+ROTAT 39 88 60
+SCALE 1.96 1.96 1.96
+
+// Cube
+OBJECT 31
+cube
+material 1
+TRANS 0 21.5701438436 -12
+ROTAT 4 15 84
+SCALE 1.4 1.4 1.4
+
+/////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////
+
+// Cube
+OBJECT 32
+cube
+material 9
+TRANS 0 53.3278382492 0
+ROTAT 0 0 20
+SCALE 27 1.3 3
+
+// Cube
+OBJECT 33
+cube
+material 9
+TRANS 0 54.5278382492 0
+ROTAT 0 0 20
+SCALE 3 1.3 27
+
+// Cube
+OBJECT 34
+cube
+material 8
+TRANS -12 27.9139191246 0
+ROTAT 0 0 0
+SCALE 0.5 45.8278382492 0.5
+
+// Cube
+OBJECT 35
+cube
+material 8
+TRANS 12 45.2308841683 0
+ROTAT 0 0 0
+SCALE 0.5 28.1939081619 0.5
+
+// Cube
+OBJECT 36
+cube
+material 8
+TRANS 0 38.2308841683 12
+ROTAT 0 0 0
+SCALE 0.5 35.1939081619 0.5
+
+// Cube
+OBJECT 37
+cube
+material 8
+TRANS 0 38.2308841683 -12
+ROTAT 0 0 0
+SCALE 0.5 35.1939081619 0.5
+
+
+
+/////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////
+
+// World
+OBJECT 38
+cube
+material 15
+TRANS 0 -10 0
+ROTAT 0 0 0
+SCALE 200 200 200
\ No newline at end of file
diff --git a/src/interactions.h b/src/interactions.h
index 144a9f5..f11c08f 100644
--- a/src/interactions.h
+++ b/src/interactions.h
@@ -1,5 +1,6 @@
#pragma once
+#define EPSILON_SCALE 10
#include "intersections.h"
/**
@@ -45,18 +46,52 @@ glm::vec3 calculateRandomDirectionInHemisphere(
*/
__host__ __device__
void scatterRay(
- PathSegment & pathSegment,
- glm::vec3 intersect,
- glm::vec3 normal,
- const Material &m,
- thrust::default_random_engine &rng) {
- glm::vec3 newDirection;
- if (m.hasReflective) {
- newDirection = glm::reflect(pathSegment.ray.direction, normal);
- } else {
- newDirection = calculateRandomDirectionInHemisphere(normal, rng);
+ PathSegment& pathSegment,
+ glm::vec3 intersect,
+ glm::vec3 normal,
+ const Material& m,
+ thrust::default_random_engine& rng) {
+ // TODO: implement this.
+ // A basic implementation of pure-diffuse shading will just call the
+ // calculateRandomDirectionInHemisphere defined above.
+
+ thrust::uniform_real_distribution u01(0, 1);
+ float prob = u01(rng);
+ glm::vec3 rayVec = glm::normalize(pathSegment.ray.direction);
+ glm::vec3 norVec = glm::normalize(normal);
+
+ if (prob < m.hasReflective) {
+ // Reflection
+ pathSegment.ray.origin = intersect + (float)EPSILON * EPSILON_SCALE * norVec;
+ pathSegment.ray.direction = glm::normalize(glm::reflect(rayVec, norVec));
}
+ else if (prob < (m.hasReflective + m.hasRefractive)) {
+ // Refraction
+ // Reference: https://raytracing.github.io/books/RayTracingInOneWeekend.html#dielectrics
+ float refractionRatio = (glm::dot(rayVec, norVec) > 0) ? m.indexOfRefraction : (1.0f / m.indexOfRefraction); // inside sphere : outside sphere
- pathSegment.ray.direction = newDirection;
- pathSegment.ray.origin = intersect + (newDirection * 0.0001f);
-}
+ float cosTheta = glm::min(glm::dot(-1.0f * rayVec, norVec), 1.0f);
+ float sinTheta = sqrt(1.0 - cosTheta * cosTheta);
+ bool cannotRefract = refractionRatio * sinTheta > 1.0f;
+
+ float r0 = (1 - refractionRatio) / (1 + refractionRatio);
+ r0 *= r0;
+ float schlickAppro = r0 + (1 - r0) * pow(1 - cosTheta, 5);
+ bool schlickBool = schlickAppro > u01(rng);
+
+ pathSegment.ray.origin = intersect + (float)EPSILON * EPSILON_SCALE * rayVec;
+ if (cannotRefract || schlickBool) {
+ pathSegment.ray.direction = glm::reflect(rayVec, norVec);
+ }
+ else {
+ // Snell's Law
+ pathSegment.ray.direction = glm::refract(rayVec, norVec, refractionRatio);
+ }
+ }
+ else {
+ // Diffusion
+ pathSegment.ray.origin = intersect + (float)EPSILON * EPSILON_SCALE * norVec;
+ pathSegment.ray.direction = glm::normalize(calculateRandomDirectionInHemisphere(norVec, rng));
+ }
+ pathSegment.color *= m.color;
+}
\ No newline at end of file
diff --git a/src/main.cpp b/src/main.cpp
index 4092ae4..d7a10d7 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -6,6 +6,13 @@
#include "../imgui/imgui_impl_glfw.h"
#include "../imgui/imgui_impl_opengl3.h"
+/*******************************************************************
+* TIMER 0 for no timer
+* TIMER 1 for timing path tracer and denoise
+*******************************************************************/
+#define TIMER 1
+
+
static std::string startTimeString;
// For camera controls
@@ -25,9 +32,9 @@ int lastLoopIterations = 0;
bool ui_showGbuffer = false;
bool ui_denoise = false;
int ui_filterSize = 80;
-float ui_colorWeight = 0.45f;
-float ui_normalWeight = 0.35f;
-float ui_positionWeight = 0.2f;
+float ui_colorWeight = 8.f;
+float ui_normalWeight = 1.f;
+float ui_positionWeight = 1.5f;
bool ui_saveAndExit = false;
static bool camchanged = true;
@@ -45,6 +52,17 @@ int iteration;
int width;
int height;
+static float timePT;
+static float timeAT;
+static bool hasPrinted;
+using StreamCompaction::Common::PerformanceTimer;
+PerformanceTimer& timer()
+{
+ static PerformanceTimer timer;
+ return timer;
+}
+
+
//-------------------------------
//-------------MAIN--------------
//-------------------------------
@@ -144,12 +162,15 @@ void runCuda() {
cameraPosition += cam.lookAt;
cam.position = cameraPosition;
camchanged = false;
+
+ //std::cout << cam.position.x << " " << cam.position.y << " " << cam.position.z << " " << std::endl;
}
// Map OpenGL buffer object for writing from CUDA on a single GPU
- // No data is moved (Win & Linux). When mapped to CUDA, OpenGL should not use this buffer
+ // No data is moved (Win & Linux). When mapped to CUDA, OpenGL should not use this buffer
if (iteration == 0) {
+ hasPrinted = false;
pathtraceFree();
pathtraceInit(scene);
}
@@ -157,18 +178,53 @@ void runCuda() {
uchar4 *pbo_dptr = NULL;
cudaGLMapBufferObject((void**)&pbo_dptr, pbo);
+ int num_paths = -1;
if (iteration < ui_iterations) {
iteration++;
// execute the kernel
int frame = 0;
- pathtrace(frame, iteration);
+#if TIMER
+ // Start Timer
+ if (iteration == 1)
+ {
+ timePT = 0.f;
+ }
+ timer().startCpuTimer();
+#endif // TIMER
+ num_paths = pathtrace(frame, iteration, iteration == ui_iterations);
+#if TIMER
+ timer().endCpuTimer();
+ timePT += timer().getCpuElapsedTimeForPreviousOperation();
+ if (iteration == ui_iterations) {
+ std::cout << "Path-trace time for " << iteration << " iterations: " << timePT << "ms" << std::endl;
+ }
+#endif // TIMER
+ }
+
+ if (iteration == ui_iterations) {
+#if TIMER
+ // Start Timer
+ timeAT = 0.f;
+ if (!hasPrinted) {
+ timer().startCpuTimer();
+ }
+#endif // TIMER
+ denoise(ui_filterSize, ui_colorWeight, ui_positionWeight, ui_normalWeight);
+#if TIMER
+ if (!hasPrinted) {
+ hasPrinted = true;
+ timer().endCpuTimer();
+ timeAT += timer().getCpuElapsedTimeForPreviousOperation();
+ std::cout << "Denoise time for " << iteration << " iterations: " << timeAT << "ms\n\n" << std::endl;
+ }
+#endif // TIMER
}
if (ui_showGbuffer) {
showGBuffer(pbo_dptr);
} else {
- showImage(pbo_dptr, iteration);
+ showImage(pbo_dptr, iteration, ui_denoise);
}
// unmap buffer object
diff --git a/src/main.h b/src/main.h
index 06d311a..8e3f281 100644
--- a/src/main.h
+++ b/src/main.h
@@ -19,6 +19,7 @@
#include "pathtrace.h"
#include "utilities.h"
#include "scene.h"
+#include "../stream_compaction/common.h"
using namespace std;
@@ -46,3 +47,4 @@ void runCuda();
void keyCallback(GLFWwindow *window, int key, int scancode, int action, int mods);
void mousePositionCallback(GLFWwindow* window, double xpos, double ypos);
void mouseButtonCallback(GLFWwindow* window, int button, int action, int mods);
+
diff --git a/src/pathtrace.cu b/src/pathtrace.cu
index 23e5f90..f2dcbd7 100644
--- a/src/pathtrace.cu
+++ b/src/pathtrace.cu
@@ -1,433 +1,584 @@
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include "sceneStructs.h"
-#include "scene.h"
-#include "glm/glm.hpp"
-#include "glm/gtx/norm.hpp"
-#include "utilities.h"
-#include "pathtrace.h"
-#include "intersections.h"
-#include "interactions.h"
-
-#define ERRORCHECK 1
-
-#define FILENAME (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__)
-#define checkCUDAError(msg) checkCUDAErrorFn(msg, FILENAME, __LINE__)
-void checkCUDAErrorFn(const char *msg, const char *file, int line) {
-#if ERRORCHECK
- cudaDeviceSynchronize();
- cudaError_t err = cudaGetLastError();
- if (cudaSuccess == err) {
- return;
- }
-
- fprintf(stderr, "CUDA error");
- if (file) {
- fprintf(stderr, " (%s:%d)", file, line);
- }
- fprintf(stderr, ": %s: %s\n", msg, cudaGetErrorString(err));
-# ifdef _WIN32
- getchar();
-# endif
- exit(EXIT_FAILURE);
-#endif
-}
-
-__host__ __device__
-thrust::default_random_engine makeSeededRandomEngine(int iter, int index, int depth) {
- int h = utilhash((1 << 31) | (depth << 22) | iter) ^ utilhash(index);
- return thrust::default_random_engine(h);
-}
-
-//Kernel that writes the image to the OpenGL PBO directly.
-__global__ void sendImageToPBO(uchar4* pbo, glm::ivec2 resolution,
- int iter, glm::vec3* image) {
- int x = (blockIdx.x * blockDim.x) + threadIdx.x;
- int y = (blockIdx.y * blockDim.y) + threadIdx.y;
-
- if (x < resolution.x && y < resolution.y) {
- int index = x + (y * resolution.x);
- glm::vec3 pix = image[index];
-
- glm::ivec3 color;
- color.x = glm::clamp((int) (pix.x / iter * 255.0), 0, 255);
- color.y = glm::clamp((int) (pix.y / iter * 255.0), 0, 255);
- color.z = glm::clamp((int) (pix.z / iter * 255.0), 0, 255);
-
- // Each thread writes one pixel location in the texture (textel)
- pbo[index].w = 0;
- pbo[index].x = color.x;
- pbo[index].y = color.y;
- pbo[index].z = color.z;
- }
-}
-
-__global__ void gbufferToPBO(uchar4* pbo, glm::ivec2 resolution, GBufferPixel* gBuffer) {
- int x = (blockIdx.x * blockDim.x) + threadIdx.x;
- int y = (blockIdx.y * blockDim.y) + threadIdx.y;
-
- if (x < resolution.x && y < resolution.y) {
- int index = x + (y * resolution.x);
- float timeToIntersect = gBuffer[index].t * 256.0;
-
- pbo[index].w = 0;
- pbo[index].x = timeToIntersect;
- pbo[index].y = timeToIntersect;
- pbo[index].z = timeToIntersect;
- }
-}
-
-static Scene * hst_scene = NULL;
-static glm::vec3 * dev_image = NULL;
-static Geom * dev_geoms = NULL;
-static Material * dev_materials = NULL;
-static PathSegment * dev_paths = NULL;
-static ShadeableIntersection * dev_intersections = NULL;
-static GBufferPixel* dev_gBuffer = NULL;
-// TODO: static variables for device memory, any extra info you need, etc
-// ...
-
-void pathtraceInit(Scene *scene) {
- hst_scene = scene;
- const Camera &cam = hst_scene->state.camera;
- const int pixelcount = cam.resolution.x * cam.resolution.y;
-
- cudaMalloc(&dev_image, pixelcount * sizeof(glm::vec3));
- cudaMemset(dev_image, 0, pixelcount * sizeof(glm::vec3));
-
- cudaMalloc(&dev_paths, pixelcount * sizeof(PathSegment));
-
- cudaMalloc(&dev_geoms, scene->geoms.size() * sizeof(Geom));
- cudaMemcpy(dev_geoms, scene->geoms.data(), scene->geoms.size() * sizeof(Geom), cudaMemcpyHostToDevice);
-
- cudaMalloc(&dev_materials, scene->materials.size() * sizeof(Material));
- cudaMemcpy(dev_materials, scene->materials.data(), scene->materials.size() * sizeof(Material), cudaMemcpyHostToDevice);
-
- cudaMalloc(&dev_intersections, pixelcount * sizeof(ShadeableIntersection));
- cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection));
-
- cudaMalloc(&dev_gBuffer, pixelcount * sizeof(GBufferPixel));
-
- // TODO: initialize any extra device memeory you need
-
- checkCUDAError("pathtraceInit");
-}
-
-void pathtraceFree() {
- cudaFree(dev_image); // no-op if dev_image is null
- cudaFree(dev_paths);
- cudaFree(dev_geoms);
- cudaFree(dev_materials);
- cudaFree(dev_intersections);
- cudaFree(dev_gBuffer);
- // TODO: clean up any extra device memory you created
-
- checkCUDAError("pathtraceFree");
-}
-
-/**
-* Generate PathSegments with rays from the camera through the screen into the
-* scene, which is the first bounce of rays.
-*
-* Antialiasing - add rays for sub-pixel sampling
-* motion blur - jitter rays "in time"
-* lens effect - jitter ray origin positions based on a lens
-*/
-__global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, PathSegment* pathSegments)
-{
- int x = (blockIdx.x * blockDim.x) + threadIdx.x;
- int y = (blockIdx.y * blockDim.y) + threadIdx.y;
-
- if (x < cam.resolution.x && y < cam.resolution.y) {
- int index = x + (y * cam.resolution.x);
- PathSegment & segment = pathSegments[index];
-
- segment.ray.origin = cam.position;
- segment.color = glm::vec3(1.0f, 1.0f, 1.0f);
-
- segment.ray.direction = glm::normalize(cam.view
- - cam.right * cam.pixelLength.x * ((float)x - (float)cam.resolution.x * 0.5f)
- - cam.up * cam.pixelLength.y * ((float)y - (float)cam.resolution.y * 0.5f)
- );
-
- segment.pixelIndex = index;
- segment.remainingBounces = traceDepth;
- }
-}
-
-__global__ void computeIntersections(
- int depth
- , int num_paths
- , PathSegment * pathSegments
- , Geom * geoms
- , int geoms_size
- , ShadeableIntersection * intersections
- )
-{
- int path_index = blockIdx.x * blockDim.x + threadIdx.x;
-
- if (path_index < num_paths)
- {
- PathSegment pathSegment = pathSegments[path_index];
-
- float t;
- glm::vec3 intersect_point;
- glm::vec3 normal;
- float t_min = FLT_MAX;
- int hit_geom_index = -1;
- bool outside = true;
-
- glm::vec3 tmp_intersect;
- glm::vec3 tmp_normal;
-
- // naive parse through global geoms
-
- for (int i = 0; i < geoms_size; i++)
- {
- Geom & geom = geoms[i];
-
- if (geom.type == CUBE)
- {
- t = boxIntersectionTest(geom, pathSegment.ray, tmp_intersect, tmp_normal, outside);
- }
- else if (geom.type == SPHERE)
- {
- t = sphereIntersectionTest(geom, pathSegment.ray, tmp_intersect, tmp_normal, outside);
- }
-
- // Compute the minimum t from the intersection tests to determine what
- // scene geometry object was hit first.
- if (t > 0.0f && t_min > t)
- {
- t_min = t;
- hit_geom_index = i;
- intersect_point = tmp_intersect;
- normal = tmp_normal;
- }
- }
-
- if (hit_geom_index == -1)
- {
- intersections[path_index].t = -1.0f;
- }
- else
- {
- //The ray hits something
- intersections[path_index].t = t_min;
- intersections[path_index].materialId = geoms[hit_geom_index].materialid;
- intersections[path_index].surfaceNormal = normal;
- }
- }
-}
-
-__global__ void shadeSimpleMaterials (
- int iter
- , int num_paths
- , ShadeableIntersection * shadeableIntersections
- , PathSegment * pathSegments
- , Material * materials
- )
-{
- int idx = blockIdx.x * blockDim.x + threadIdx.x;
- if (idx < num_paths)
- {
- ShadeableIntersection intersection = shadeableIntersections[idx];
- PathSegment segment = pathSegments[idx];
- if (segment.remainingBounces == 0) {
- return;
- }
-
- if (intersection.t > 0.0f) { // if the intersection exists...
- segment.remainingBounces--;
- // Set up the RNG
- thrust::default_random_engine rng = makeSeededRandomEngine(iter, idx, segment.remainingBounces);
-
- Material material = materials[intersection.materialId];
- glm::vec3 materialColor = material.color;
-
- // If the material indicates that the object was a light, "light" the ray
- if (material.emittance > 0.0f) {
- segment.color *= (materialColor * material.emittance);
- segment.remainingBounces = 0;
- }
- else {
- segment.color *= materialColor;
- glm::vec3 intersectPos = intersection.t * segment.ray.direction + segment.ray.origin;
- scatterRay(segment, intersectPos, intersection.surfaceNormal, material, rng);
- }
- // If there was no intersection, color the ray black.
- // Lots of renderers use 4 channel color, RGBA, where A = alpha, often
- // used for opacity, in which case they can indicate "no opacity".
- // This can be useful for post-processing and image compositing.
- } else {
- segment.color = glm::vec3(0.0f);
- segment.remainingBounces = 0;
- }
-
- pathSegments[idx] = segment;
- }
-}
-
-__global__ void generateGBuffer (
- int num_paths,
- ShadeableIntersection* shadeableIntersections,
- PathSegment* pathSegments,
- GBufferPixel* gBuffer) {
- int idx = blockIdx.x * blockDim.x + threadIdx.x;
- if (idx < num_paths)
- {
- gBuffer[idx].t = shadeableIntersections[idx].t;
- }
-}
-
-// Add the current iteration's output to the overall image
-__global__ void finalGather(int nPaths, glm::vec3 * image, PathSegment * iterationPaths)
-{
- int index = (blockIdx.x * blockDim.x) + threadIdx.x;
-
- if (index < nPaths)
- {
- PathSegment iterationPath = iterationPaths[index];
- image[iterationPath.pixelIndex] += iterationPath.color;
- }
-}
-
-/**
- * Wrapper for the __global__ call that sets up the kernel calls and does a ton
- * of memory management
- */
-void pathtrace(int frame, int iter) {
- const int traceDepth = hst_scene->state.traceDepth;
- const Camera &cam = hst_scene->state.camera;
- const int pixelcount = cam.resolution.x * cam.resolution.y;
-
- // 2D block for generating ray from camera
- const dim3 blockSize2d(8, 8);
- const dim3 blocksPerGrid2d(
- (cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x,
- (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y);
-
- // 1D block for path tracing
- const int blockSize1d = 128;
-
- ///////////////////////////////////////////////////////////////////////////
-
- // Pathtracing Recap:
- // * Initialize array of path rays (using rays that come out of the camera)
- // * You can pass the Camera object to that kernel.
- // * Each path ray must carry at minimum a (ray, color) pair,
- // * where color starts as the multiplicative identity, white = (1, 1, 1).
- // * This has already been done for you.
- // * NEW: For the first depth, generate geometry buffers (gbuffers)
- // * For each depth:
- // * Compute an intersection in the scene for each path ray.
- // A very naive version of this has been implemented for you, but feel
- // free to add more primitives and/or a better algorithm.
- // Currently, intersection distance is recorded as a parametric distance,
- // t, or a "distance along the ray." t = -1.0 indicates no intersection.
- // * Color is attenuated (multiplied) by reflections off of any object
- // * Stream compact away all of the terminated paths.
- // You may use either your implementation or `thrust::remove_if` or its
- // cousins.
- // * Note that you can't really use a 2D kernel launch any more - switch
- // to 1D.
- // * Shade the rays that intersected something or didn't bottom out.
- // That is, color the ray by performing a color computation according
- // to the shader, then generate a new ray to continue the ray path.
- // We recommend just updating the ray's PathSegment in place.
- // Note that this step may come before or after stream compaction,
- // since some shaders you write may also cause a path to terminate.
- // * Finally:
- // * if not denoising, add this iteration's results to the image
- // * TODO: if denoising, run kernels that take both the raw pathtraced result and the gbuffer, and put the result in the "pbo" from opengl
-
- generateRayFromCamera <<>>(cam, iter, traceDepth, dev_paths);
- checkCUDAError("generate camera ray");
-
- int depth = 0;
- PathSegment* dev_path_end = dev_paths + pixelcount;
- int num_paths = dev_path_end - dev_paths;
-
- // --- PathSegment Tracing Stage ---
- // Shoot ray into scene, bounce between objects, push shading chunks
-
- // Empty gbuffer
- cudaMemset(dev_gBuffer, 0, pixelcount * sizeof(GBufferPixel));
-
- // clean shading chunks
- cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection));
-
- bool iterationComplete = false;
- while (!iterationComplete) {
-
- // tracing
- dim3 numblocksPathSegmentTracing = (num_paths + blockSize1d - 1) / blockSize1d;
- computeIntersections <<>> (
- depth
- , num_paths
- , dev_paths
- , dev_geoms
- , hst_scene->geoms.size()
- , dev_intersections
- );
- checkCUDAError("trace one bounce");
- cudaDeviceSynchronize();
-
- if (depth == 0) {
- generateGBuffer<<>>(num_paths, dev_intersections, dev_paths, dev_gBuffer);
- }
-
- depth++;
-
- shadeSimpleMaterials<<>> (
- iter,
- num_paths,
- dev_intersections,
- dev_paths,
- dev_materials
- );
- iterationComplete = depth == traceDepth;
- }
-
- // Assemble this iteration and apply it to the image
- dim3 numBlocksPixels = (pixelcount + blockSize1d - 1) / blockSize1d;
- finalGather<<>>(num_paths, dev_image, dev_paths);
-
- ///////////////////////////////////////////////////////////////////////////
-
- // CHECKITOUT: use dev_image as reference if you want to implement saving denoised images.
- // Otherwise, screenshots are also acceptable.
- // Retrieve image from GPU
- cudaMemcpy(hst_scene->state.image.data(), dev_image,
- pixelcount * sizeof(glm::vec3), cudaMemcpyDeviceToHost);
-
- checkCUDAError("pathtrace");
-}
-
-// CHECKITOUT: this kernel "post-processes" the gbuffer/gbuffers into something that you can visualize for debugging.
-void showGBuffer(uchar4* pbo) {
- const Camera &cam = hst_scene->state.camera;
- const dim3 blockSize2d(8, 8);
- const dim3 blocksPerGrid2d(
- (cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x,
- (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y);
-
- // CHECKITOUT: process the gbuffer results and send them to OpenGL buffer for visualization
- gbufferToPBO<<>>(pbo, cam.resolution, dev_gBuffer);
-}
-
-void showImage(uchar4* pbo, int iter) {
-const Camera &cam = hst_scene->state.camera;
- const dim3 blockSize2d(8, 8);
- const dim3 blocksPerGrid2d(
- (cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x,
- (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y);
-
- // Send results to OpenGL buffer for rendering
- sendImageToPBO<<>>(pbo, cam.resolution, iter, dev_image);
-}
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "sceneStructs.h"
+#include "scene.h"
+#include "glm/glm.hpp"
+#include "glm/gtx/norm.hpp"
+#include "utilities.h"
+#include "pathtrace.h"
+#include "intersections.h"
+#include "interactions.h"
+
+#define ERRORCHECK 0
+
+/*******************************************************************
+* GBUFFER_RENDER 0 for gbufferToPBO to render intersects as color
+* GBUFFER_RENDER 1 for gbufferToPBO to render positions as color
+* GBUFFER_RENDER 2 for gbufferToPBO to render normals as color
+*******************************************************************/
+#define GBUFFER_RENDER 0
+
+/*******************************************************************
+* BLOCK_LENGTH is 1D lenght of blocks per grid
+*******************************************************************/
+#define BLOCK_LENGTH 8
+// TODO: update this to be variable length given FILTER_LENGTH
+const float gaussian[25] = {
+ 0.003765, 0.015019, 0.023792, 0.015019, 0.003765,
+ 0.015019, 0.059912, 0.094907, 0.059912, 0.015019,
+ 0.023792, 0.094907, 0.150342, 0.094907, 0.023792,
+ 0.015019, 0.059912, 0.094907, 0.059912, 0.015019,
+ 0.003765, 0.015019, 0.023792, 0.015019, 0.003765 };
+
+#define FILENAME (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__)
+#define checkCUDAError(msg) checkCUDAErrorFn(msg, FILENAME, __LINE__)
+void checkCUDAErrorFn(const char *msg, const char *file, int line) {
+#if ERRORCHECK
+ cudaDeviceSynchronize();
+ cudaError_t err = cudaGetLastError();
+ if (cudaSuccess == err) {
+ return;
+ }
+
+ fprintf(stderr, "CUDA error");
+ if (file) {
+ fprintf(stderr, " (%s:%d)", file, line);
+ }
+ fprintf(stderr, ": %s: %s\n", msg, cudaGetErrorString(err));
+# ifdef _WIN32
+ getchar();
+# endif
+ exit(EXIT_FAILURE);
+#endif
+}
+
+__host__ __device__
+thrust::default_random_engine makeSeededRandomEngine(int iter, int index, int depth) {
+ int h = utilhash((1 << 31) | (depth << 22) | iter) ^ utilhash(index);
+ return thrust::default_random_engine(h);
+}
+
+//Kernel that writes the image to the OpenGL PBO directly.
+__global__ void sendImageToPBO(uchar4* pbo, glm::ivec2 resolution, int iter, glm::vec3* image)
+{
+ int x = (blockIdx.x * blockDim.x) + threadIdx.x;
+ int y = (blockIdx.y * blockDim.y) + threadIdx.y;
+
+ if (x < resolution.x && y < resolution.y) {
+ int index = x + (y * resolution.x);
+ glm::vec3 pix = image[index];
+
+ glm::ivec3 color;
+ color.x = glm::clamp((int) (pix.x / iter * 255.0), 0, 255);
+ color.y = glm::clamp((int) (pix.y / iter * 255.0), 0, 255);
+ color.z = glm::clamp((int) (pix.z / iter * 255.0), 0, 255);
+
+ // Each thread writes one pixel location in the texture (textel)
+ pbo[index].w = 0;
+ pbo[index].x = color.x;
+ pbo[index].y = color.y;
+ pbo[index].z = color.z;
+ }
+}
+
+__global__ void gbufferToPBO(uchar4* pbo, glm::ivec2 resolution, GBufferPixel* gBuffer) {
+ int x = (blockIdx.x * blockDim.x) + threadIdx.x;
+ int y = (blockIdx.y * blockDim.y) + threadIdx.y;
+
+ if (x < resolution.x && y < resolution.y) {
+ int index = x + (y * resolution.x);
+
+#if GBUFFER_RENDER == 0
+ // Intersect
+ float timeToIntersect = gBuffer[index].t * 255.f;
+ pbo[index].w = 0;
+ pbo[index].x = timeToIntersect;
+ pbo[index].y = timeToIntersect;
+ pbo[index].z = timeToIntersect;
+#elif GBUFFER_RENDER == 1
+ glm::vec4 posToColor = glm::vec4(glm::normalize(glm::abs(gBuffer[index].pos)) * 255.f, 0.f);
+ pbo[index].w = posToColor.w;
+ pbo[index].x = posToColor.x;
+ pbo[index].y = posToColor.y;
+ pbo[index].z = posToColor.z;
+#elif GBUFFER_RENDER == 2
+ glm::vec4 norToColor = glm::vec4(glm::normalize(glm::abs(gBuffer[index].nor)) * 255.f, 0.f);
+ pbo[index].w = norToColor.w;
+ pbo[index].x = norToColor.x;
+ pbo[index].y = norToColor.y;
+ pbo[index].z = norToColor.z;
+#endif // GBUFFER_RENDER
+ }
+}
+
+static Scene* hst_scene = NULL;
+static glm::vec3* dev_image = NULL;
+static glm::vec3* dev_imageDenoise = NULL;
+static glm::vec3* dev_imageDenoiseDup = NULL;
+static Geom* dev_geoms = NULL;
+static Material* dev_materials = NULL;
+static PathSegment* dev_paths = NULL;
+static ShadeableIntersection* dev_intersections = NULL;
+static GBufferPixel* dev_gBuffer = NULL;
+static float* dev_gaussian = NULL;
+// TODO: static variables for device memory, any extra info you need, etc
+// ...
+
+void pathtraceInit(Scene *scene) {
+ hst_scene = scene;
+ const Camera &cam = hst_scene->state.camera;
+ const int pixelcount = cam.resolution.x * cam.resolution.y;
+
+ cudaMalloc(&dev_image, pixelcount * sizeof(glm::vec3));
+ cudaMemset(dev_image, 0, pixelcount * sizeof(glm::vec3));
+
+ cudaMalloc(&dev_imageDenoise, pixelcount * sizeof(glm::vec3));
+ cudaMemset(dev_imageDenoise, 0, pixelcount * sizeof(glm::vec3));
+
+ cudaMalloc(&dev_imageDenoiseDup, pixelcount * sizeof(glm::vec3));
+ cudaMemset(dev_imageDenoiseDup, 0, pixelcount * sizeof(glm::vec3));
+
+ cudaMalloc(&dev_paths, pixelcount * sizeof(PathSegment));
+
+ cudaMalloc(&dev_geoms, scene->geoms.size() * sizeof(Geom));
+ cudaMemcpy(dev_geoms, scene->geoms.data(), scene->geoms.size() * sizeof(Geom), cudaMemcpyHostToDevice);
+
+ cudaMalloc(&dev_materials, scene->materials.size() * sizeof(Material));
+ cudaMemcpy(dev_materials, scene->materials.data(), scene->materials.size() * sizeof(Material), cudaMemcpyHostToDevice);
+
+ cudaMalloc(&dev_intersections, pixelcount * sizeof(ShadeableIntersection));
+ cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection));
+
+ cudaMalloc(&dev_gBuffer, pixelcount * sizeof(GBufferPixel));
+
+ cudaMalloc(&dev_gaussian, 25 * sizeof(float));
+ cudaMemcpy(dev_gaussian, gaussian, 25 * sizeof(float), cudaMemcpyHostToDevice);
+
+ // TODO: initialize any extra device memeory you need
+
+ checkCUDAError("pathtraceInit");
+}
+
+void pathtraceFree() {
+ cudaFree(dev_image); // no-op if dev_image is null
+ cudaFree(dev_imageDenoise);
+ cudaFree(dev_imageDenoiseDup);
+ cudaFree(dev_paths);
+ cudaFree(dev_geoms);
+ cudaFree(dev_materials);
+ cudaFree(dev_intersections);
+ cudaFree(dev_gBuffer);
+ cudaFree(dev_gaussian);
+ // TODO: clean up any extra device memory you created
+
+ checkCUDAError("pathtraceFree");
+}
+
+/**
+* Generate PathSegments with rays from the camera through the screen into the
+* scene, which is the first bounce of rays.
+*
+* Antialiasing - add rays for sub-pixel sampling
+* motion blur - jitter rays "in time"
+* lens effect - jitter ray origin positions based on a lens
+*/
+__global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, PathSegment* pathSegments)
+{
+ int x = (blockIdx.x * blockDim.x) + threadIdx.x;
+ int y = (blockIdx.y * blockDim.y) + threadIdx.y;
+
+ if (x < cam.resolution.x && y < cam.resolution.y) {
+ int index = x + (y * cam.resolution.x);
+ PathSegment & segment = pathSegments[index];
+
+ thrust::default_random_engine rng = makeSeededRandomEngine(iter, index, 0);
+ thrust::uniform_real_distribution uhh(-0.5f, 0.5f);
+
+ segment.ray.origin = cam.position;
+ segment.color = glm::vec3(1.0f, 1.0f, 1.0f);
+
+ segment.ray.direction = glm::normalize(
+ cam.view -
+ cam.right * cam.pixelLength.x * ((float)x + uhh(rng) - (float)cam.resolution.x * 0.5f) -
+ cam.up * cam.pixelLength.y * ((float)y + uhh(rng) - (float)cam.resolution.y * 0.5f));
+
+ segment.pixelIndex = index;
+ segment.remainingBounces = traceDepth;
+ }
+}
+
+__global__ void computeIntersections(
+ int depth,
+ int num_paths,
+ PathSegment * pathSegments,
+ Geom * geoms,
+ int geoms_size,
+ ShadeableIntersection * intersections)
+{
+ int path_index = blockIdx.x * blockDim.x + threadIdx.x;
+
+ if (path_index < num_paths)
+ {
+ PathSegment pathSegment = pathSegments[path_index];
+
+ float t;
+ glm::vec3 intersect_point;
+ glm::vec3 normal;
+ float t_min = FLT_MAX;
+ int hit_geom_index = -1;
+ bool outside = true;
+
+ glm::vec3 tmp_intersect;
+ glm::vec3 tmp_normal;
+
+ // naive parse through global geoms
+
+ for (int i = 0; i < geoms_size; i++)
+ {
+ Geom & geom = geoms[i];
+
+ if (geom.type == CUBE)
+ {
+ t = boxIntersectionTest(geom, pathSegment.ray, tmp_intersect, tmp_normal, outside);
+ }
+ else if (geom.type == SPHERE)
+ {
+ t = sphereIntersectionTest(geom, pathSegment.ray, tmp_intersect, tmp_normal, outside);
+ }
+
+ // Compute the minimum t from the intersection tests to determine what
+ // scene geometry object was hit first.
+ if (t > 0.0f && t_min > t)
+ {
+ t_min = t;
+ hit_geom_index = i;
+ intersect_point = tmp_intersect;
+ normal = tmp_normal;
+ }
+ }
+
+ if (hit_geom_index == -1)
+ {
+ intersections[path_index].t = -1.0f;
+ }
+ else
+ {
+ //The ray hits something
+ intersections[path_index].t = t_min;
+ intersections[path_index].materialId = geoms[hit_geom_index].materialid;
+ intersections[path_index].surfaceNormal = normal;
+ }
+ }
+}
+
+__global__ void shadeSimpleMaterials (
+ int iter,
+ int num_paths,
+ ShadeableIntersection * shadeableIntersections,
+ PathSegment * pathSegments,
+ Material * materials)
+{
+ int idx = blockIdx.x * blockDim.x + threadIdx.x;
+ if (idx < num_paths)
+ {
+ ShadeableIntersection intersection = shadeableIntersections[idx];
+ PathSegment segment = pathSegments[idx];
+ if (segment.remainingBounces == 0) {
+ return;
+ }
+
+ if (intersection.t > 0.0f) { // if the intersection exists...
+ segment.remainingBounces--;
+ // Set up the RNG
+ thrust::default_random_engine rng = makeSeededRandomEngine(iter, idx, segment.remainingBounces);
+
+ Material material = materials[intersection.materialId];
+ glm::vec3 materialColor = material.color;
+
+ // If the material indicates that the object was a light, "light" the ray
+ if (material.emittance > 0.0f) {
+ segment.color *= (materialColor * material.emittance);
+ segment.remainingBounces = 0;
+ }
+ else {
+ glm::vec3 intersectPos = intersection.t * segment.ray.direction + segment.ray.origin;
+ scatterRay(segment, intersectPos, intersection.surfaceNormal, material, rng);
+ }
+ // If there was no intersection, color the ray black.
+ // Lots of renderers use 4 channel color, RGBA, where A = alpha, often
+ // used for opacity, in which case they can indicate "no opacity".
+ // This can be useful for post-processing and image compositing.
+ } else {
+ segment.color = glm::vec3(0.0f);
+ segment.remainingBounces = 0;
+ }
+
+ pathSegments[idx] = segment;
+ }
+}
+
+__global__ void generateGBuffer (
+ int num_paths,
+ ShadeableIntersection* shadeableIntersections,
+ PathSegment* pathSegments,
+ GBufferPixel* gBuffer)
+{
+ int index = blockIdx.x * blockDim.x + threadIdx.x;
+ if (index < num_paths)
+ {
+ ShadeableIntersection& i = shadeableIntersections[index];
+ Ray& r = pathSegments[index].ray;
+
+ gBuffer[index].t = i.t;
+ gBuffer[index].pos = r.origin + i.t * r.direction;
+ gBuffer[index].nor = i.surfaceNormal;
+
+ }
+}
+
+// Add the current iteration's output to the overall image
+__global__ void finalGather(int nPaths, glm::vec3 * image, PathSegment * iterationPaths)
+{
+ int index = (blockIdx.x * blockDim.x) + threadIdx.x;
+
+ if (index < nPaths)
+ {
+ PathSegment iterationPath = iterationPaths[index];
+ image[iterationPath.pixelIndex] += iterationPath.color;
+ }
+
+ //showImage(uchar4 * pbo, int iter, bool ui_denoise)
+}
+
+/**
+ * Wrapper for the __global__ call that sets up the kernel calls and does a ton
+ * of memory management
+ */
+int pathtrace(int frame, int iter, int lastIter) {
+ const int traceDepth = hst_scene->state.traceDepth;
+ const Camera &cam = hst_scene->state.camera;
+ const int pixelcount = cam.resolution.x * cam.resolution.y;
+
+ // 2D block for generating ray from camera
+ const dim3 blockSize2d(8, 8);
+ const dim3 blocksPerGrid2d(
+ (cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x,
+ (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y);
+
+ // 1D block for path tracing
+ const int blockSize1d = 128;
+
+ ///////////////////////////////////////////////////////////////////////////
+
+ // Pathtracing Recap:
+ // * Initialize array of path rays (using rays that come out of the camera)
+ // * You can pass the Camera object to that kernel.
+ // * Each path ray must carry at minimum a (ray, color) pair,
+ // * where color starts as the multiplicative identity, white = (1, 1, 1).
+ // * This has already been done for you.
+ // * NEW: For the first depth, generate geometry buffers (gbuffers)
+ // * For each depth:
+ // * Compute an intersection in the scene for each path ray.
+ // A very naive version of this has been implemented for you, but feel
+ // free to add more primitives and/or a better algorithm.
+ // Currently, intersection distance is recorded as a parametric distance,
+ // t, or a "distance along the ray." t = -1.0 indicates no intersection.
+ // * Color is attenuated (multiplied) by reflections off of any object
+ // * Stream compact away all of the terminated paths.
+ // You may use either your implementation or `thrust::remove_if` or its
+ // cousins.
+ // * Note that you can't really use a 2D kernel launch any more - switch
+ // to 1D.
+ // * Shade the rays that intersected something or didn't bottom out.
+ // That is, color the ray by performing a color computation according
+ // to the shader, then generate a new ray to continue the ray path.
+ // We recommend just updating the ray's PathSegment in place.
+ // Note that this step may come before or after stream compaction,
+ // since some shaders you write may also cause a path to terminate.
+ // * Finally:
+ // * if not denoising, add this iteration's results to the image
+ // * TODO: if denoising, run kernels that take both the raw pathtraced result and the gbuffer, and put the result in the "pbo" from opengl
+
+ generateRayFromCamera <<>>(cam, iter, traceDepth, dev_paths);
+ checkCUDAError("generate camera ray");
+
+ int depth = 0;
+ PathSegment* dev_path_end = dev_paths + pixelcount;
+ int num_paths = dev_path_end - dev_paths;
+ int ret = (lastIter) ? num_paths : -1;
+
+ // --- PathSegment Tracing Stage ---
+ // Shoot ray into scene, bounce between objects, push shading chunks
+
+ // Empty gbuffer
+ cudaMemset(dev_gBuffer, 0, pixelcount * sizeof(GBufferPixel));
+
+ // clean shading chunks
+ cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection));
+
+ bool iterationComplete = false;
+ while (!iterationComplete) {
+
+ dim3 numblocksPathSegmentTracing = (num_paths + blockSize1d - 1) / blockSize1d;
+ computeIntersections <<>> (
+ depth,
+ num_paths,
+ dev_paths,
+ dev_geoms,
+ hst_scene->geoms.size(),
+ dev_intersections);
+ checkCUDAError("trace one bounce");
+ cudaDeviceSynchronize();
+
+ if (depth == 0 && lastIter) {
+ generateGBuffer<<>>(num_paths, dev_intersections, dev_paths, dev_gBuffer);
+ }
+ depth++;
+
+ shadeSimpleMaterials<<>> (
+ iter,
+ num_paths,
+ dev_intersections,
+ dev_paths,
+ dev_materials);
+ iterationComplete = depth == traceDepth;
+
+ }
+
+ // Assemble this iteration and apply it to the image
+ dim3 numBlocksPixels = (pixelcount + blockSize1d - 1) / blockSize1d;
+ finalGather<<>>(num_paths, dev_image, dev_paths);
+ ///////////////////////////////////////////////////////////////////////////
+
+ // CHECKITOUT: use dev_image as reference if you want to implement saving denoised images.
+ // Otherwise, screenshots are also acceptable.
+ // Retrieve image from GPU
+ cudaMemcpy(hst_scene->state.image.data(), dev_image,
+ pixelcount * sizeof(glm::vec3), cudaMemcpyDeviceToHost);
+
+ checkCUDAError("pathtrace");
+ cudaDeviceSynchronize();
+ return ret;
+}
+
+// CHECKITOUT: this kernel "post-processes" the gbuffer/gbuffers into something that you can visualize for debugging.
+void showGBuffer(uchar4* pbo) {
+ const Camera &cam = hst_scene->state.camera;
+ const dim3 blockSize2d(8, 8);
+ const dim3 blocksPerGrid2d(
+ (cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x,
+ (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y);
+
+ // CHECKITOUT: process the gbuffer results and send them to OpenGL buffer for visualization
+ gbufferToPBO<<>>(pbo, cam.resolution, dev_gBuffer);
+}
+
+void showImage(uchar4* pbo, int iter, bool ui_denoise) {
+const Camera &cam = hst_scene->state.camera;
+ const dim3 blockSize2d(8, 8);
+ const dim3 blocksPerGrid2d(
+ (cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x,
+ (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y);
+
+ // Send results to OpenGL buffer for rendering
+ if (ui_denoise) {
+ sendImageToPBO << > > (pbo, cam.resolution, iter, dev_imageDenoise);
+ }
+ else {
+ sendImageToPBO << > > (pbo, cam.resolution, iter, dev_image);
+ }
+}
+
+__global__ void denoiseIter(
+ const Camera cam,
+ const int step,
+ const float c_phi,
+ const float p_phi,
+ const float n_phi,
+ const float* gaussian,
+ glm::vec3* imageDenoise,
+ const glm::vec3* image,
+ const GBufferPixel* gBuffer)
+{
+ int x = (blockIdx.x * blockDim.x) + threadIdx.x;
+ int y = (blockIdx.y * blockDim.y) + threadIdx.y;
+
+ if (x < cam.resolution.x && y < cam.resolution.y) {
+
+ glm::vec3 sumColor = glm::vec3(0.f);
+ float sumWeight = 0.f;
+
+ int index = x + (y * cam.resolution.x);
+
+ glm::vec3 col = image[index];
+ glm::vec3 pos = gBuffer[index].pos;
+ glm::vec3 nor = gBuffer[index].nor;
+
+ for (int j = -2; j <= 2; j++) {
+
+ //int relY = glm::clamp(y + j * step, 0, cam.resolution.y);
+ int relY = y + j * step;
+ if (relY < 0 || relY >= cam.resolution.y) continue;
+
+ for (int i = -2; i <= 2; i++) {
+
+ //int relX = glm::clamp(x + i * step, 0, cam.resolution.x);
+ int relX = x + i * step;
+ if (relX < 0 || relX >= cam.resolution.x) continue;
+
+ int relIndex = relX + cam.resolution.x * relY;
+
+ glm::vec3 t; glm::vec3 colTemp; float dist2;
+ // Color weighting
+ colTemp = image[relIndex];
+ t = col - colTemp;
+ dist2 = glm::dot(t, t);
+ float c_w = glm::min(std::exp(-(dist2) / (c_phi + EPSILON)), 1.f);
+
+ // Position weighting
+ t = pos - gBuffer[relIndex].pos;
+ dist2 = glm::dot(t, t);
+ float p_w = glm::min(std::exp(-(dist2) / (p_phi + EPSILON)), 1.f);
+
+ // Normal weighting
+ t = nor - gBuffer[relIndex].nor;
+ dist2 = glm::max(glm::dot(t, t), 0.f);
+ float n_w = glm::min(std::exp(-(dist2) / (n_phi + EPSILON)), 1.f);
+
+ //float weight = c_w * c_w * p_w * p_w * n_w * n_w;
+ float weight = c_w * p_w * n_w;
+ float influence = weight * gaussian[((i + 2) + 5 * (j + 2))];
+ sumColor += (colTemp * influence);
+ sumWeight += influence;
+ }
+ }
+
+ imageDenoise[index] = sumColor / sumWeight;
+
+ }
+}
+
+void denoise(const int filterSize, const float cPhi, const float pPhi, const float nPhi) {
+
+ if (filterSize < 25) return;
+
+ const Camera& cam = hst_scene->state.camera;
+
+ const dim3 blockSize2d(BLOCK_LENGTH, BLOCK_LENGTH);
+ const dim3 blocksPerGrid2d(
+ (cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x,
+ (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y);
+
+ cudaMemcpy(dev_imageDenoiseDup, dev_image, cam.resolution.x * cam.resolution.y * sizeof(glm::vec3), cudaMemcpyDeviceToDevice);
+
+ for (int step = 1; step <= std::floor(std::sqrt(filterSize)); step++) {
+ denoiseIter << > > (cam, 1 << step, cPhi, pPhi, nPhi, dev_gaussian, dev_imageDenoise, dev_imageDenoiseDup, dev_gBuffer);
+ std::swap(dev_imageDenoise, dev_imageDenoiseDup);
+ }
+ cudaDeviceSynchronize();
+}
\ No newline at end of file
diff --git a/src/pathtrace.h b/src/pathtrace.h
index 9e12f44..5a26cdf 100644
--- a/src/pathtrace.h
+++ b/src/pathtrace.h
@@ -5,6 +5,9 @@
void pathtraceInit(Scene *scene);
void pathtraceFree();
-void pathtrace(int frame, int iteration);
+int pathtrace(int frame, int iteration, int lastIter);
void showGBuffer(uchar4 *pbo);
-void showImage(uchar4 *pbo, int iter);
+void showImage(uchar4 *pbo, int iter, bool ui_denoise);
+
+void denoise(const int filterSize, const float cPhi, const float pPhi, const float nPhi);
+
diff --git a/src/sceneStructs.h b/src/sceneStructs.h
index da7e558..7f1ddcb 100644
--- a/src/sceneStructs.h
+++ b/src/sceneStructs.h
@@ -78,5 +78,8 @@ struct ShadeableIntersection {
// CHECKITOUT - a simple struct for storing scene geometry information per-pixel.
// What information might be helpful for guiding a denoising filter?
struct GBufferPixel {
- float t;
+ float t;
+ glm::vec3 pos; // Positions
+ glm::vec3 nor; // Normals
+ // Optional diffuse determined from pathtrace.cu's finalGather
};
diff --git a/stream_compaction/common.cu b/stream_compaction/common.cu
new file mode 100644
index 0000000..062427d
--- /dev/null
+++ b/stream_compaction/common.cu
@@ -0,0 +1,40 @@
+#include "common.h"
+
+
+namespace StreamCompaction {
+ namespace Common {
+
+ /**
+ * Maps an array to an array of 0s and 1s for stream compaction. Elements
+ * which map to 0 will be removed, and elements which map to 1 will be kept.
+ */
+ __global__ void kernMapToBoolean(int n, int *bools1, int *bools2, const int *idata) {
+ // TODO
+ int index = threadIdx.x + (blockIdx.x * blockDim.x);
+ if (index >= n) {
+ return;
+ }
+
+ int result = idata[index] != 0;
+ bools1[index] = result;
+ bools2[index] = result;
+ }
+
+ /**
+ * Performs scatter on an array. That is, for each element in idata,
+ * if bools[idx] == 1, it copies idata[idx] to odata[indices[idx]].
+ */
+ __global__ void kernScatter(int n, int *odata,
+ const int *idata, const int *bools, const int *indices) {
+ // TODO
+ int index = threadIdx.x + (blockIdx.x * blockDim.x);
+ if (index >= n) {
+ return;
+ }
+
+ if (bools[index]) {
+ odata[indices[index]] = idata[index];
+ }
+ }
+ }
+}
diff --git a/stream_compaction/common.h b/stream_compaction/common.h
new file mode 100644
index 0000000..74fe808
--- /dev/null
+++ b/stream_compaction/common.h
@@ -0,0 +1,129 @@
+#pragma once
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define useCommon 1
+
+/**
+ * Check for CUDA errors; print and exit if there was a problem.
+ */
+inline int ilog2(int x) {
+ int lg = 0;
+ while (x >>= 1) {
+ ++lg;
+ }
+ return lg;
+}
+
+inline int ilog2ceil(int x) {
+ return x == 1 ? 0 : ilog2(x - 1) + 1;
+}
+
+namespace StreamCompaction {
+ namespace Common {
+ __global__ void kernMapToBoolean(int n, int *bools1, int *bools2, const int *idata);
+
+ __global__ void kernScatter(int n, int *odata,
+ const int *idata, const int *bools, const int *indices);
+
+ /**
+ * This class is used for timing the performance
+ * Uncopyable and unmovable
+ *
+ * Adapted from WindyDarian(https://github.com/WindyDarian)
+ */
+ class PerformanceTimer
+ {
+ public:
+ PerformanceTimer()
+ {
+ cudaEventCreate(&event_start);
+ cudaEventCreate(&event_end);
+ }
+
+ ~PerformanceTimer()
+ {
+ cudaEventDestroy(event_start);
+ cudaEventDestroy(event_end);
+ }
+
+ void startCpuTimer()
+ {
+ if (cpu_timer_started) { throw std::runtime_error("CPU timer already started"); }
+ cpu_timer_started = true;
+
+ time_start_cpu = std::chrono::high_resolution_clock::now();
+ }
+
+ void endCpuTimer()
+ {
+ time_end_cpu = std::chrono::high_resolution_clock::now();
+
+ if (!cpu_timer_started) { throw std::runtime_error("CPU timer not started"); }
+
+ std::chrono::duration duro = time_end_cpu - time_start_cpu;
+ prev_elapsed_time_cpu_milliseconds =
+ static_cast(duro.count());
+
+ cpu_timer_started = false;
+ }
+
+ void startGpuTimer()
+ {
+ if (gpu_timer_started) { throw std::runtime_error("GPU timer already started"); }
+ gpu_timer_started = true;
+
+ cudaEventRecord(event_start);
+ }
+
+ void endGpuTimer()
+ {
+ cudaEventRecord(event_end);
+ cudaEventSynchronize(event_end);
+
+ if (!gpu_timer_started) { throw std::runtime_error("GPU timer not started"); }
+
+ cudaEventElapsedTime(&prev_elapsed_time_gpu_milliseconds, event_start, event_end);
+ gpu_timer_started = false;
+ }
+
+ float getCpuElapsedTimeForPreviousOperation() //noexcept //(damn I need VS 2015
+ {
+ return prev_elapsed_time_cpu_milliseconds;
+ }
+
+ float getGpuElapsedTimeForPreviousOperation() //noexcept
+ {
+ return prev_elapsed_time_gpu_milliseconds;
+ }
+
+ // remove copy and move functions
+ PerformanceTimer(const PerformanceTimer&) = delete;
+ PerformanceTimer(PerformanceTimer&&) = delete;
+ PerformanceTimer& operator=(const PerformanceTimer&) = delete;
+ PerformanceTimer& operator=(PerformanceTimer&&) = delete;
+
+ private:
+ cudaEvent_t event_start = nullptr;
+ cudaEvent_t event_end = nullptr;
+
+ using time_point_t = std::chrono::high_resolution_clock::time_point;
+ time_point_t time_start_cpu;
+ time_point_t time_end_cpu;
+
+ bool cpu_timer_started = false;
+ bool gpu_timer_started = false;
+
+ float prev_elapsed_time_cpu_milliseconds = 0.f;
+ float prev_elapsed_time_gpu_milliseconds = 0.f;
+ };
+ }
+}