Skip to content

Commit 5e28caf

Browse files
committed
GBuffer (first ray cache) + basic procedural texture (required for texture mapping)
1 parent 2ef88fd commit 5e28caf

File tree

8 files changed

+211
-55
lines changed

8 files changed

+211
-55
lines changed

README.md

Lines changed: 95 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
CUDA Path Tracer
22
================
33

4-
**University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 2**
4+
**University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 3**
55

66
* Chang Liu
77
* [LinkedIn](https://www.linkedin.com/in/chang-liu-0451a6208/)
@@ -11,6 +11,99 @@ CUDA Path Tracer
1111
- i7-12700 @ 4.90GHz with 16GB RAM
1212
- RTX 3070 Ti Laptop 8GB
1313

14+
## Representative Outcome
15+
1416
![](./img/photo_realistic.jpg)
1517

16-
![](./img/photo_realistic_2.jpg)
18+
<div align="center">"Photoly" realistic!</div>
19+
20+
<div align="center">Rendered at 2400x1800, 3000 spp within 7 minutes </div>
21+
22+
![](./img/aperture_custom.jpg)
23+
24+
<div align="center">Star-shaped bokehs</div>
25+
26+
## Introduction
27+
28+
This is our third project of CIS 565, Fall 2022. In this project, our goal is to implement a GPU-accelerated path tracer with CUDA.
29+
30+
31+
32+
## Features
33+
34+
### Visual
35+
36+
#### Direct Lighting with Multiple Importance Sampling
37+
38+
#### Importance Sampled Skybox (Environment Map)
39+
40+
Tired of "virtual artificial" light sources? Let's introduce some real-world lighting.
41+
42+
#### Physically-Based Materials
43+
44+
#### Normal Map & PBR Texture
45+
46+
#### Physically-Based Camera: Depth of Field & Custom Bokeh Shape
47+
48+
This is really my favorite part of the project.
49+
50+
<div align="center">
51+
<img src="./img/aperture_off.jpg" width="49%"/>
52+
<img src="./img/aperture.jpg" width="49%"/>
53+
</div>
54+
55+
56+
57+
#### Xor-Scrambled Sobol Low Discrepancy Sequence
58+
59+
#### Post Processing
60+
61+
62+
63+
### Performance
64+
65+
#### Stackless SAH-Constructed Bounding Volume Hierarchy
66+
67+
For ray-scene intersection, I did two levels of optimization.
68+
69+
First, I wrote a SAH-based BVH. SAH, the Surface Area Heuristic, is a method to decide how to split a set of bounding volumes.
70+
71+
The second level of optimization
72+
73+
#### Single-Kernel Path Tracing
74+
75+
There is a paper . It had an interesting opinion: instead of
76+
77+
### Other
78+
79+
#### Streamed Path Tracing Using Stream Compaction
80+
81+
#### First Ray Caching (G-Buffer)
82+
83+
Since I implemented anti-aliasing and physically based camera at the very beginning, when I noticed that there is still a requirement in the basic part, I found it
84+
85+
## Performance Analysis
86+
87+
### How Much GPU Improves Path Tracing Efficiency
88+
89+
I can answer this question with confidence because I have a CPU path tracer that I wrote as an undergraduate.
90+
91+
### Why Is My Multi-Kernel Streamed Path Tracer Not Faster Than the Single-Kernel One?
92+
93+
To know how streaming the rays can improve path tracing efficiency, I additionally implemented a single-kernel version of this path tracer.
94+
95+
What surprised me was that it wasn't as efficient as expected. In some scenes, it was even worse.
96+
97+
Using NSight Compute, I inspected
98+
99+
In general, it's a tradeoff between thread concurrency and time spent accessing global memory.
100+
101+
### Material Sorting: Why Is It Slower?
102+
103+
Alternatively, it is possible that BSDF sampling and evaluation are not as time-consuming as expected, and the bottleneck still lies in the traversal of the acceleration structure.
104+
105+
Therefore, in my opinion, material sorting is best applied when:
106+
107+
- There are many different materials in the scene
108+
- Primitives sharing the same material are randomly distributed in many small clusters over the scene space. The clusters' sizes in solid angle are typically smaller than what a GPU warp can cover.
109+

src/common.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,6 @@
1111

1212
#define CAMERA_PANORAMA false
1313

14-
#define CAMERA_APERTURE_MASK false
15-
1614
struct ToneMapping {
1715
enum {
1816
None = 0, Filmic = 1, ACES = 2

src/main.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -297,8 +297,8 @@ void mousePositionCallback(GLFWwindow* window, double xpos, double ypos) {
297297

298298
if (leftMousePressed) {
299299
// compute new camera parameters
300-
cam.rotation.x -= (xpos - lastX) / width * 20.f;
301-
cam.rotation.y += (ypos - lastY) / height * 20.f;
300+
cam.rotation.x -= (xpos - lastX) / width * 40.f;
301+
cam.rotation.y += (ypos - lastY) / height * 40.f;
302302
cam.rotation.y = glm::clamp(cam.rotation.y, -89.9f, 89.9f);
303303
State::camChanged = true;
304304
}

src/material.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#define MATERIAL_DIELETRIC_USE_SCHLICK_APPROX false
1010

1111
#define NullTextureId -1
12+
#define ProcTextureId -2
1213
#define ProceduralTexId -2
1314
#define InvalidPdf -1.f
1415

src/pathtrace.cu

Lines changed: 94 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -73,11 +73,58 @@ static thrust::device_ptr<int> devSegmentMatKeysThr;
7373

7474
static glm::vec3* devGBufferPos = nullptr;
7575
static glm::vec3* devGBufferNorm = nullptr;
76+
77+
#if ENABLE_GBUFFER
78+
static Intersection* devGBuffer = nullptr;
79+
#endif
7680

7781
void InitDataContainer(GuiDataContainer* imGuiData) {
7882
guiData = imGuiData;
7983
}
8084

85+
#if ENABLE_GBUFFER
86+
#endif
87+
88+
__global__ void renderGBuffer(DevScene* scene, Camera cam, Intersection *GBuffer) {
89+
int idx = blockDim.x * blockIdx.x + threadIdx.x;
90+
int idy = blockDim.y * blockIdx.y + threadIdx.y;
91+
if (idx >= cam.resolution.x || idy >= cam.resolution.y) {
92+
return;
93+
}
94+
95+
float aspect = float(cam.resolution.x) / cam.resolution.y;
96+
float tanFovY = glm::tan(glm::radians(cam.fov.y));
97+
glm::vec2 pixelSize = 1.f / glm::vec2(cam.resolution);
98+
glm::vec2 scr = glm::vec2(idx, idy) * pixelSize;
99+
glm::vec2 ruv = scr + pixelSize * glm::vec2(.5f);
100+
ruv = 1.f - ruv * 2.f;
101+
102+
glm::vec3 pLens(0.f);
103+
glm::vec3 pFocusPlane = glm::vec3(ruv * glm::vec2(aspect, 1.f) * tanFovY, 1.f) * cam.focalDist;
104+
glm::vec3 dir = pFocusPlane - pLens;
105+
106+
Ray ray;
107+
ray.direction = glm::normalize(glm::mat3(cam.right, cam.up, cam.view) * dir);
108+
ray.origin = cam.position + cam.right * pLens.x + cam.up * pLens.y;
109+
110+
Intersection intersec;
111+
scene->intersect(ray, intersec);
112+
113+
if (intersec.primId != NullPrimitive) {
114+
if (scene->materials[intersec.matId].type == Material::Type::Light) {
115+
#if SCENE_LIGHT_SINGLE_SIDED
116+
if (glm::dot(intersec.norm, ray.direction) < 0.f) {
117+
intersec.primId = NullPrimitive;
118+
}
119+
#endif
120+
}
121+
else {
122+
intersec.wo = -ray.direction;
123+
}
124+
}
125+
GBuffer[idy * cam.resolution.x + idx] = intersec;
126+
}
127+
81128
void pathTraceInit(Scene* scene) {
82129
hstScene = scene;
83130

@@ -100,17 +147,32 @@ void pathTraceInit(Scene* scene) {
100147
cudaMalloc(&devSegmentMatKeys, pixelcount * sizeof(int));
101148
devIntersecMatKeysThr = thrust::device_ptr<int>(devIntersecMatKeys);
102149
devSegmentMatKeysThr = thrust::device_ptr<int>(devSegmentMatKeys);
103-
104150
checkCUDAError("pathTraceInit");
151+
152+
#if ENABLE_GBUFFER
153+
cudaMalloc(&devGBuffer, pixelcount * sizeof(Intersection));
154+
const int BlockSize = 8;
155+
dim3 blockSize(BlockSize, BlockSize);
156+
157+
dim3 blockNum((cam.resolution.x + BlockSize - 1) / BlockSize,
158+
(cam.resolution.y + BlockSize - 1) / BlockSize
159+
);
160+
renderGBuffer<<<blockNum, blockSize>>>(hstScene->devScene, cam, devGBuffer);
161+
checkCUDAError("GBuffer");
162+
std::cout << "[GBuffer generated]" << std::endl;
163+
#endif
105164
}
106165

107166
void pathTraceFree() {
108-
cudaFree(devImage); // no-op if devImage is null
109-
cudaFree(devPaths);
110-
cudaFree(devTerminatedPaths);
111-
cudaFree(devIntersections);
112-
cudaFree(devIntersecMatKeys);
113-
cudaFree(devSegmentMatKeys);
167+
cudaSafeFree(devImage); // no-op if devImage is null
168+
cudaSafeFree(devPaths);
169+
cudaSafeFree(devTerminatedPaths);
170+
cudaSafeFree(devIntersections);
171+
cudaSafeFree(devIntersecMatKeys);
172+
cudaSafeFree(devSegmentMatKeys);
173+
#if ENABLE_GBUFFER
174+
cudaSafeFree(devGBuffer);
175+
#endif
114176
}
115177

116178
/**
@@ -154,19 +216,10 @@ __device__ Ray sampleCamera(DevScene* scene, const Camera& cam, int x, int y, gl
154216
return ray;
155217
}
156218

157-
/**
158-
* Generate PathSegments with rays from the camera through the screen into the
159-
* scene, which is the first bounce of rays.
160-
*
161-
* Antialiasing - add rays for sub-pixel sampling
162-
* motion blur - jitter rays "in time"
163-
* lens effect - jitter ray origin positions based on a lens
164-
*/
165219
__global__ void generateRayFromCamera(
166220
DevScene* scene, Camera cam,
167221
int iter, int traceDepth, PathSegment* pathSegments
168222
) {
169-
170223
int x = (blockIdx.x * blockDim.x) + threadIdx.x;
171224
int y = (blockIdx.y * blockDim.y) + threadIdx.y;
172225

@@ -183,18 +236,13 @@ __global__ void generateRayFromCamera(
183236
}
184237
}
185238

186-
__global__ void previewGBuffer(
187-
int iter,
188-
DevScene* scene, Camera cam,
189-
glm::vec3* image, int width, int height,
190-
int kind
191-
) {
239+
__global__ void previewGBuffer(int iter, DevScene* scene, Camera cam, glm::vec3* image, int kind) {
192240
int x = blockDim.x * blockIdx.x + threadIdx.x;
193241
int y = blockDim.y * blockIdx.y + threadIdx.y;
194-
if (x >= width || y >= height) {
242+
if (x >= cam.resolution.x || y >= cam.resolution.y) {
195243
return;
196244
}
197-
int index = y * width + x;
245+
int index = y * cam.resolution.x + x;
198246
Sampler rng = makeSeededRandomEngine(iter, index, 0, scene->sampleSequence);
199247

200248
Ray ray = sampleCamera(scene, cam, x, y, sample4D(rng));
@@ -215,9 +263,6 @@ __global__ void previewGBuffer(
215263
}
216264
}
217265

218-
// computeIntersections handles generating ray intersections ONLY.
219-
// Generating new rays is handled in your shader(s).
220-
// Feel free to modify the code below.
221266
__global__ void computeIntersections(
222267
int depth,
223268
int numPaths,
@@ -226,6 +271,9 @@ __global__ void computeIntersections(
226271
Intersection* intersections,
227272
int* materialKeys,
228273
bool sortMaterial
274+
#if ENABLE_GBUFFER
275+
, Intersection* GBuffer
276+
#endif
229277
) {
230278
int pathIdx = blockIdx.x * blockDim.x + threadIdx.x;
231279

@@ -235,6 +283,13 @@ __global__ void computeIntersections(
235283

236284
Intersection intersec;
237285
PathSegment segment = pathSegments[pathIdx];
286+
#if ENABLE_GBUFFER
287+
if (depth == 0) {
288+
intersections[pathIdx] = GBuffer[pathIdx];
289+
return;
290+
}
291+
#endif
292+
238293
#if BVH_DISABLE
239294
scene->naiveIntersect(segment.ray, intersec);
240295
#else
@@ -393,20 +448,15 @@ __global__ void finalGather(int nPaths, glm::vec3* image, PathSegment* iteration
393448
}
394449
}
395450

396-
__global__ void singleKernelPT(
397-
int iter, int maxDepth,
398-
DevScene* scene,
399-
Camera cam,
400-
glm::vec3* image, int width, int height
401-
) {
451+
__global__ void singleKernelPT(int iter, int maxDepth, DevScene* scene, Camera cam, glm::vec3* image) {
402452
int x = blockDim.x * blockIdx.x + threadIdx.x;
403453
int y = blockDim.y * blockIdx.y + threadIdx.y;
404-
if (x >= width || y >= height) {
454+
if (x >= cam.resolution.x || y >= cam.resolution.y) {
405455
return;
406456
}
407457
glm::vec3 accRadiance(0.f);
408458

409-
int index = y * width + x;
459+
int index = y * cam.resolution.x + x;
410460
Sampler rng = makeSeededRandomEngine(iter, index, 0, scene->sampleSequence);
411461

412462
Ray ray = sampleCamera(scene, cam, x, y, sample4D(rng));
@@ -508,13 +558,13 @@ WriteRadiance:
508558
image[index] += accRadiance;
509559
}
510560

511-
__global__ void BVHVisualize(int iter, DevScene* scene, Camera cam, glm::vec3* image, int width, int height) {
561+
__global__ void BVHVisualize(int iter, DevScene* scene, Camera cam, glm::vec3* image) {
512562
int x = blockDim.x * blockIdx.x + threadIdx.x;
513563
int y = blockDim.y * blockIdx.y + threadIdx.y;
514-
if (x >= width || y >= height) {
564+
if (x >= cam.resolution.x || y >= cam.resolution.y) {
515565
return;
516566
}
517-
int index = y * width + x;
567+
int index = y * cam.resolution.x + x;
518568

519569
Sampler rng = makeSeededRandomEngine(iter, index, 0, scene->sampleSequence);
520570
Ray ray = sampleCamera(scene, cam, x, y, sample4D(rng));
@@ -577,6 +627,9 @@ void pathTrace(uchar4* pbo, int frame, int iter) {
577627
int blockNumIntersec = (numPaths + BlockSizeIntersec - 1) / BlockSizeIntersec;
578628
computeIntersections<<<blockNumIntersec, BlockSizeIntersec>>>(
579629
depth, numPaths, devPaths, hstScene->devScene, devIntersections, devIntersecMatKeys, Settings::sortMaterial
630+
#if ENABLE_GBUFFER
631+
, devGBuffer
632+
#endif
580633
);
581634
checkCUDAError("PT::computeInteractions");
582635
cudaDeviceSynchronize();
@@ -627,16 +680,13 @@ void pathTrace(uchar4* pbo, int frame, int iter) {
627680
dim3 singlePTBlockSize(BlockSizeSinglePTX, BlockSizeSinglePTY);
628681

629682
if (Settings::tracer == Tracer::SingleKernel) {
630-
singleKernelPT<<<singlePTBlockNum, singlePTBlockSize>>>(
631-
iter, Settings::traceDepth, hstScene->devScene, cam, devImage, cam.resolution.x, cam.resolution.y);
683+
singleKernelPT<<<singlePTBlockNum, singlePTBlockSize>>>(iter, Settings::traceDepth, hstScene->devScene, cam, devImage);
632684
}
633685
else if (Settings::tracer == Tracer::BVHVisualize) {
634-
BVHVisualize<<<singlePTBlockNum, singlePTBlockSize>>>(
635-
iter, hstScene->devScene, cam, devImage, cam.resolution.x, cam.resolution.y);
686+
BVHVisualize<<<singlePTBlockNum, singlePTBlockSize>>>(iter, hstScene->devScene, cam, devImage);
636687
}
637688
else {
638-
previewGBuffer<<<singlePTBlockNum, singlePTBlockSize>>>(
639-
iter, hstScene->devScene, cam, devImage, cam.resolution.x, cam.resolution.y,
689+
previewGBuffer<<<singlePTBlockNum, singlePTBlockSize>>>(iter, hstScene->devScene, cam, devImage,
640690
Settings::GBufferPreviewOpt);
641691
}
642692

src/preview.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,6 @@ void RenderImGui() {
256256

257257
glm::vec3 lastPos = cam.position;
258258
if (ImGui::DragFloat3("Position", glm::value_ptr(cam.position), .1f)) {
259-
cam.rotation += cam.position - lastPos;
260259
State::camChanged = true;
261260
}
262261

0 commit comments

Comments
 (0)