@@ -81,10 +81,14 @@ static PathSegment *dev_paths = NULL;
static ShadeableIntersection *dev_intersections = NULL;
// TODO: static variables for device memory, any extra info you need, etc
// ...
// Integer device buffers; pathtraceInit allocates both with
// ANTIALIAS_FACTOR * pixelcount entries.
static int *dev_materialIDs = NULL;
static int *dev_materialIDBuffers = NULL;
// Per-iteration accumulation buffer holding one glm::vec3 per pixel. It is
// zeroed at the end of each pathtrace() call, filled by finalGather and then
// folded into dev_image by addToImage.
static glm::vec3 *dev_image_buffer = NULL;

// first-bounce intersection caching
#ifdef CACHE_INTERSECTIONS
// Copies of dev_intersections / dev_materialIDs that pathtrace() saves to and
// restores from with device-to-device cudaMemcpy calls.
static ShadeableIntersection *dev_intersections_cache = NULL;
static int *dev_materialIDs_cache = NULL;
#endif
8993
9094void pathtraceInit (Scene *scene) {
@@ -95,7 +99,7 @@ void pathtraceInit(Scene *scene) {
9599 cudaMalloc (&dev_image, pixelcount * sizeof (glm::vec3));
96100 cudaMemset (dev_image, 0 , pixelcount * sizeof (glm::vec3));
97101
98- cudaMalloc (&dev_paths, pixelcount * sizeof (PathSegment));
102+ cudaMalloc (&dev_paths, ANTIALIAS_FACTOR * pixelcount * sizeof (PathSegment));
99103
100104 cudaMalloc (&dev_geoms, scene->geoms .size () * sizeof (Geom));
101105 cudaMemcpy (dev_geoms, scene->geoms .data (), scene->geoms .size () * sizeof (Geom),
@@ -106,15 +110,22 @@ void pathtraceInit(Scene *scene) {
106110 scene->materials .size () * sizeof (Material),
107111 cudaMemcpyHostToDevice);
108112
109- cudaMalloc (&dev_intersections, pixelcount * sizeof (ShadeableIntersection));
110- cudaMemset (dev_intersections, 0 , pixelcount * sizeof (ShadeableIntersection));
113+ cudaMalloc (&dev_intersections,
114+ ANTIALIAS_FACTOR * pixelcount * sizeof (ShadeableIntersection));
115+ cudaMemset (dev_intersections, 0 ,
116+ ANTIALIAS_FACTOR * pixelcount * sizeof (ShadeableIntersection));
111117
112118 // TODO: initialize any extra device memory you need
113- cudaMalloc ((void **)&dev_materialIDs, pixelcount * sizeof (int ));
114- cudaMalloc ((void **)&dev_materialIDBuffers, pixelcount * sizeof (int ));
119+ cudaMalloc ((void **)&dev_materialIDs,
120+ ANTIALIAS_FACTOR * pixelcount * sizeof (int ));
121+ cudaMalloc ((void **)&dev_materialIDBuffers,
122+ ANTIALIAS_FACTOR * pixelcount * sizeof (int ));
123+ cudaMalloc (&dev_image_buffer, pixelcount * sizeof (glm::vec3));
115124#ifdef CACHE_INTERSECTIONS
116125 cudaMalloc ((void **)&dev_intersections_cache,
117- pixelcount * sizeof (ShadeableIntersection));
126+ ANTIALIAS_FACTOR * pixelcount * sizeof (ShadeableIntersection));
127+ cudaMalloc ((void **)&dev_materialIDs_cache,
128+ ANTIALIAS_FACTOR * pixelcount * sizeof (int ));
118129#endif
119130
120131 checkCUDAError (" pathtraceInit" );
@@ -129,8 +140,10 @@ void pathtraceFree() {
129140 // TODO: clean up any extra device memory you created
130141 cudaFree (dev_materialIDs);
131142 cudaFree (dev_materialIDBuffers);
143+ cudaFree (dev_image_buffer);
132144#ifdef CACHE_INTERSECTIONS
133145 cudaFree (dev_intersections_cache);
146+ cudaFree (dev_materialIDs_cache);
134147#endif
135148
136149 checkCUDAError (" pathtraceFree" );
@@ -150,22 +163,40 @@ __global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth,
150163 int y = (blockIdx .y * blockDim .y ) + threadIdx .y ;
151164
152165 if (x < cam.resolution .x && y < cam.resolution .y ) {
153- int index = x + (y * cam.resolution .x );
154- PathSegment &segment = pathSegments[index];
155-
156- segment.ray .origin = cam.position ;
157- segment.color = glm::vec3 (1 .0f , 1 .0f , 1 .0f );
166+ int index = x + (y * cam.resolution .x );
158167
159- // TODO: implement antialiasing by jittering the ray
168+ // primary ray per pixel
169+ PathSegment &segment = pathSegments[index];
170+ segment.ray .origin = cam.position ;
171+ segment.color = glm::vec3 (1 .0f , 1 .0f , 1 .0f );
172+ segment.pixelIndex = index;
173+ segment.remainingBounces = traceDepth;
160174 segment.ray .direction =
161175 glm::normalize (cam.view -
162176 cam.right * cam.pixelLength .x *
163177 ((float )x - (float )cam.resolution .x * 0 .5f ) -
164178 cam.up * cam.pixelLength .y *
165179 ((float )y - (float )cam.resolution .y * 0 .5f ));
166180
167- segment.pixelIndex = index;
168- segment.remainingBounces = traceDepth;
181+ // implement antialiasing by jittering the ray
182+ // sub-sampled extra rays per pixel
183+ int pixelcount = cam.resolution .x * cam.resolution .y ;
184+ for (int i = 1 ; i < ANTIALIAS_FACTOR; ++i) {
185+ PathSegment &extra_segment = pathSegments[i * pixelcount + index];
186+ extra_segment.ray .origin = cam.position ;
187+ extra_segment.color = glm::vec3 (1 .0f , 1 .0f , 1 .0f );
188+ extra_segment.pixelIndex = index;
189+ extra_segment.remainingBounces = traceDepth;
190+ thrust::default_random_engine rng =
191+ makeSeededRandomEngine (iter, index, i);
192+ thrust::uniform_real_distribution<float > u01 (0 , 1 );
193+ extra_segment.ray .direction = glm::normalize (
194+ cam.view -
195+ cam.right * cam.pixelLength .x *
196+ ((float )x + u01 (rng) - (float )cam.resolution .x * 0 .5f ) -
197+ cam.up * cam.pixelLength .y *
198+ ((float )y + u01 (rng) - (float )cam.resolution .y * 0 .5f ));
199+ }
169200 }
170201}
171202
@@ -263,14 +294,25 @@ __global__ void shadeMaterial(
263294 }
264295}
265296
// Add the current iteration's output to the image buffer.
//
// Launch layout: 1D grid of 1D blocks, one thread per path segment. Threads
// whose index is >= nPaths do nothing.
//
// Several path segments can carry the same pixelIndex (generateRayFromCamera
// writes ANTIALIAS_FACTOR segments per pixel, all tagged with that pixel's
// index), so each color channel is accumulated with atomicAdd instead of a
// plain += to avoid lost updates.
//
// nPaths          number of entries of iterationPaths to gather
// img_buffer      accumulation target, indexed by PathSegment::pixelIndex
// iterationPaths  path segments of this iteration (read-only)
__global__ void finalGather(int nPaths, glm::vec3 *img_buffer,
                            const PathSegment *iterationPaths) {
  int index = (blockIdx.x * blockDim.x) + threadIdx.x;
  if (index < nPaths) {
    // Only pixelIndex and color are needed, so read through a reference
    // rather than copying the whole PathSegment.
    const PathSegment &iterationPath = iterationPaths[index];
    glm::vec3 &pixel = img_buffer[iterationPath.pixelIndex];
    atomicAdd(&pixel[0], iterationPath.color[0]);
    atomicAdd(&pixel[1], iterationPath.color[1]);
    atomicAdd(&pixel[2], iterationPath.color[2]);
  }
}
308+
// Average the accumulated sub-pixel values in the image buffer and add the
// result to the final image.
//
// Launch layout: 1D grid of 1D blocks, one thread per pixel. Threads whose
// index is >= pixelcount do nothing.
//
// pixelcount  number of pixels in image and img_buffer
// image       running image; receives this iteration's averaged color
// img_buffer  per-pixel sums for this iteration (read-only); each entry is
//             divided by ANTIALIAS_FACTOR before being added
__global__ void addToImage(int pixelcount, glm::vec3 *image,
                           const glm::vec3 *img_buffer) {
  int index = (blockIdx.x * blockDim.x) + threadIdx.x;
  if (index < pixelcount) {
    // 1.0f * ... forces a float divisor so the division is done in floating
    // point.
    image[index] += (img_buffer[index] / (1.0f * ANTIALIAS_FACTOR));
  }
}
276318
@@ -326,15 +368,14 @@ void pathtrace(uchar4 *pbo, int frame, int iter) {
326368 checkCUDAError (" generate camera ray" );
327369
328370 int depth = 0 ;
329- int num_active_paths = pixelcount;
371+ int num_active_paths = ANTIALIAS_FACTOR * pixelcount;
330372
331373 // --- PathSegment Tracing Stage ---
332374 // Shoot ray into scene, bounce between objects, push shading chunks
333-
334375 while (num_active_paths > 0 ) {
335376 // clean shading chunks
336377 cudaMemset (dev_intersections, 0 ,
337- pixelcount * sizeof (ShadeableIntersection));
378+ ANTIALIAS_FACTOR * pixelcount * sizeof (ShadeableIntersection));
338379
339380 // --- Tracing Stage ---
340381 dim3 numblocksPathSegmentTracing =
@@ -345,6 +386,18 @@ void pathtrace(uchar4 *pbo, int frame, int iter) {
345386 cudaMemcpy (dev_intersections, dev_intersections_cache,
346387 pixelcount * sizeof (ShadeableIntersection),
347388 cudaMemcpyDeviceToDevice);
389+ cudaMemcpy (dev_materialIDs, dev_materialIDs_cache,
390+ pixelcount * sizeof (int ), cudaMemcpyDeviceToDevice);
391+ if (num_active_paths - pixelcount > 0 ) {
392+ dim3 numBlocksAntialiasTracing =
393+ (num_active_paths - pixelcount + blockSize1d - 1 ) / blockSize1d;
394+ computeIntersections<<<numBlocksAntialiasTracing, blockSize1d>>> (
395+ depth, num_active_paths - pixelcount, dev_paths + pixelcount,
396+ dev_geoms, hst_scene->geoms .size (), dev_intersections + pixelcount,
397+ dev_materialIDs + pixelcount);
398+ checkCUDAError (" anti-alias extra rays trace one bounce" );
399+ cudaDeviceSynchronize ();
400+ }
348401 } else {
349402 computeIntersections<<<numblocksPathSegmentTracing, blockSize1d>>> (
350403 depth, num_active_paths, dev_paths, dev_geoms,
@@ -356,6 +409,8 @@ void pathtrace(uchar4 *pbo, int frame, int iter) {
356409 cudaMemcpy (dev_intersections_cache, dev_intersections,
357410 pixelcount * sizeof (ShadeableIntersection),
358411 cudaMemcpyDeviceToDevice);
412+ cudaMemcpy (dev_materialIDs_cache, dev_materialIDs,
413+ pixelcount * sizeof (int ), cudaMemcpyDeviceToDevice);
359414 }
360415 }
361416#else
@@ -401,9 +456,14 @@ void pathtrace(uchar4 *pbo, int frame, int iter) {
401456 }
402457
403458 // Assemble this iteration and apply it to the image
459+ cudaMemset (dev_image_buffer, 0 , pixelcount * sizeof (glm::vec3));
460+ dim3 numBlocksSubPixels =
461+ (ANTIALIAS_FACTOR * pixelcount + blockSize1d - 1 ) / blockSize1d;
462+ finalGather<<<numBlocksSubPixels, blockSize1d>>> (
463+ ANTIALIAS_FACTOR * pixelcount, dev_image_buffer, dev_paths);
404464 dim3 numBlocksPixels = (pixelcount + blockSize1d - 1 ) / blockSize1d;
405- finalGather <<<numBlocksPixels, blockSize1d>>> (pixelcount, dev_image,
406- dev_paths );
465+ addToImage <<<numBlocksPixels, blockSize1d>>> (pixelcount, dev_image,
466+ dev_image_buffer );
407467
408468 // Send results to OpenGL buffer for rendering
409469 sendImageToPBO<<<blocksPerGrid2d, blockSize2d>>> (pbo, cam.resolution , iter,
0 commit comments