55#include < thrust/random.h>
66#include < thrust/remove.h>
77#include < thrust/device_ptr.h>
8+ #include < thrust/sort.h>
89
910#include " sceneStructs.h"
1011#include " material.h"
1819#include " mathUtil.h"
1920#include " sampler.h"
2021
21- #define BVH_DEBUG_VISUALIZATION false
22-
23- int ToneMapping::method = ToneMapping::ACES;
24-
2522// Kernel that writes the image to the OpenGL PBO directly.
2623__global__ void sendImageToPBO (uchar4 * pbo, glm::ivec2 resolution,
2724 int iter, glm::vec3* Image, int toneMapping) {
@@ -63,10 +60,16 @@ static glm::vec3* devImage = nullptr;
6360static PathSegment* devPaths = nullptr ;
6461static PathSegment* devTerminatedPaths = nullptr ;
6562static Intersection* devIntersections = nullptr ;
63+ static int * devIntersecMatKeys = nullptr ;
64+ static int * devSegmentMatKeys = nullptr ;
6665// TODO: static variables for device memory, any extra info you need, etc
6766// ...
6867static thrust::device_ptr<PathSegment> devPathsThr;
6968static thrust::device_ptr<PathSegment> devTerminatedPathsThr;
69+
70+ static thrust::device_ptr<Intersection> devIntersectionsThr;
71+ static thrust::device_ptr<int > devIntersecMatKeysThr;
72+ static thrust::device_ptr<int > devSegmentMatKeysThr;
7073
7174void InitDataContainer (GuiDataContainer* imGuiData) {
7275 guiData = imGuiData;
@@ -88,6 +91,12 @@ void pathTraceInit(Scene* scene) {
8891
8992 cudaMalloc (&devIntersections, pixelcount * sizeof (Intersection));
9093 cudaMemset (devIntersections, 0 , pixelcount * sizeof (Intersection));
94+ devIntersectionsThr = thrust::device_ptr<Intersection>(devIntersections);
95+
96+ cudaMalloc (&devIntersecMatKeys, pixelcount * sizeof (int ));
97+ cudaMalloc (&devSegmentMatKeys, pixelcount * sizeof (int ));
98+ devIntersecMatKeysThr = thrust::device_ptr<int >(devIntersecMatKeys);
99+ devSegmentMatKeysThr = thrust::device_ptr<int >(devSegmentMatKeys);
91100
92101 checkCUDAError (" pathTraceInit" );
93102}
@@ -97,6 +106,8 @@ void pathTraceFree() {
97106 cudaFree (devPaths);
98107 cudaFree (devTerminatedPaths);
99108 cudaFree (devIntersections);
109+ cudaFree (devIntersecMatKeys);
110+ cudaFree (devSegmentMatKeys);
100111}
101112
102113/* *
@@ -152,17 +163,23 @@ __global__ void computeIntersections(
152163 int numPaths,
153164 PathSegment* pathSegments,
154165 DevScene* scene,
155- Intersection* intersections
166+ Intersection* intersections,
167+ int * materialKeys,
168+ bool sortMaterial
156169) {
157170 int pathIdx = blockIdx .x * blockDim .x + threadIdx .x ;
158171
159- if (pathIdx < numPaths) {
160- # if BVH_DEBUG_VISUALIZATION
161- scene-> visualizedIntersect (pathSegments[pathIdx]. ray , intersections[pathIdx]);
162- # else
172+ if (pathIdx >= numPaths) {
173+ return ;
174+ }
175+
163176 Intersection intersec;
164177 PathSegment segment = pathSegments[pathIdx];
178+ #if BVH_DISABLE
179+ scene->naiveIntersect (segment.ray , intersec);
180+ #else
165181 scene->intersect (segment.ray , intersec);
182+ #endif
166183
167184 if (intersec.primId != NullPrimitive) {
168185 if (scene->devMaterials [intersec.matId ].type == Material::Type::Light) {
@@ -176,15 +193,20 @@ __global__ void computeIntersections(
176193 // If not first ray, preserve previous sampling information for
177194 // MIS calculation
178195 intersec.prevPos = segment.ray .origin ;
196+ intersec.prev = segment.prev ;
179197 }
180198 }
181199 else {
182200 intersec.wo = -segment.ray .direction ;
183201 }
202+ if (sortMaterial) {
203+ materialKeys[pathIdx] = intersec.matId ;
204+ }
184205 }
185- intersections[pathIdx] = intersec;
186- # endif
206+ else if (sortMaterial) {
207+ materialKeys[pathIdx] = - 1 ;
187208 }
209+ intersections[pathIdx] = intersec;
188210}
189211
190212__global__ void computeTerminatedRays (
@@ -248,17 +270,19 @@ __global__ void pathIntegSampleSurface(
248270 glm::vec3 accRadiance (0 .f );
249271
250272 if (material.type == Material::Type::Light) {
273+ PrevBSDFSampleInfo prev = intersec.prev ;
274+
251275 glm::vec3 radiance = material.baseColor * material.emittance ;
252276 if (depth == 0 ) {
253277 accRadiance += radiance;
254278 }
255- else if (segment .deltaSample ) {
279+ else if (prev .deltaSample ) {
256280 accRadiance += radiance * segment.throughput ;
257281 }
258282 else {
259283 float lightPdf = Math::pdfAreaToSolidAngle (Math::luminance (radiance) * scene->sumLightPowerInv ,
260284 intersec.prevPos , intersec.pos , intersec.norm );
261- float BSDFPdf = segment .BSDFPdf ;
285+ float BSDFPdf = prev .BSDFPdf ;
262286 accRadiance += radiance * segment.throughput * Math::powerHeuristic (BSDFPdf, lightPdf);
263287 }
264288 segment.remainingBounces = 0 ;
@@ -293,8 +317,7 @@ __global__ void pathIntegSampleSurface(
293317 segment.throughput *= sample.bsdf / sample.pdf *
294318 (deltaSample ? 1 .f : Math::absDot (intersec.norm , sample.dir ));
295319 segment.ray = makeOffsetedRay (intersec.pos , sample.dir );
296- segment.BSDFPdf = sample.pdf ;
297- segment.deltaSample = deltaSample;
320+ segment.prev = { sample.pdf , deltaSample };
298321 segment.remainingBounces --;
299322 }
300323 }
@@ -403,14 +426,22 @@ void pathTrace(uchar4* pbo, int frame, int iter) {
403426 numPaths,
404427 devPaths,
405428 hstScene->devScene ,
406- devIntersections
429+ devIntersections,
430+ devIntersecMatKeys,
431+ Settings::sortMaterial
407432 );
408433 checkCUDAError (" PT::computeInteractions" );
409434 cudaDeviceSynchronize ();
410435
411436 // TODO: compare between directly shading the path segments and shading
412437 // path segments that have been reshuffled to be contiguous in memory.
413438
439+ if (Settings::sortMaterial) {
440+ cudaMemcpyDevToDev (devSegmentMatKeys, devIntersecMatKeys, numPaths * sizeof (int ));
441+ thrust::sort_by_key (devIntersecMatKeysThr, devIntersecMatKeysThr + numPaths, devIntersectionsThr);
442+ thrust::sort_by_key (devSegmentMatKeysThr, devSegmentMatKeysThr + numPaths, devPathsThr);
443+ }
444+
414445 pathIntegSampleSurface<<<numBlocksPathSegmentTracing, blockSize1D>>> (
415446 iter, depth, devPaths, devIntersections, hstScene->devScene , numPaths
416447 );
@@ -440,7 +471,7 @@ void pathTrace(uchar4* pbo, int frame, int iter) {
440471 // /////////////////////////////////////////////////////////////////////////
441472
442473 // Send results to OpenGL buffer for rendering
443- sendImageToPBO<<<blocksPerGrid2D, blockSize2D>>> (pbo, cam.resolution , iter, devImage, ToneMapping::method );
474+ sendImageToPBO<<<blocksPerGrid2D, blockSize2D>>> (pbo, cam.resolution , iter, devImage, Settings::toneMapping );
444475
445476 // Retrieve image from GPU
446477 cudaMemcpy (hstScene->state .image .data (), devImage,
0 commit comments