Skip to content

Commit 8ecd4e4

Browse files
refactor raytracer for the shading to happen in model space (less matrix transforms)
1 parent 8e45c47 commit 8ecd4e4

File tree

5 files changed

+35
-30
lines changed

5 files changed

+35
-30
lines changed

examples_tests/22.RaytracedAO/Renderer.cpp

Lines changed: 3 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -622,7 +622,7 @@ core::smart_refctd_ptr<IGPUImageView> Renderer::createScreenSizedTexture(E_FORMA
622622
return m_driver->createGPUImageView(std::move(viewparams));
623623
}
624624

625-
constexpr uint16_t m_maxDepth = 2u;
625+
constexpr uint16_t m_maxDepth = 5u;
626626
constexpr uint16_t m_UNUSED_russianRouletteDepth = 5u;
627627
bool extractIntegratorInfo(const ext::MitsubaLoader::CElementIntegrator& integrator, uint32_t &bxdfSamples, uint32_t &maxNEESamples)
628628
{
@@ -1075,9 +1075,7 @@ void Renderer::deinit()
10751075
}
10761076
m_accumulation = m_tonemapOutput = nullptr;
10771077

1078-
// release the last OpenCL object and wait for OpenCL to finish
1079-
for (auto i=0; i<2u; i++)
1080-
ocl::COpenCLHandler::ocl.pclEnqueueReleaseGLObjects(commandQueue,1u,&m_rayCountBuffer[i].asRRBuffer.second,1u,nullptr,nullptr);
1078+
// wait for OpenCL to finish
10811079
ocl::COpenCLHandler::ocl.pclFlush(commandQueue);
10821080
ocl::COpenCLHandler::ocl.pclFinish(commandQueue);
10831081
for (auto i=0; i<2u; i++)
@@ -1330,11 +1328,7 @@ void Renderer::traceBounce()
13301328
descriptorSets[3] = m_closestHitDS.get();
13311329
m_driver->bindDescriptorSets(EPBP_COMPUTE,pipelineLayout,0u,4u,descriptorSets,nullptr);
13321330
m_driver->bindComputePipeline(m_closestHitPipeline.get());
1333-
// dont ask my why this fixes the dispatch indirect! (TODO: just download the raycount)
1334-
auto tmp = m_driver->createDeviceLocalGPUBufferOnDedMem(16u);
1335-
m_driver->copyBuffer(m_rayCountBuffer[readIx].buffer.get(),tmp.get(),0u,0u,16u);
1336-
m_driver->dispatchIndirect(tmp.get(),sizeof(uint32_t));
1337-
//m_driver->dispatchIndirect(m_rayCountBuffer[readIx].buffer.get(),sizeof(uint32_t));
1331+
m_driver->dispatchIndirect(m_rayCountBuffer[readIx].buffer.get(),sizeof(uint32_t));
13381332
}
13391333
else
13401334
{

examples_tests/22.RaytracedAO/closestHit.comp

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -53,11 +53,12 @@ void main()
5353
{
5454
const uint triangleID = intersection.primid;
5555
const uvec3 indices = get_triangle_indices(batchInstanceGUID,triangleID);
56-
56+
57+
const mat4x3 batchWorldTform = InstData.data[batchInstanceGUID].tform;
5758
// clear the hit success flag
5859
intersections[vertex_depth_mod_2].data[gl_GlobalInvocationID.x].shapeid = -1;
5960
// obtain ray incoming direction
60-
normalizedV = -ray.direction;
61+
normalizedV = -inverse(mat3(batchWorldTform))*ray.direction;
6162

6263
// positions
6364
const vec3 last_vx_pos = load_positions(indices,batchInstanceGUID);
@@ -74,15 +75,16 @@ void main()
7475
// if we ever support spatially varying emissive, we'll need to hoist barycentric computation and UV fetching to the position fetching
7576
const vec2 compactBary = intersection.uvwt.xy;
7677

77-
const mat2 dBarydScreen = mat2(0.0); // TODO: Covariance Rendering
78-
const nbl_glsl_xoroshiro64star_state_t scramble_start_state = load_aux_vertex_attrs(
79-
compactBary,indices,batchInstanceGUID,material,dBarydScreen,outPixelLocation,vertex_depth
78+
const nbl_glsl_xoroshiro64star_state_t scramble_start_state = load_aux_vertex_attrs(compactBary,indices,batchInstanceGUID,material,outPixelLocation,vertex_depth
79+
#ifdef TEX_PREFETCH_STREAM
80+
,mat2(0.0) // TODO: Covariance Rendering
81+
#endif
8082
);
8183

8284
const vec3 hitWorldPos = dPdBary*compactBary.xy+last_vx_pos;
8385

8486
generate_next_rays(
85-
MAX_RAYS_GENERATED,material,frontfacing,vertex_depth,
87+
MAX_RAYS_GENERATED,batchWorldTform,material,frontfacing,vertex_depth,
8688
scramble_start_state,sampleID,outPixelLocation,hitWorldPos,geomNormal,throughput
8789
);
8890
}

examples_tests/22.RaytracedAO/cull.comp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,7 @@ void main()
4848
const uint batchInstanceGUID = batchInstanceData.batchInstanceGUID;
4949

5050
const nbl_glsl_ext_Mitsuba_Loader_instance_data_t instanceData = InstData.data[batchInstanceGUID];
51-
const mat4x3 worldMatrix = InstData.data[batchInstanceGUID].tform;
51+
const mat4x3 worldMatrix = instanceData.tform;
5252
const mat4 MVP = nbl_glsl_pseudoMul4x4with4x3(pc.data.viewProjMatrix,worldMatrix);
5353

5454
// cull

examples_tests/22.RaytracedAO/raygen.comp

Lines changed: 11 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@ void main()
3636
// vis buffer read
3737
const uvec4 visBuffer = texelFetch(frontFacingTriangleIDDrawID_unorm16Bary_dBarydScreenHalf2x2,ivec2(outPixelLocation),0);
3838

39-
// unproject
39+
// unproject (TODO: redo)
4040
vec3 hitWorldPos;
4141
{
4242
const vec3 NDC = vec3(vec2(outPixelLocation)*staticViewData.rcpPixelSize+staticViewData.rcpHalfPixelSize,1.0-revdepth);
@@ -57,7 +57,11 @@ void main()
5757
#ifdef TEX_PREFETCH_STREAM
5858
const mat2 dBarydScreen = mat2(unpackHalf2x16(visBuffer[2]),unpackHalf2x16(visBuffer[3]));
5959
#endif
60-
60+
61+
// TODO: redo
62+
const mat4x3 batchWorldTform = InstData.data[batchInstanceGUID].tform;
63+
hitWorldPos = inverse(mat3(batchWorldTform))*(hitWorldPos-batchWorldTform[3]);
64+
normalizedV = inverse(mat3(batchWorldTform))*normalizedV;
6165

6266
const uvec3 indices = get_triangle_indices(batchInstanceGUID,triangleID);
6367

@@ -68,16 +72,18 @@ void main()
6872
const uint vertex_depth_mod_2 = 0x1u;
6973
// load vertex data
7074
load_positions(indices,batchInstanceGUID);
71-
const nbl_glsl_xoroshiro64star_state_t scramble_start_state = load_aux_vertex_attrs(
72-
compactBary,indices,batchInstanceGUID,material,dBarydScreen,outPixelLocation,vertex_depth_mod_2
75+
const nbl_glsl_xoroshiro64star_state_t scramble_start_state = load_aux_vertex_attrs(compactBary,indices,batchInstanceGUID,material,outPixelLocation,vertex_depth_mod_2
76+
#ifdef TEX_PREFETCH_STREAM
77+
,dBarydScreen
78+
#endif
7379
);
7480

7581
// generate rays
7682
const uint sampleID = bitfieldExtract(pc.cummon.samplesComputed_depth,0,16);
7783
const vec3 geomNormal = cross(dPdBary[0],dPdBary[1]);
7884
const uint vertex_depth = 1u;
7985
generate_next_rays(
80-
staticViewData.samplesPerPixelPerDispatch,material,frontfacing,vertex_depth,
86+
staticViewData.samplesPerPixelPerDispatch,batchWorldTform,material,frontfacing,vertex_depth,
8187
scramble_start_state,sampleID,outPixelLocation,hitWorldPos,geomNormal,vec3(1.0)
8288
);
8389
}

examples_tests/22.RaytracedAO/raytraceCommon.glsl

Lines changed: 12 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -171,8 +171,12 @@ void gen_sample_ray(
171171
}
172172

173173
nbl_glsl_xoroshiro64star_state_t load_aux_vertex_attrs(
174-
in vec2 compactBary, in uvec3 indices, in uint batchInstanceGUID, in nbl_glsl_MC_oriented_material_t material,
175-
in mat2 dBarydScreen, in uvec2 outPixelLocation, in uint vertex_depth_mod_2
174+
in vec2 compactBary, in uvec3 indices, in uint batchInstanceGUID,
175+
in nbl_glsl_MC_oriented_material_t material,
176+
in uvec2 outPixelLocation, in uint vertex_depth_mod_2
177+
#ifdef TEX_PREFETCH_STREAM
178+
,in mat2 dBarydScreen
179+
#endif
176180
)
177181
{
178182
// if we ever support spatially varying emissive, we'll need to hoist barycentric computation and UV fetching to the position fetching
@@ -199,22 +203,19 @@ nbl_glsl_xoroshiro64star_state_t load_aux_vertex_attrs(
199203
#endif
200204
// not needed for NEE unless doing Area or Projected Solid Angle Sampling
201205
const vec3 normal = normals*nbl_glsl_barycentric_expand(compactBary);
202-
normalizedN.x = dot(InstData.data[batchInstanceGUID].normalMatrixRow0,normal);
203-
normalizedN.y = dot(InstData.data[batchInstanceGUID].normalMatrixRow1,normal);
204-
normalizedN.z = dot(InstData.data[batchInstanceGUID].normalMatrixRow2,normal);
205206

206207
// init scramble while waiting for getting the instance's normal matrix
207208
const nbl_glsl_xoroshiro64star_state_t scramble_start_state = imageLoad(scramblebuf,ivec3(outPixelLocation,vertex_depth_mod_2)).rg;
208209

209210
// while waiting for the scramble state
210-
normalizedN = normalize(normalizedN);
211+
normalizedN = normalize(normal);
211212

212213
return scramble_start_state;
213214
}
214215

215216
void generate_next_rays(
216-
in uint maxRaysToGen, in nbl_glsl_MC_oriented_material_t material, in bool frontfacing, in uint vertex_depth,
217-
in nbl_glsl_xoroshiro64star_state_t scramble_start_state, in uint sampleID, in uvec2 outPixelLocation,
217+
in uint maxRaysToGen, in mat4x3 batchWorldTform, in nbl_glsl_MC_oriented_material_t material, in bool frontfacing,
218+
in uint vertex_depth, in nbl_glsl_xoroshiro64star_state_t scramble_start_state, in uint sampleID, in uvec2 outPixelLocation,
218219
in vec3 origin, vec3 geomNormal, in vec3 prevThroughput)
219220
{
220221
// get material streams as well
@@ -252,6 +253,8 @@ void generate_next_rays(
252253
const uint baseOutputID = atomicAdd(traceIndirect[vertex_depth_mod_2_inv].rayCount,raysToAllocate);
253254
// set up dispatch indirect
254255
atomicMax(traceIndirect[vertex_depth_mod_2_inv].params.num_groups_x,(baseOutputID+raysToAllocate-1u)/WORKGROUP_SIZE+1u);
256+
257+
const mat3 batchWorldScaleRot = mat3(batchWorldTform);
255258
uint offset = 0u;
256259
for (uint i=0u; i<maxRaysToGen; i++)
257260
if (maxT[i]!=0.f)
@@ -261,7 +264,7 @@ void generate_next_rays(
261264
const float err = 1.f/96.f;
262265
newRay.origin = origin+/*geomNormal/max(max(geomNormal.x,geomNormal.y),geomNormal.z)*sign(dot(geomNormal,direction[i]))*/direction[i]*err;
263266
newRay.maxT = maxT[i];
264-
newRay.direction = direction[i];
267+
newRay.direction = batchWorldScaleRot*direction[i];
265268
newRay.time = packOutPixelLocation(outPixelLocation);
266269
newRay.mask = -1;
267270
newRay._active = 1;

0 commit comments

Comments
 (0)