Skip to content

Commit 264bac3

Browse files
"optimize" out matrix inverses and fix bugs
1 parent da9deae commit 264bac3

File tree

3 files changed

+32
-22
lines changed

3 files changed

+32
-22
lines changed

examples_tests/22.RaytracedAO/closestHit.comp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -52,21 +52,22 @@ void main()
5252
if (hit)
5353
{
5454
const uint triangleID = intersection.primid;
55-
const uvec3 indices = get_triangle_indices(batchInstanceGUID,triangleID);
56-
57-
const mat4x3 batchWorldTform = InstData.data[batchInstanceGUID].tform;
55+
const nbl_glsl_ext_Mitsuba_Loader_instance_data_t batchInstanceData = InstData.data[batchInstanceGUID];
5856
// clear the hit success flag
5957
intersections[vertex_depth_mod_2].data[gl_GlobalInvocationID.x].shapeid = -1;
58+
const uvec3 indices = get_triangle_indices(batchInstanceGUID,triangleID);
59+
6060
// obtain ray incoming direction
61-
normalizedV = -inverse(mat3(batchWorldTform))*ray.direction;
61+
normalizedV = -mat3(batchInstanceData.normalMatrixRow0,batchInstanceData.normalMatrixRow1,batchInstanceData.normalMatrixRow2)*ray.direction;
62+
normalizedV = normalize(normalizedV); // doesn't non-uniform scale screw up BxDF eval and generation?
6263

6364
// positions
6465
const vec3 last_vx_pos = load_positions(indices,batchInstanceGUID);
6566
const vec3 geomNormal = cross(dPdBary[0],dPdBary[1]);
6667
const bool frontfacing = dot(geomNormal,normalizedV)>0.f;
6768

6869
// get material
69-
const nbl_glsl_MC_oriented_material_t material = nbl_glsl_MC_material_data_t_getOriented(InstData.data[batchInstanceGUID].material,frontfacing);
70+
const nbl_glsl_MC_oriented_material_t material = nbl_glsl_MC_material_data_t_getOriented(batchInstanceData.material,frontfacing);
7071
emissive = nbl_glsl_MC_oriented_material_t_getEmissive(material);
7172

7273
const bool _continue = vertex_depth!=MAX_PATH_DEPTH && ray.maxT==FLT_MAX; // last vertex or was a NEE path
@@ -84,7 +85,7 @@ void main()
8485
const vec3 hitWorldPos = dPdBary*compactBary.xy+last_vx_pos;
8586

8687
generate_next_rays(
87-
MAX_RAYS_GENERATED,batchWorldTform,material,frontfacing,vertex_depth,
88+
MAX_RAYS_GENERATED,batchInstanceData,material,frontfacing,vertex_depth,
8889
scramble_start_state,sampleID,outPixelLocation,hitWorldPos,geomNormal,throughput
8990
);
9091
}

examples_tests/22.RaytracedAO/raygen.comp

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -37,15 +37,14 @@ void main()
3737
const uvec4 visBuffer = texelFetch(frontFacingTriangleIDDrawID_unorm16Bary_dBarydScreenHalf2x2,ivec2(outPixelLocation),0);
3838

3939
// unproject (TODO: redo)
40-
vec3 hitWorldPos;
40+
vec3 hitWorldPos,V;
4141
{
4242
const vec3 NDC = vec3(vec2(outPixelLocation)*staticViewData.rcpPixelSize+staticViewData.rcpHalfPixelSize,1.0-revdepth);
4343

4444
const vec4 tmp = nbl_glsl_pseudoMul4x4with3x1(pc.cummon.inverseMVP,NDC);
4545
hitWorldPos = tmp.xyz/tmp.w;
4646

47-
const vec3 V = nbl_glsl_pseudoMul3x4with3x1(pc.cummon.ndcToV,NDC);
48-
normalizedV = normalize(V);
47+
V = nbl_glsl_pseudoMul3x4with3x1(pc.cummon.ndcToV,NDC);
4948
}
5049

5150
// vis buffer decode
@@ -58,15 +57,18 @@ void main()
5857
const mat2 dBarydScreen = mat2(unpackHalf2x16(visBuffer[2]),unpackHalf2x16(visBuffer[3]));
5958
#endif
6059

61-
// TODO: redo
62-
const mat4x3 batchWorldTform = InstData.data[batchInstanceGUID].tform;
63-
hitWorldPos = inverse(mat3(batchWorldTform))*(hitWorldPos-batchWorldTform[3]);
64-
normalizedV = inverse(mat3(batchWorldTform))*normalizedV;
65-
60+
const nbl_glsl_ext_Mitsuba_Loader_instance_data_t batchInstanceData = InstData.data[batchInstanceGUID];
6661
const uvec3 indices = get_triangle_indices(batchInstanceGUID,triangleID);
62+
63+
// TODO: redo
64+
{
65+
const mat3 batchScaleRotInv = mat3(batchInstanceData.normalMatrixRow0,batchInstanceData.normalMatrixRow1,batchInstanceData.normalMatrixRow2);
66+
hitWorldPos = batchScaleRotInv*(hitWorldPos-batchInstanceData.tform[3]);
67+
normalizedV = normalize(batchScaleRotInv*V); // doesn't non-uniform scale screw up BxDF eval and generation?
68+
}
6769

6870
// get material while waiting for indices
69-
const nbl_glsl_MC_oriented_material_t material = nbl_glsl_MC_material_data_t_getOriented(InstData.data[batchInstanceGUID].material,frontfacing);
71+
const nbl_glsl_MC_oriented_material_t material = nbl_glsl_MC_material_data_t_getOriented(batchInstanceData.material,frontfacing);
7072
emissive = nbl_glsl_MC_oriented_material_t_getEmissive(material);
7173

7274
const uint vertex_depth_mod_2 = 0x1u;
@@ -83,7 +85,7 @@ void main()
8385
const vec3 geomNormal = cross(dPdBary[0],dPdBary[1]);
8486
const uint vertex_depth = 1u;
8587
generate_next_rays(
86-
staticViewData.samplesPerPixelPerDispatch,batchWorldTform,material,frontfacing,vertex_depth,
88+
staticViewData.samplesPerPixelPerDispatch,batchInstanceData,material,frontfacing,vertex_depth,
8789
scramble_start_state,sampleID,outPixelLocation,hitWorldPos,geomNormal,vec3(1.0)
8890
);
8991
}

examples_tests/22.RaytracedAO/raytraceCommon.glsl

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -210,9 +210,10 @@ nbl_glsl_xoroshiro64star_state_t load_aux_vertex_attrs(
210210
}
211211

212212
void generate_next_rays(
213-
in uint maxRaysToGen, in mat4x3 batchWorldTform, in nbl_glsl_MC_oriented_material_t material, in bool frontfacing,
214-
in uint vertex_depth, in nbl_glsl_xoroshiro64star_state_t scramble_start_state, in uint sampleID, in uvec2 outPixelLocation,
215-
in vec3 origin, vec3 geomNormal, in vec3 prevThroughput)
213+
in uint maxRaysToGen, in nbl_glsl_ext_Mitsuba_Loader_instance_data_t batchInstanceData,
214+
in nbl_glsl_MC_oriented_material_t material, in bool frontfacing, in uint vertex_depth,
215+
in nbl_glsl_xoroshiro64star_state_t scramble_start_state, in uint sampleID, in uvec2 outPixelLocation,
216+
in vec3 pos, vec3 geomNormal, in vec3 prevThroughput)
216217
{
217218
// get material streams as well
218219
const nbl_glsl_MC_instr_stream_t gcs = nbl_glsl_MC_oriented_material_t_getGenChoiceStream(material);
@@ -231,6 +232,12 @@ void generate_next_rays(
231232
// prepare rays
232233
uint raysToAllocate = 0u;
233234
float maxT[MAX_RAYS_GENERATED]; vec3 direction[MAX_RAYS_GENERATED]; vec3 nextThroughput[MAX_RAYS_GENERATED];
235+
for (uint i=1u; i!=vertex_depth; i++)
236+
{
237+
nbl_glsl_xoroshiro64star(scramble_start_state);
238+
nbl_glsl_xoroshiro64star(scramble_start_state);
239+
nbl_glsl_xoroshiro64star(scramble_start_state);
240+
}
234241
for (uint i=0u; i<maxRaysToGen; i++)
235242
{
236243
nbl_glsl_xoroshiro64star_state_t scramble_state = scramble_start_state;
@@ -250,17 +257,17 @@ void generate_next_rays(
250257
// set up dispatch indirect
251258
atomicMax(traceIndirect[vertex_depth_mod_2_inv].params.num_groups_x,(baseOutputID+raysToAllocate-1u)/WORKGROUP_SIZE+1u);
252259

253-
const mat3 batchWorldScaleRot = mat3(batchWorldTform);
260+
const mat4x3 batchWorldTform = batchInstanceData.tform;
254261
uint offset = 0u;
255262
for (uint i=0u; i<maxRaysToGen; i++)
256263
if (maxT[i]!=0.f)
257264
{
258265
nbl_glsl_ext_RadeonRays_ray newRay;
259266
// TODO: improve ray offsets
260267
const float err = 1.f/96.f;
261-
newRay.origin = origin+/*geomNormal/max(max(geomNormal.x,geomNormal.y),geomNormal.z)*sign(dot(geomNormal,direction[i]))*/direction[i]*err;
268+
newRay.origin = mat3(batchWorldTform)*(pos+/*geomNormal/max(max(geomNormal.x,geomNormal.y),geomNormal.z)*sign(dot(geomNormal,direction[i]))*/direction[i]*err)+batchWorldTform[3];
262269
newRay.maxT = maxT[i];
263-
newRay.direction = batchWorldScaleRot*direction[i];
270+
newRay.direction = mat3(batchWorldTform)*direction[i]; // normalize after ? (doesn't non-uniform scale screw up BxDF eval and generation?)
264271
newRay.time = packOutPixelLocation(outPixelLocation);
265272
newRay.mask = -1;
266273
newRay._active = 1;

0 commit comments

Comments
 (0)