1
+ #ifndef _RAYTRACE_COMMON_GLSL_INCLUDED_
2
+ #define _RAYTRACE_COMMON_GLSL_INCLUDED_
3
+
4
+ #include "virtualGeometry.glsl"
5
+
6
+
7
// Per-dispatch constants; push constant so no descriptor update is needed between passes.
layout (push_constant, row_major) uniform PushConstants
{
	RaytraceShaderCommonData_t cummon; // (sic) "cummon" — name comes from the shared CPU/GPU header, referenced as pc.cummon throughout
} pc;

// lights
// prefix-sum of light picking probabilities, binary-searched to importance-sample a light
layout (set = 1, binding = 3, std430) restrict readonly buffer CumulativeLightPDF
{
	uint lightCDF[];
};
layout (set = 1, binding = 4, std430, row_major) restrict readonly buffer Lights
{
	SLight light[];
};

// view data that stays constant for the whole frame
layout (set = 2, binding = 0, row_major) uniform StaticViewData
{
	StaticViewData_t staticViewData;
};
// rng
// per-pixel xoroshiro64* scramble seeds (rg32ui = the 2x32bit state); array layers select a seed set
layout (set = 2, binding = 1, rg32ui) uniform uimage2DArray scramblebuf;
// precomputed low-discrepancy sample sequence shared by all pixels
layout (set = 2, binding = 2) uniform usamplerBuffer sampleSequence;
// accumulation
// running radiance average, RGB19E7-encoded into two uints per texel
layout (set = 2, binding = 3, rg32ui) restrict uniform uimage2DArray accumulation;
// ray data
#include <nbl/builtin/glsl/ext/RadeonRays/ray.glsl>
// output rays for the next bounce, consumed by the ray caster
layout (set = 2, binding = 4, std430) restrict writeonly buffer SinkRays
{
	nbl_glsl_ext_RadeonRays_ray sinkRays[];
};
#include <nbl/builtin/glsl/utils/indirect_commands.glsl>
// multi-buffered ray counters: written with atomicAdd while generating, one slot per in-flight pass
layout (set = 2, binding = 5) restrict coherent buffer RayCount // maybe remove coherent keyword
{
	uint rayCount[RAYCOUNT_N_BUFFERING];
};
42
+
43
// Zeroes the ray-counter slot that the *next* pass will accumulate into.
// Only the invocation at the global origin performs the store, so the whole
// dispatch issues exactly one write.
void clear_raycount()
{
	if (gl_GlobalInvocationID!=uvec3(0u))
		return;

	const uint nextWriteIx = (pc.cummon.rayCountWriteIx+1u)&uint(RAYCOUNT_N_BUFFERING_MASK);
	rayCount[nextWriteIx] = 0u;
}
48
+
49
+ //
50
// Fetches the three vertex indices of `triangleID` belonging to the given batch instance.
// `batchInstanceData.padding0` holds the batch's base offset into the virtual-geometry index data.
uvec3 get_triangle_indices(in nbl_glsl_ext_Mitsuba_Loader_instance_data_t batchInstanceData, in uint triangleID)
{
	const uint baseTriangleVertex = batchInstanceData.padding0+triangleID*3u;

	uvec3 indices;
	for (uint vtx=0u; vtx<3u; vtx++)
		indices[vtx] = nbl_glsl_VG_fetchTriangleVertexIndex(baseTriangleVertex,vtx);
	return indices;
}
59
+
60
+ // for per pixel inputs
61
+ #include < nbl/ builtin/ glsl/ random/ xoroshiro.glsl>
62
+ #include < nbl/ builtin/ glsl/ utils/ transform.glsl>
63
+
64
+ #include < nbl/ builtin/ glsl/ format/ decode.glsl>
65
+ #include < nbl/ builtin/ glsl/ format/ encode.glsl>
66
// Reads the accumulated radiance at `coord` (xy = pixel, z = layer) and decodes it from RGB19E7.
vec3 fetchAccumulation(in uvec3 coord)
{
	return nbl_glsl_decodeRGB19E7(imageLoad(accumulation,ivec3(coord)).rg);
}
71
// Encodes `color` as RGB19E7 and writes it into the accumulation image at `coord`.
void storeAccumulation(in vec3 color, in uvec3 coord)
{
	const ivec3 texel = ivec3(coord);
	const uvec2 encoded = nbl_glsl_encodeRGB19E7(color);
	imageStore(accumulation,texel,uvec4(encoded,0u,0u));
}
76
+
77
// Folds `emissive` into the progressive running average for `accumulationLocation`.
// Update rule: new_avg = old_avg + (sample - old_avg) * rcpFramesDispatched, where the
// "- old_avg" part only applies at the first accumulating path vertex (deeper vertices
// contribute additively to the same pixel within a frame).
// Outputs the updated accumulator through `acc`; returns whether the delta was large
// enough that the caller should bother writing it back to the image.
bool record_emission_common(out vec3 acc, in uvec3 accumulationLocation, vec3 emissive, in bool first_accumulating_path_vertex)
{
	acc = vec3(0.0);
	// rcpFramesDispatched==1.f exactly on the very first frame, so this flags frames 2+
	const bool notFirstFrame = pc.cummon.rcpFramesDispatched!=1.f;
	// skip the image read on the first frame's first vertex — there's nothing accumulated yet
	if (!first_accumulating_path_vertex || notFirstFrame)
		acc = fetchAccumulation(accumulationLocation);
	if (first_accumulating_path_vertex) // a bit useless to add && notFirstFrame) its a tautology with acc=vec3(0.0)
		emissive -= acc;
	emissive *= pc.cummon.rcpFramesDispatched;

	// only report a change if the delta exceeds the smallest normalized float in some channel
	const bool anyChange = any(greaterThan(abs(emissive),vec3(nbl_glsl_FLT_MIN)));
	acc += emissive;
	return anyChange;
}
91
+
92
+
93
+
94
// Packs a pixel coordinate into the float-typed `time` field of a RadeonRays ray:
// x occupies the low 16 bits, y the high 16 (each component assumed to fit in 16 bits).
float packOutPixelLocation(in uvec2 outPixelLocation)
{
	const uint packedBits = bitfieldInsert(outPixelLocation.x,outPixelLocation.y,16,16);
	return uintBitsToFloat(packedBits);
}
98
// Inverse of `packOutPixelLocation`: recovers the pixel coordinate from the ray's
// reinterpreted `time` field (x from the low 16 bits, y from the high 16).
// The parameter was renamed from `packed`: that identifier is a reserved keyword in
// GLSL ES (and the name of an interface-block layout qualifier), so some GLSL
// front-ends refuse to compile it. Purely local rename — callers are unaffected.
uvec2 unpackOutPixelLocation(in float packedLocation)
{
	const uint asUint = floatBitsToUint(packedLocation);
	return uvec2(asUint&0xffffu,asUint>>16u);
}
103
+
104
+ #include "bin/ runtime_defines.glsl"
105
+ #include < nbl/ builtin/ glsl/ ext/ MitsubaLoader/ material_compiler_compatibility_impl.glsl>
106
// Worldspace unit view vector; presumably written by the hit-processing code before the
// material compiler runs — not set anywhere in this file, confirm against the includers.
vec3 normalizedV;
// Material-compiler callback: returns the worldspace view vector for BSDF evaluation.
vec3 nbl_glsl_MC_getNormalizedWorldSpaceV()
{
	return normalizedV;
}
// Worldspace unit shading normal, written by `load_aux_vertex_attrs` below.
vec3 normalizedN;
// Material-compiler callback: returns the worldspace shading normal for BSDF evaluation.
vec3 nbl_glsl_MC_getNormalizedWorldSpaceN()
{
	return normalizedN;
}
116
+
117
+ #include < nbl/ builtin/ glsl/ barycentric/ utils.glsl>
118
// Worldspace position derivatives w.r.t. the first two barycentrics; filled by `load_positions`.
mat2x3 dPdBary;
// Loads the triangle's three vertex positions, transforms them to worldspace, fills the
// `dPdBary` side-effect global and returns the worldspace position of the third vertex
// (the barycentric origin). Translation is applied only to the returned point since the
// derivative differences are translation-invariant.
vec3 load_positions(in uvec3 indices, in nbl_glsl_ext_Mitsuba_Loader_instance_data_t batchInstanceData)
{
	const mat4x3 tform = batchInstanceData.tform;

	mat3 worldPos;
	for (int vtx=0; vtx<3; vtx++)
		worldPos[vtx] = nbl_glsl_fetchVtxPos(indices[vtx],batchInstanceData);
	worldPos = mat3(tform)*worldPos;

	dPdBary[0] = worldPos[0]-worldPos[2];
	dPdBary[1] = worldPos[1]-worldPos[2];
	return worldPos[2]+tform[3];
}
133
+
134
#ifdef TEX_PREFETCH_STREAM
// Material-compiler callback for bump/normal mapping: position derivatives w.r.t. barycentrics.
mat2x3 nbl_glsl_perturbNormal_dPdSomething()
{
	return dPdBary;
}
// UV derivatives w.r.t. the first two barycentrics; filled by `load_aux_vertex_attrs`.
mat2 dUVdBary;
// Material-compiler callback: UV derivatives matching `nbl_glsl_perturbNormal_dPdSomething`.
mat2 nbl_glsl_perturbNormal_dUVdSomething()
{
	return dUVdBary;
}
#endif
145
+ #define _NBL_USER_PROVIDED_MATERIAL_COMPILER_GLSL_BACKEND_FUNCTIONS_
146
+ #include < nbl/ builtin/ glsl/ material_compiler/ common.glsl>
147
+
148
// Loads the non-positional vertex attributes (UVs when texture prefetching is enabled,
// normals always), runs the material's texture prefetch stream, computes the worldspace
// shading normal into the `normalizedN` global, and returns the pixel's xoroshiro64*
// scramble seed read from `scramblebuf`.
// Statement order is deliberate: image/buffer loads are issued early so ALU work can
// overlap their latency (see the inline comments).
nbl_glsl_xoroshiro64star_state_t load_aux_vertex_attrs(
	in vec2 compactBary, in uvec3 indices, in nbl_glsl_ext_Mitsuba_Loader_instance_data_t batchInstanceData,
	in nbl_glsl_MC_oriented_material_t material,
	in uvec2 outPixelLocation, in uint vertex_depth_mod_2
#ifdef TEX_PREFETCH_STREAM
	,in mat2 dBarydScreen
#endif
)
{
	// if we ever support spatially varying emissive, we'll need to hoist barycentric computation and UV fetching to the position fetching
#ifdef TEX_PREFETCH_STREAM
	const mat3x2 uvs = mat3x2(
		nbl_glsl_fetchVtxUV(indices[0],batchInstanceData),
		nbl_glsl_fetchVtxUV(indices[1],batchInstanceData),
		nbl_glsl_fetchVtxUV(indices[2],batchInstanceData)
	);
	const nbl_glsl_MC_instr_stream_t tps = nbl_glsl_MC_oriented_material_t_getTexPrefetchStream(material);
#endif
	// only needed for continuing
	const mat3 normals = mat3(
		nbl_glsl_fetchVtxNormal(indices[0],batchInstanceData),
		nbl_glsl_fetchVtxNormal(indices[1],batchInstanceData),
		nbl_glsl_fetchVtxNormal(indices[2],batchInstanceData)
	);

#ifdef TEX_PREFETCH_STREAM
	// UV derivatives w.r.t. barycentrics, chained with dBary/dScreen for mip selection
	dUVdBary = mat2(uvs[0]-uvs[2],uvs[1]-uvs[2]);
	const vec2 UV = dUVdBary*compactBary+uvs[2];
	const mat2 dUVdScreen = nbl_glsl_applyChainRule2D(dUVdBary,dBarydScreen);
	nbl_glsl_MC_runTexPrefetchStream(tps,UV,dUVdScreen);
#endif
	// not needed for NEE unless doing Area or Projected Solid Angle Sampling
	const vec3 normal = normals*nbl_glsl_barycentric_expand(compactBary);

	// init scramble while waiting for getting the instance's normal matrix
	// NOTE(review): layer is hardcoded to 1u even though `vertex_depth_mod_2` is a parameter
	// (otherwise unused) — the commented-out name suggests it was meant to index the layer; confirm.
	const nbl_glsl_xoroshiro64star_state_t scramble_start_state = imageLoad(scramblebuf,ivec3(outPixelLocation,1u/*vertex_depth_mod_2*/)).rg;

	// while waiting for the scramble state
	normalizedN.x = dot(batchInstanceData.normalMatrixRow0,normal);
	normalizedN.y = dot(batchInstanceData.normalMatrixRow1,normal);
	normalizedN.z = dot(batchInstanceData.normalMatrixRow2,normal);
	normalizedN = normalize(normalizedN);

	return scramble_start_state;
}
193
+
194
// Produces 3 random numbers in [0,1) for (sample `_sample`, path dimension `depth`):
// a slice of the precomputed sample sequence, decorrelated per pixel by XORing with
// three draws from the xoroshiro64* scramble stream.
vec3 rand3d(inout nbl_glsl_xoroshiro64star_state_t scramble_state, in int _sample, in int depth)
{
	const int sequenceIx = int(_sample)+(depth-1)*MAX_ACCUMULATED_SAMPLES;
	const uvec3 scramble = uvec3(nbl_glsl_xoroshiro64star(scramble_state),nbl_glsl_xoroshiro64star(scramble_state),nbl_glsl_xoroshiro64star(scramble_state));
	uvec3 seqVal = texelFetch(sampleSequence,sequenceIx).xyz;
	seqVal ^= scramble;
	// scale factor ~2^-32 maps the full uint range into [0,1)
	return vec3(seqVal)*uintBitsToFloat(0x2f800004u);
}
200
+
201
// Importance-samples the material to produce one continuation ray.
// Outputs: `maxT` (always "infinite" — the threshold culling happens in the caller),
// the sampled `direction`, and the BSDF `throughput` quotient (already divided by the pdf).
// Consumes one 3D point from the random sequence via `rand3d`.
void gen_sample_ray(
	out float maxT, out vec3 direction, out vec3 throughput,
	inout nbl_glsl_xoroshiro64star_state_t scramble_state, in uint sampleID, in uint depth,
	in nbl_glsl_MC_precomputed_t precomp, in nbl_glsl_MC_instr_stream_t gcs, in nbl_glsl_MC_instr_stream_t rnps
)
{
	vec3 xi = rand3d(scramble_state,int(sampleID),int(depth));

	nbl_glsl_LightSample smpl;
	float pdf;
	throughput = nbl_glsl_MC_runGenerateAndRemainderStream(precomp,gcs,rnps,xi,pdf,smpl);

	direction = smpl.L;
	maxT = nbl_glsl_FLT_MAX;
}
217
+
218
+
219
// Generates up to `maxRaysToGen` continuation rays for the path vertex at `origin`,
// culls the ones whose throughput falls below a threshold, atomically reserves output
// slots in the shared counter, offsets the ray origins off the surface to avoid
// self-intersection, and writes the surviving rays into `sinkRays`.
// `scramble_start_state` is taken by value (GLSL `in`), so the RNG fast-forward below
// only mutates a local copy — the pixel's stored seed is untouched.
void generate_next_rays(
	in uint maxRaysToGen, in nbl_glsl_MC_oriented_material_t material, in bool frontfacing, in uint vertex_depth,
	in nbl_glsl_xoroshiro64star_state_t scramble_start_state, in uint sampleID, in uvec2 outPixelLocation,
	in vec3 origin, in vec3 prevThroughput)
{
	// get material streams as well
	const nbl_glsl_MC_instr_stream_t gcs = nbl_glsl_MC_oriented_material_t_getGenChoiceStream(material);
	const nbl_glsl_MC_instr_stream_t rnps = nbl_glsl_MC_oriented_material_t_getRemAndPdfStream(material);


	// need to do this after we have worldspace V and N ready
	const nbl_glsl_MC_precomputed_t precomputed = nbl_glsl_MC_precomputeData(frontfacing);
#ifdef NORM_PRECOMP_STREAM
	const nbl_glsl_MC_instr_stream_t nps = nbl_glsl_MC_oriented_material_t_getNormalPrecompStream(material);
	nbl_glsl_MC_runNormalPrecompStream(nps,precomputed);
#endif

	// parity of the vertex depth; `_inv` only used by the commented-out scramble writeback below
	const uint vertex_depth_mod_2 = vertex_depth&0x1u;
	const uint vertex_depth_mod_2_inv = vertex_depth_mod_2^0x1u;
	// prepare rays
	uint raysToAllocate = 0u;
	float maxT[MAX_RAYS_GENERATED]; vec3 direction[MAX_RAYS_GENERATED]; vec3 nextThroughput[MAX_RAYS_GENERATED];
	// fast-forward the scramble stream past the 3 dimensions consumed at each previous vertex
	for (uint i=1u; i!=vertex_depth; i++)
	{
		nbl_glsl_xoroshiro64star(scramble_start_state);
		nbl_glsl_xoroshiro64star(scramble_start_state);
		nbl_glsl_xoroshiro64star(scramble_start_state);
	}
	for (uint i=0u; i<maxRaysToGen; i++)
	{
		// every generated ray restarts from the same fast-forwarded state but a different sampleID
		nbl_glsl_xoroshiro64star_state_t scramble_state = scramble_start_state;
		// TODO: When generating NEE rays, advance the dimension, NOT the sampleID
		gen_sample_ray(maxT[i],direction[i],nextThroughput[i],scramble_state,sampleID+i,vertex_depth,precomputed,gcs,rnps);
		// TODO: bad idea, invent something else
		//if (i==0u)
			//imageStore(scramblebuf,ivec3(outPixelLocation,vertex_depth_mod_2_inv),uvec4(scramble_state,0u,0u));
		nextThroughput[i] *= prevThroughput;
		// Russian-roulette-style cull: a zeroed maxT marks the slot as dead for the output loop
		if (max(max(nextThroughput[i].x,nextThroughput[i].y),nextThroughput[i].z)>exp2(-19.f)) // TODO: reverse tonemap to adjust the threshold
			raysToAllocate++;
		else
			maxT[i] = 0.f;
	}
	// TODO: investigate workgroup reductions here
	// reserve a contiguous range of output slots for this invocation's surviving rays
	const uint baseOutputID = atomicAdd(rayCount[pc.cummon.rayCountWriteIx],raysToAllocate);

	// the 1.03125f adjusts for the fact that the normal might be too short (inversesqrt precision)
	const float inversesqrt_precision = 1.03125f;
	// TODO: investigate why we can't use `normalizedN` here
	const vec3 ray_offset_vector = normalize(cross(dPdBary[0],dPdBary[1]))*inversesqrt_precision;
	// absolute epsilon plus a term that scales with the origin's magnitude
	float origin_offset = nbl_glsl_numeric_limits_float_epsilon(44u); // I pulled the constants out of my @$$
	origin_offset += dot(abs(ray_offset_vector),abs(origin))*nbl_glsl_numeric_limits_float_epsilon(32u);
	// TODO: in the future run backward error analysis of
	// dot(mat3(WorldToObj)*(origin+offset*geomNormal/length(geomNormal))+(WorldToObj-vx_pos[1]),geomNormal)
	// where
	// origin = mat3x2(vx_pos[2]-vx_pos[1],vx_pos[0]-vx_pos[1])*barys+vx_pos[1]
	// geonNormal = cross(vx_pos[2]-vx_pos[1],vx_pos[0]-vx_pos[1])
	// and we assume only `WorldToObj`, `vx_pos[i]` and `barys` are accurate values. So far:
	// offset > (1+gamma(2))/(1-gamma(2))*(dot(abs(geomNormal),omega_error)+dot(abs(omega),geomNormal_error)+dot(omega_error,geomNormal_error))
	//const vec3 geomNormal = cross(dPdBary[0],dPdBary[1]);
	//float ray_offset = ?;
	//ray_offset = nbl_glsl_ieee754_next_ulp_away_from_zero(ray_offset);
	const vec3 ray_offset = ray_offset_vector*origin_offset;
	// two candidate origins: nudged along the geometric normal, or against it
	const vec3 ray_origin[2] = {origin+ray_offset,origin-ray_offset};
	uint offset = 0u;
	for (uint i=0u; i<maxRaysToGen; i++)
	if (maxT[i]!=0.f) // skip rays culled by the throughput threshold above
	{
		nbl_glsl_ext_RadeonRays_ray newRay;
		// pick the origin nudged to the same side the ray departs towards
		if (dot(ray_offset_vector,direction[i])<0.f)
			newRay.origin = ray_origin[1];
		else
			newRay.origin = ray_origin[0];
		newRay.maxT = maxT[i];
		newRay.direction = direction[i];
		// repurposed fields: `time` carries the packed output pixel...
		newRay.time = packOutPixelLocation(outPixelLocation);
		newRay.mask = -1;
		newRay._active = 1;
		// ...and the padding carries the half-precision throughput plus the sample index
		newRay.useless_padding[0] = packHalf2x16(nextThroughput[i].rg);
		newRay.useless_padding[1] = bitfieldInsert(packHalf2x16(nextThroughput[i].bb),sampleID+i,16,16);
		const uint outputID = baseOutputID+(offset++);
		sinkRays[outputID] = newRay;
	}
}
302
+
303
+ /* TODO: optimize and reorganize
304
+ void main()
305
+ {
306
+ clear_raycount();
307
+ const bool alive = useful_invocation();
308
+ uint raysToAllocate = 0u;
309
+ vec3 emissive;
310
+ if (alive)
311
+ {
312
+ emissive = staticViewData.envmapBaseColor;
313
+
314
+ raysToAllocate = main_prolog(emissive,...);
315
+ }
316
+
317
+ const uint raysLocalEnd = nbl_glsl_workgroupInclusiveAdd(raysToAllocate);
318
+ uint baseOutputID;
319
+ if (gl_LocalInvocationIndex==WORKGROUP_SIZE-1)
320
+ baseOutputID = atomicAdd(rayCount[pc.cummon.rayCountWriteIx],raysLocalEnd);
321
+ baseOutputID = nbl_glsl_workgroupBroadcast(baseOutputID,WORKGROUP_SIZE-1);
322
+
323
+ // coalesce rays
324
+ for ()
325
+ {
326
+ }
327
+ // write them out to global mem
328
+ for ()
329
+ {
330
+ }
331
+
332
+ if (alive)
333
+ {
334
+ // store accumulation
335
+ main_epilog();
336
+ }
337
+ }
338
+ */
339
+ #endif
0 commit comments