Skip to content

Commit 5c0dbf9

Browse files
fix more of the bad merge
1 parent cf70c2d commit 5c0dbf9

File tree

3 files changed

+341
-3
lines changed

3 files changed

+341
-3
lines changed

examples_tests/22.RaytracedAO/Renderer.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ Renderer::Renderer(IVideoDriver* _driver, IAssetManager* _assetManager, scene::I
136136
}
137137

138138
{
139-
constexpr auto raytracingCommonDescriptorCount = 4u;
139+
constexpr auto raytracingCommonDescriptorCount = 6u;
140140
IGPUDescriptorSetLayout::SBinding bindings[raytracingCommonDescriptorCount];
141141
fillIotaDescriptorBindingDeclarations(bindings,ISpecializedShader::ESS_COMPUTE,raytracingCommonDescriptorCount);
142142
bindings[0].type = asset::EDT_UNIFORM_BUFFER;
@@ -1380,7 +1380,6 @@ uint32_t Renderer::traceBounce(uint32_t raycount)
13801380

13811381
auto commandQueue = m_rrManager->getCLCommandQueue();
13821382
const cl_mem clObjects[] = {m_rayBuffer[descSetIx].asRRBuffer.second,m_intersectionBuffer[descSetIx].asRRBuffer.second};
1383-
13841383
const auto objCount = sizeof(clObjects)/sizeof(cl_mem);
13851384
cl_event acquired=nullptr, raycastDone=nullptr;
13861385
// run the raytrace queries

examples_tests/22.RaytracedAO/main.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -266,7 +266,7 @@ int main()
266266

267267
auto extent = renderer->getSceneBound().getExtent();
268268
// want dynamic camera or not?
269-
if (false)
269+
if (true)
270270
{
271271
core::vector3df_SIMD ptu[] = {core::vectorSIMDf().set(camera->getPosition()),camera->getTarget(),camera->getUpVector()};
272272
auto proj = camera->getProjectionMatrix();
Lines changed: 339 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,339 @@
1+
#ifndef _RAYTRACE_COMMON_GLSL_INCLUDED_
2+
#define _RAYTRACE_COMMON_GLSL_INCLUDED_
3+
4+
#include "virtualGeometry.glsl"
5+
6+
7+
layout(push_constant, row_major) uniform PushConstants
8+
{
9+
RaytraceShaderCommonData_t cummon;
10+
} pc;
11+
12+
// lights
13+
layout(set = 1, binding = 3, std430) restrict readonly buffer CumulativeLightPDF
14+
{
15+
uint lightCDF[];
16+
};
17+
layout(set = 1, binding = 4, std430, row_major) restrict readonly buffer Lights
18+
{
19+
SLight light[];
20+
};
21+
22+
layout(set = 2, binding = 0, row_major) uniform StaticViewData
23+
{
24+
StaticViewData_t staticViewData;
25+
};
26+
// rng
27+
layout(set = 2, binding = 1, rg32ui) uniform uimage2DArray scramblebuf;
28+
layout(set = 2, binding = 2) uniform usamplerBuffer sampleSequence;
29+
// accumulation
30+
layout(set = 2, binding = 3, rg32ui) restrict uniform uimage2DArray accumulation;
31+
// ray data
32+
#include <nbl/builtin/glsl/ext/RadeonRays/ray.glsl>
33+
layout(set = 2, binding = 4, std430) restrict writeonly buffer SinkRays
34+
{
35+
nbl_glsl_ext_RadeonRays_ray sinkRays[];
36+
};
37+
#include <nbl/builtin/glsl/utils/indirect_commands.glsl>
38+
layout(set = 2, binding = 5) restrict coherent buffer RayCount // maybe remove coherent keyword
39+
{
40+
uint rayCount[RAYCOUNT_N_BUFFERING];
41+
};
42+
43+
// Zeroes the ray counter slot following the one currently being written
// (index `(rayCountWriteIx+1) & RAYCOUNT_N_BUFFERING_MASK`).
// Only the invocation whose gl_GlobalInvocationID is (0,0,0) performs the store.
void clear_raycount()
{
    // vector `==` yields a single bool that is true only when every component matches
    const bool isOriginInvocation = gl_GlobalInvocationID==uvec3(0u);
    if (!isOriginInvocation)
        return;

    const uint slotToClear = (pc.cummon.rayCountWriteIx+1u)&uint(RAYCOUNT_N_BUFFERING_MASK);
    rayCount[slotToClear] = 0u;
}
48+
49+
//
50+
// Fetches the three vertex indices of triangle `triangleID`.
// The fetch base is `triangleID*3 + batchInstanceData.padding0`; each corner (0,1,2) is
// read through nbl_glsl_VG_fetchTriangleVertexIndex(base,corner).
uvec3 get_triangle_indices(in nbl_glsl_ext_Mitsuba_Loader_instance_data_t batchInstanceData, in uint triangleID)
{
    const uint firstCornerBase = batchInstanceData.padding0+triangleID*3u;

    uvec3 cornerIndices;
    for (uint corner=0u; corner<3u; corner++)
        cornerIndices[corner] = nbl_glsl_VG_fetchTriangleVertexIndex(firstCornerBase,corner);
    return cornerIndices;
}
59+
60+
// for per pixel inputs
61+
#include <nbl/builtin/glsl/random/xoroshiro.glsl>
62+
#include <nbl/builtin/glsl/utils/transform.glsl>
63+
64+
#include <nbl/builtin/glsl/format/decode.glsl>
65+
#include <nbl/builtin/glsl/format/encode.glsl>
66+
// Loads the texel at `coord` from the `accumulation` image and decodes its
// first two 32-bit channels with nbl_glsl_decodeRGB19E7.
vec3 fetchAccumulation(in uvec3 coord)
{
    const ivec3 texel = ivec3(coord);
    return nbl_glsl_decodeRGB19E7(imageLoad(accumulation,texel).rg);
}
71+
// Encodes `color` with nbl_glsl_encodeRGB19E7 and writes the two resulting words
// to the `accumulation` image at `coord`; the remaining two channels are written as 0.
void storeAccumulation(in vec3 color, in uvec3 coord)
{
    const uvec4 texelValue = uvec4(nbl_glsl_encodeRGB19E7(color),0u,0u);
    imageStore(accumulation,ivec3(coord),texelValue);
}
76+
77+
// Blends `emissive` into the stored accumulation value for `accumulationLocation`.
//
// acc (out)                      : the new accumulation value; the caller is responsible for storing it.
// accumulationLocation           : texel passed to fetchAccumulation.
// emissive                       : contribution to add (taken by value, modified locally only).
// first_accumulating_path_vertex : when true the contribution is blended as
//                                  acc + (emissive-acc)*rcpFramesDispatched, otherwise it is added as
//                                  emissive*rcpFramesDispatched on top of the fetched value.
// Returns true when any component of the applied delta exceeds nbl_glsl_FLT_MIN in magnitude.
bool record_emission_common(out vec3 acc, in uvec3 accumulationLocation, vec3 emissive, in bool first_accumulating_path_vertex)
{
    const float rcpFrames = pc.cummon.rcpFramesDispatched;
    const bool isFirstFrame = rcpFrames==1.f;

    // the previous value is irrelevant only for the first vertex of the first frame
    if (first_accumulating_path_vertex && isFirstFrame)
        acc = vec3(0.0);
    else
        acc = fetchAccumulation(accumulationLocation);

    vec3 delta = emissive;
    // no need to also test for the first frame here: `acc` is zero in that case anyway
    if (first_accumulating_path_vertex)
        delta -= acc;
    delta *= rcpFrames;

    acc += delta;
    return any(greaterThan(abs(delta),vec3(nbl_glsl_FLT_MIN)));
}
91+
92+
93+
94+
// Packs a pixel location into the bit pattern of a float:
// bits 0..15 come from `outPixelLocation.x`, bits 16..31 are the low 16 bits of `outPixelLocation.y`.
// The result is a reinterpretation of those bits, not a numeric conversion.
float packOutPixelLocation(in uvec2 outPixelLocation)
{
    const uint combinedBits = bitfieldInsert(outPixelLocation.x,outPixelLocation.y,16,16);
    return uintBitsToFloat(combinedBits);
}
98+
// Inverse of packOutPixelLocation: reinterprets the float's bits and returns
// (low 16 bits, high 16 bits) as an unsigned pair.
uvec2 unpackOutPixelLocation(in float packed)
{
    const uint rawBits = floatBitsToUint(packed);
    // unsigned bitfieldExtract zero-extends, matching a mask / logical shift
    const uint lowHalf = bitfieldExtract(rawBits,0,16);
    const uint highHalf = bitfieldExtract(rawBits,16,16);
    return uvec2(lowHalf,highHalf);
}
103+
104+
#include "bin/runtime_defines.glsl"
105+
#include <nbl/builtin/glsl/ext/MitsubaLoader/material_compiler_compatibility_impl.glsl>
106+
// File-scope vector handed out by the getter below. It is not assigned anywhere in this
// part of the file, so whoever includes this header must set it before the getter is used.
// NOTE(review): judging by the names this is the normalized world-space view direction — confirm.
vec3 normalizedV;
107+
// Returns the file-scope `normalizedV` unchanged.
// NOTE(review): presumably one of the user-provided material compiler backend hooks
// (see _NBL_USER_PROVIDED_MATERIAL_COMPILER_GLSL_BACKEND_FUNCTIONS_ further down) — confirm.
vec3 nbl_glsl_MC_getNormalizedWorldSpaceV()
108+
{
109+
return normalizedV;
110+
}
111+
// File-scope vector handed out by the getter below; load_aux_vertex_attrs() writes it
// (normal matrix rows dotted with the interpolated vertex normal, then normalized).
vec3 normalizedN;
112+
// Returns the file-scope `normalizedN` unchanged.
// NOTE(review): presumably one of the user-provided material compiler backend hooks
// (see _NBL_USER_PROVIDED_MATERIAL_COMPILER_GLSL_BACKEND_FUNCTIONS_ further down) — confirm.
vec3 nbl_glsl_MC_getNormalizedWorldSpaceN()
113+
{
114+
return normalizedN;
115+
}
116+
117+
#include <nbl/builtin/glsl/barycentric/utils.glsl>
118+
mat2x3 dPdBary;
119+
// Fetches the three vertex positions selected by `indices`, applies the instance transform
// `batchInstanceData.tform`, and returns the transformed third vertex (index 2).
// Side effect: writes the file-scope `dPdBary` with the two edge vectors
// (vertex0-vertex2, vertex1-vertex2) after the 3x3 part of the transform has been applied.
vec3 load_positions(in uvec3 indices, in nbl_glsl_ext_Mitsuba_Loader_instance_data_t batchInstanceData)
{
    const mat4x3 instanceTform = batchInstanceData.tform;

    // only the upper-left 3x3 part is applied here, the translation column is added on return
    const mat3 corners = mat3(instanceTform)*mat3(
        nbl_glsl_fetchVtxPos(indices[0],batchInstanceData),
        nbl_glsl_fetchVtxPos(indices[1],batchInstanceData),
        nbl_glsl_fetchVtxPos(indices[2],batchInstanceData)
    );

    dPdBary = mat2x3(corners[0]-corners[2],corners[1]-corners[2]);
    return corners[2]+instanceTform[3];
}
133+
134+
#ifdef TEX_PREFETCH_STREAM
135+
// Returns the file-scope `dPdBary` (the two triangle edge vectors written by load_positions()).
// Only compiled when TEX_PREFETCH_STREAM is defined.
// NOTE(review): presumably a hook consumed by the normal perturbation code of the material compiler — confirm.
mat2x3 nbl_glsl_perturbNormal_dPdSomething()
136+
{
137+
return dPdBary;
138+
}
139+
// File-scope matrix of UV differences (uv0-uv2, uv1-uv2); written by load_aux_vertex_attrs()
// when TEX_PREFETCH_STREAM is defined.
mat2 dUVdBary;
140+
// Returns the file-scope `dUVdBary` unchanged.
// NOTE(review): presumably a hook consumed by the normal perturbation code of the material compiler — confirm.
mat2 nbl_glsl_perturbNormal_dUVdSomething()
141+
{
142+
return dUVdBary;
143+
}
144+
#endif
145+
#define _NBL_USER_PROVIDED_MATERIAL_COMPILER_GLSL_BACKEND_FUNCTIONS_
146+
#include <nbl/builtin/glsl/material_compiler/common.glsl>
147+
148+
// Loads the per-vertex attributes needed after the hit position is known and returns the
// initial RNG scramble state for the pixel.
//
// compactBary         : two barycentric coordinates; expanded with nbl_glsl_barycentric_expand.
// indices             : the triangle's three vertex indices.
// batchInstanceData   : instance data providing the attribute fetches and the normal matrix rows.
// material            : only read when TEX_PREFETCH_STREAM is defined (texture prefetch stream).
// outPixelLocation    : xy texel of `scramblebuf` to read.
// vertex_depth_mod_2  : currently unused, layer 1 of `scramblebuf` is always read.
// dBarydScreen        : (TEX_PREFETCH_STREAM only) chained with dUVdBary to get UV screen derivatives.
//
// Side effects: writes the file-scope `normalizedN`; with TEX_PREFETCH_STREAM also writes
// `dUVdBary` and runs the material's texture prefetch stream.
nbl_glsl_xoroshiro64star_state_t load_aux_vertex_attrs(
    in vec2 compactBary, in uvec3 indices, in nbl_glsl_ext_Mitsuba_Loader_instance_data_t batchInstanceData,
    in nbl_glsl_MC_oriented_material_t material,
    in uvec2 outPixelLocation, in uint vertex_depth_mod_2
#ifdef TEX_PREFETCH_STREAM
    ,in mat2 dBarydScreen
#endif
)
{
    // supporting spatially varying emissive would require hoisting the barycentric
    // computation and the UV fetch up to where positions are fetched
#ifdef TEX_PREFETCH_STREAM
    const mat3x2 cornerUVs = mat3x2(
        nbl_glsl_fetchVtxUV(indices[0],batchInstanceData),
        nbl_glsl_fetchVtxUV(indices[1],batchInstanceData),
        nbl_glsl_fetchVtxUV(indices[2],batchInstanceData)
    );
    const nbl_glsl_MC_instr_stream_t texPrefetchStream = nbl_glsl_MC_oriented_material_t_getTexPrefetchStream(material);
#endif
    // normals are only required when the path continues
    const mat3 cornerNormals = mat3(
        nbl_glsl_fetchVtxNormal(indices[0],batchInstanceData),
        nbl_glsl_fetchVtxNormal(indices[1],batchInstanceData),
        nbl_glsl_fetchVtxNormal(indices[2],batchInstanceData)
    );

#ifdef TEX_PREFETCH_STREAM
    dUVdBary = mat2(cornerUVs[0]-cornerUVs[2],cornerUVs[1]-cornerUVs[2]);
    const vec2 interpolatedUV = dUVdBary*compactBary+cornerUVs[2];
    nbl_glsl_MC_runTexPrefetchStream(texPrefetchStream,interpolatedUV,nbl_glsl_applyChainRule2D(dUVdBary,dBarydScreen));
#endif
    // NEE does not need this unless Area or Projected Solid Angle Sampling is used
    const vec3 interpolatedNormal = cornerNormals*nbl_glsl_barycentric_expand(compactBary);

    // issue the scramble load early, the normal matrix math below can overlap with it
    const ivec3 scrambleCoord = ivec3(outPixelLocation,1u/*vertex_depth_mod_2*/);
    const nbl_glsl_xoroshiro64star_state_t initialScramble = imageLoad(scramblebuf,scrambleCoord).rg;

    normalizedN = normalize(vec3(
        dot(batchInstanceData.normalMatrixRow0,interpolatedNormal),
        dot(batchInstanceData.normalMatrixRow1,interpolatedNormal),
        dot(batchInstanceData.normalMatrixRow2,interpolatedNormal)
    ));

    return initialScramble;
}
193+
194+
// Produces three random numbers for sample `_sample` at path depth `depth`.
// The element `_sample + (depth-1)*MAX_ACCUMULATED_SAMPLES` of `sampleSequence` is XORed with
// three successive outputs of the xoroshiro64* generator (which advances `scramble_state`),
// then scaled by the float with bit pattern 0x2f800004 (marginally above 2^-32).
vec3 rand3d(inout nbl_glsl_xoroshiro64star_state_t scramble_state, in int _sample, in int depth)
{
    const int sequenceIx = int(_sample)+(depth-1)*MAX_ACCUMULATED_SAMPLES;

    // draw the scramble words in x,y,z order so the generator state evolves identically
    uvec3 scrambleWords;
    scrambleWords.x = nbl_glsl_xoroshiro64star(scramble_state);
    scrambleWords.y = nbl_glsl_xoroshiro64star(scramble_state);
    scrambleWords.z = nbl_glsl_xoroshiro64star(scramble_state);

    const uvec3 scrambled = texelFetch(sampleSequence,sequenceIx).xyz^scrambleWords;
    const float toUnitRange = uintBitsToFloat(0x2f800004u);
    return vec3(scrambled)*toUnitRange;
}
200+
201+
// Generates one continuation ray from the material streams.
//
// maxT (out)       : always nbl_glsl_FLT_MAX.
// direction (out)  : the `L` member of the light sample produced by the generator stream.
// throughput (out) : value returned by nbl_glsl_MC_runGenerateAndRemainderStream.
// scramble_state   : RNG state, advanced by rand3d.
// sampleID, depth  : forwarded to rand3d (converted to int) to pick the sequence element.
// precomp/gcs/rnps : precomputed data and the generator-choice / remainder-and-pdf streams.
// The pdf reported by the stream is computed but not used here.
void gen_sample_ray(
    out float maxT, out vec3 direction, out vec3 throughput,
    inout nbl_glsl_xoroshiro64star_state_t scramble_state, in uint sampleID, in uint depth,
    in nbl_glsl_MC_precomputed_t precomp, in nbl_glsl_MC_instr_stream_t gcs, in nbl_glsl_MC_instr_stream_t rnps
)
{
    vec3 xi = rand3d(scramble_state,int(sampleID),int(depth));

    float unusedPdf;
    nbl_glsl_LightSample generated;
    throughput = nbl_glsl_MC_runGenerateAndRemainderStream(precomp,gcs,rnps,xi,unusedPdf,generated);

    direction = generated.L;
    maxT = nbl_glsl_FLT_MAX;
}
217+
218+
219+
// Generates up to `maxRaysToGen` continuation rays for a path vertex and appends the ones
// worth tracing to the `sinkRays` buffer.
//
// maxRaysToGen         : number of rays to attempt; clamped to MAX_RAYS_GENERATED (size of the local arrays).
// material             : source of the generator-choice, remainder-and-pdf and (optionally) normal precomp streams.
// frontfacing          : forwarded to nbl_glsl_MC_precomputeData.
// vertex_depth         : path depth of this vertex; the RNG state is advanced by 3 draws for each depth beyond 1.
// scramble_start_state : per-pixel RNG state (local copy, the caller's value is not modified).
// sampleID             : base sample index; ray `i` uses `sampleID+i`.
// outPixelLocation     : pixel the rays belong to, packed into the ray's `time` field.
// origin               : ray origin before the self-intersection offset is applied.
// prevThroughput       : throughput accumulated so far, multiplied into each generated ray's throughput.
//
// Rays whose largest throughput component does not exceed 2^-19 are dropped. Space for the survivors is
// reserved with a single atomicAdd on `rayCount[pc.cummon.rayCountWriteIx]`.
// Requires the file-scope `dPdBary` to have been written (see load_positions) for the offset direction.
void generate_next_rays(
    in uint maxRaysToGen, in nbl_glsl_MC_oriented_material_t material, in bool frontfacing, in uint vertex_depth,
    in nbl_glsl_xoroshiro64star_state_t scramble_start_state, in uint sampleID, in uvec2 outPixelLocation,
    in vec3 origin, in vec3 prevThroughput)
{
    // get material streams as well
    const nbl_glsl_MC_instr_stream_t gcs = nbl_glsl_MC_oriented_material_t_getGenChoiceStream(material);
    const nbl_glsl_MC_instr_stream_t rnps = nbl_glsl_MC_oriented_material_t_getRemAndPdfStream(material);


    // need to do this after we have worldspace V and N ready
    const nbl_glsl_MC_precomputed_t precomputed = nbl_glsl_MC_precomputeData(frontfacing);
#ifdef NORM_PRECOMP_STREAM
    const nbl_glsl_MC_instr_stream_t nps = nbl_glsl_MC_oriented_material_t_getNormalPrecompStream(material);
    nbl_glsl_MC_runNormalPrecompStream(nps,precomputed);
#endif

    // only referenced by the disabled scramble write-back below
    const uint vertex_depth_mod_2 = vertex_depth&0x1u;
    const uint vertex_depth_mod_2_inv = vertex_depth_mod_2^0x1u;
    // prepare rays
    // never index the fixed-size local arrays past their end
    const uint rayGenCount = min(maxRaysToGen,uint(MAX_RAYS_GENERATED));
    uint raysToAllocate = 0u;
    float maxT[MAX_RAYS_GENERATED]; vec3 direction[MAX_RAYS_GENERATED]; vec3 nextThroughput[MAX_RAYS_GENERATED];
    // fast-forward the RNG by the 3 draws consumed at every previous depth;
    // `<` (not `!=`) so that a depth of 0 does not wrap around into ~2^32 iterations
    for (uint i=1u; i<vertex_depth; i++)
    {
        nbl_glsl_xoroshiro64star(scramble_start_state);
        nbl_glsl_xoroshiro64star(scramble_start_state);
        nbl_glsl_xoroshiro64star(scramble_start_state);
    }
    for (uint i=0u; i<rayGenCount; i++)
    {
        nbl_glsl_xoroshiro64star_state_t scramble_state = scramble_start_state;
        // TODO: When generating NEE rays, advance the dimension, NOT the sampleID
        gen_sample_ray(maxT[i],direction[i],nextThroughput[i],scramble_state,sampleID+i,vertex_depth,precomputed,gcs,rnps);
        // TODO: bad idea, invent something else
        // if (i==0u)
        //     imageStore(scramblebuf,ivec3(outPixelLocation,vertex_depth_mod_2_inv),uvec4(scramble_state,0u,0u));
        nextThroughput[i] *= prevThroughput;
        if (max(max(nextThroughput[i].x,nextThroughput[i].y),nextThroughput[i].z)>exp2(-19.f)) // TODO: reverse tonemap to adjust the threshold
            raysToAllocate++;
        else
            maxT[i] = 0.f; // maxT==0 marks the ray as dropped for the write-out loop
    }
    // TODO: investigate workgroup reductions here
    const uint baseOutputID = atomicAdd(rayCount[pc.cummon.rayCountWriteIx],raysToAllocate);

    // the 1.03125f adjusts for the fact that the normal might be too short (inversesqrt precision)
    const float inversesqrt_precision = 1.03125f;
    // TODO: investigate why we can't use `normalizedN` here
    const vec3 ray_offset_vector = normalize(cross(dPdBary[0],dPdBary[1]))*inversesqrt_precision;
    float origin_offset = nbl_glsl_numeric_limits_float_epsilon(44u); // the constants are empirical, not derived
    origin_offset += dot(abs(ray_offset_vector),abs(origin))*nbl_glsl_numeric_limits_float_epsilon(32u);
    // TODO: in the future run backward error analysis of
    // dot(mat3(WorldToObj)*(origin+offset*geomNormal/length(geomNormal))+(WorldToObj-vx_pos[1]),geomNormal)
    // where
    // origin = mat3x2(vx_pos[2]-vx_pos[1],vx_pos[0]-vx_pos[1])*barys+vx_pos[1]
    // geomNormal = cross(vx_pos[2]-vx_pos[1],vx_pos[0]-vx_pos[1])
    // and we assume only `WorldToObj`, `vx_pos[i]` and `barys` are accurate values. So far:
    // offset > (1+gamma(2))/(1-gamma(2))*(dot(abs(geomNormal),omega_error)+dot(abs(omega),geomNormal_error)+dot(omega_error,geomNormal_error))
    //const vec3 geomNormal = cross(dPdBary[0],dPdBary[1]);
    //float ray_offset = ?;
    //ray_offset = nbl_glsl_ieee754_next_ulp_away_from_zero(ray_offset);
    const vec3 ray_offset = ray_offset_vector*origin_offset;
    // [0] is pushed along the geometric normal, [1] against it
    const vec3 ray_origin[2] = {origin+ray_offset,origin-ray_offset};
    uint offset = 0u;
    for (uint i=0u; i<rayGenCount; i++)
    if (maxT[i]!=0.f)
    {
        nbl_glsl_ext_RadeonRays_ray newRay;
        // start on the side of the surface the ray is heading towards
        if (dot(ray_offset_vector,direction[i])<0.f)
            newRay.origin = ray_origin[1];
        else
            newRay.origin = ray_origin[0];
        newRay.maxT = maxT[i];
        newRay.direction = direction[i];
        // the `time` field carries the packed pixel location
        newRay.time = packOutPixelLocation(outPixelLocation);
        newRay.mask = -1;
        newRay._active = 1;
        // throughput as three halves (r,g in word 0, b in the low half of word 1),
        // sample index in the high 16 bits of word 1
        newRay.useless_padding[0] = packHalf2x16(nextThroughput[i].rg);
        newRay.useless_padding[1] = bitfieldInsert(packHalf2x16(nextThroughput[i].bb),sampleID+i,16,16);
        const uint outputID = baseOutputID+(offset++);
        sinkRays[outputID] = newRay;
    }
}
302+
303+
/* TODO: optimize and reorganize
304+
void main()
305+
{
306+
clear_raycount();
307+
const bool alive = useful_invocation();
308+
uint raysToAllocate = 0u;
309+
vec3 emissive;
310+
if (alive)
311+
{
312+
emissive = staticViewData.envmapBaseColor;
313+
314+
raysToAllocate = main_prolog(emissive,...);
315+
}
316+
317+
const uint raysLocalEnd = nbl_glsl_workgroupInclusiveAdd(raysToAllocate);
318+
uint baseOutputID;
319+
if (gl_LocalInvocationIndex==WORKGROUP_SIZE-1)
320+
baseOutputID = atomicAdd(rayCount[pc.cummon.rayCountWriteIx],raysLocalEnd);
321+
baseOutputID = nbl_glsl_workgroupBroadcast(baseOutputID,WORKGROUP_SIZE-1);
322+
323+
// coalesce rays
324+
for ()
325+
{
326+
}
327+
// write them out to global mem
328+
for ()
329+
{
330+
}
331+
332+
if (alive)
333+
{
334+
// store accumulation
335+
main_epilog();
336+
}
337+
}
338+
*/
339+
#endif

0 commit comments

Comments
 (0)