@@ -39,14 +39,6 @@ mat2x3 nbl_glsl_MC_getdPos()
39
39
#include <nbl/builtin/glsl/material_compiler/common.glsl>
40
40
41
41
42
- // functions
43
- vec3 rand3d(in uint _sample, inout nbl_glsl_xoroshiro64star_state_t scramble_state)
44
- {
45
- uvec3 seqVal = texelFetch(sampleSequence,int(_sample)).xyz;
46
- seqVal ^= uvec3(nbl_glsl_xoroshiro64star(scramble_state),nbl_glsl_xoroshiro64star(scramble_state),nbl_glsl_xoroshiro64star(scramble_state));
47
- return vec3(seqVal)*uintBitsToFloat(0x2f800004u);
48
- }
49
-
50
42
/*
51
43
float maxAbs1(in float val)
52
44
{
@@ -86,16 +78,27 @@ float ULP3(in vec3 val, in uint accuracy)
86
78
}
87
79
*/
88
80
89
-
90
- struct SamplingData_t
81
// Returns a 3-component random vector for sample index `_sample`.
//
// The `_sample`-th texel of `sampleSequence` is fetched and each of its first three
// components is XORed with a fresh output of `nbl_glsl_xoroshiro64star`, which advances
// `scramble_state` three times. The scrambled integers are then converted to float and
// multiplied by the float whose bit pattern is 0x2f800004 (2^-32 scaled by 1+4*2^-23).
//
// scramble_state : per-pixel generator state, advanced by exactly three draws
// _sample        : index into `sampleSequence`
vec3 rand3d(inout nbl_glsl_xoroshiro64star_state_t scramble_state, in uint _sample)
{
	// GLSL evaluates call arguments left to right, so pulling the three draws out into
	// separate statements (x, then y, then z) keeps the same word-to-channel mapping.
	uvec3 scrambleBits;
	scrambleBits.x = nbl_glsl_xoroshiro64star(scramble_state);
	scrambleBits.y = nbl_glsl_xoroshiro64star(scramble_state);
	scrambleBits.z = nbl_glsl_xoroshiro64star(scramble_state);

	const uvec3 sequenceBits = texelFetch(sampleSequence,int(_sample)).xyz;

	// integer -> float scale factor, kept as a raw bit pattern so the value is exact
	const float toUnitScale = uintBitsToFloat(0x2f800004u);
	return vec3(sequenceBits^scrambleBits)*toUnitScale;
}
87
+
88
// Generates one ray for a non-background pixel by running the material's
// generator-choice and remainder-and-pdf instruction streams.
//
// Outputs:
//   maxT       : ray length limit, always set to FLT_MAX here
//   direction  : direction of the generated sample
//   throughput : value returned by nbl_glsl_MC_runGenerateAndRemainderStream, divided by
//                staticViewData.samplesPerPixelPerDispatch
// In/out:
//   scramble_state : random scramble state, advanced by rand3d
// Inputs:
//   sampleID   : index forwarded to rand3d to pick the sequence entry
//   precomp    : precomputed material-compiler data for this pixel
//   gcs, rnps  : generator-choice and remainder-and-pdf instruction streams of the material
//
// Returns true unconditionally; the caller treats the return value as "ray is valid".
bool gen_sample_ray(
	out float maxT, out vec3 direction, out vec3 throughput,
	inout nbl_glsl_xoroshiro64star_state_t scramble_state, in uint sampleID,
	in nbl_glsl_MC_precomputed_t precomp, in nbl_glsl_MC_instr_stream_t gcs, in nbl_glsl_MC_instr_stream_t rnps
)
{
	maxT = FLT_MAX;

	vec3 rand = rand3d(scramble_state,sampleID);

	// `pdf` is required as an output argument of the stream runner but is not consumed here
	float pdf;
	nbl_glsl_LightSample s;
	throughput = nbl_glsl_MC_runGenerateAndRemainderStream(precomp, gcs, rnps, rand, pdf, s);
	throughput /= float(staticViewData.samplesPerPixelPerDispatch);

	// FIX: `direction` is an `out` parameter and was left unwritten, so the caller read an
	// undefined value. Take it from the generated sample.
	// NOTE(review): assumes nbl_glsl_LightSample exposes the sampled direction as `L` - confirm against the bxdf headers.
	direction = s.L;

	return true;
}
101
104
@@ -108,18 +111,24 @@ void main()
108
111
float revdepth = texelFetch(depthbuf,pixelCoord,0).r;
109
112
110
113
const uint outputID = outputLocation.y*staticViewData.samplesPerRowPerDispatch+outputLocation.x*staticViewData.samplesPerPixelPerDispatch;
111
-
112
- SamplingData_t samplingData;
113
114
114
- bool alive = false;
115
+ nbl_glsl_xoroshiro64star_state_t scramble_start_state; // this should get advanced for secondary rays by 3 or 4 iterations
116
+
115
117
nbl_glsl_MC_precomputed_t precomputed;
116
- nbl_glsl_MC_oriented_material_t material;
117
- nbl_glsl_xoroshiro64star_state_t start_scramble;
118
- vec2 uv;
119
- if (revdepth>0.0)
118
+ nbl_glsl_MC_instr_stream_t gcs;
119
+ nbl_glsl_MC_instr_stream_t rnps;
120
+
121
+ const bool nonBackgroudPixel = revdepth>0.0;
122
+ if (nonBackgroudPixel)
120
123
{
121
124
// vis buffer read
122
125
const uvec2 visBuffer = texelFetch(frontFacing_Object_Triangle,pixelCoord,0).rg;
126
+ // init scramble
127
+ scramble_start_state = texelFetch(scramblebuf,pixelCoord,0).rg;
128
+ // tmp gbuffer reads
129
+ const vec2 normalBuffer = texelFetch(encodedNormal,pixelCoord,0).rg;
130
+ const vec2 UV = texelFetch(uvCoords,pixelCoord,0).xy;
131
+ mat2 dUVdScreen;/*
123
132
mat2x3 dBarydScreen;/*
124
133
{
125
134
// TODO: future https://diaryofagraphicsprogrammer.blogspot.com/2018/03/triangle-visibility-buffer.html
@@ -129,11 +138,6 @@ void main()
129
138
}
130
139
dPosdScreen = mat3(vPos[0],vPos[1],vPos[2])*dBarydScreen;
131
140
*/
132
- // init scramble
133
- start_scramble = texelFetch(scramblebuf,pixelCoord,0).rg;
134
- // tmp gbuffer reads
135
- const vec2 normalBuffer = texelFetch(encodedNormal,pixelCoord,0).rg;
136
- uv = texelFetch(uvCoords,pixelCoord,0).xy;
137
141
138
142
// unproject
139
143
{
@@ -147,57 +151,67 @@ void main()
147
151
}
148
152
149
153
// decode vis buffer
150
- bool frontfacing;
151
- vec3 emissive;
154
+ vec3 emissive = vec3(0.0,0.0,0.0);
152
155
{
153
156
const uint objectID = visBuffer[0]&0x7fffffffu;
154
- frontfacing = objectID==visBuffer[0];
155
157
const uint triangleID = visBuffer[1];
158
+ const bool frontfacing = objectID==visBuffer[0];
159
+
160
+ nbl_glsl_MC_oriented_material_t material = nbl_glsl_MC_material_data_t_getOriented(InstData.data[objectID].material,frontfacing);
156
161
157
- //
162
+ // use loaded data
158
163
precomputed = nbl_glsl_MC_precomputeData(frontfacing);
159
- material = nbl_glsl_MC_material_data_t_getOriented(InstData.data[objectID].material,precomputed.frontface);
160
-
161
- //
162
164
emissive = nbl_glsl_MC_oriented_material_t_getEmissive(material);
163
-
165
+ gcs = nbl_glsl_MC_oriented_material_t_getGenChoiceStream(material);
166
+ rnps = nbl_glsl_MC_oriented_material_t_getRemAndPdfStream(material);
167
+
164
168
// normally we'd use MeshPackerV2's vertex attribute data for this, but now we read from temporary GBuffer
165
169
const vec3 normal = nbl_glsl_NormalDecode_signedSpherical(normalBuffer);
166
170
normalizedN.x = dot(InstData.data[objectID].normalMatrixRow0,normal);
167
171
normalizedN.y = dot(InstData.data[objectID].normalMatrixRow1,normal);
168
172
normalizedN.z = dot(InstData.data[objectID].normalMatrixRow2,normal);
173
+
174
+ // prefetch textures and normals
175
+ #ifdef TEX_PREFETCH_STREAM
176
+ nbl_glsl_MC_runTexPrefetchStream(nbl_glsl_MC_oriented_material_t_getTexPrefetchStream(material), UV, dUVdScreen);
177
+ #endif
178
+ #ifdef NORM_PRECOMP_STREAM
179
+ nbl_glsl_MC_runNormalPrecompStream(nbl_glsl_MC_oriented_material_t_getNormalPrecompStream(material), dUVdScreen, precomputed);
180
+ #endif
169
181
}
170
182
171
183
//
172
- vec3 acc;
173
- if (pc.cummon.rcpFramesDispatched<1.0)
174
- acc = fetchAccumulation(pixelCoord)+ emissive/float(pc.cummon.framesDispatched-1u) ;
175
- else
176
- acc = emissive;
177
-
178
- //
179
-
180
- storeAccumulation(acc,pixelCoord);
181
- alive = true;
184
+ if (any(greaterThan(emissive,vec3(FLT_MIN))))
185
+ {
186
+ vec3 acc = emissive;
187
+ if (pc.cummon.rcpFramesDispatched<1.0)
188
+ {
189
+ acc /= float(pc.cummon.framesDispatched-1u);
190
+ acc += fetchAccumulation(pixelCoord);
191
+ }
192
+ storeAccumulation(acc,pixelCoord);
193
+ }
182
194
}
183
195
#ifdef USE_OPTIX_DENOISER
184
196
// TODO: translate normal into float16_t buff
185
197
#endif
186
198
187
199
for (uint i=0u; i<staticViewData.samplesPerPixelPerDispatch; i++)
188
200
{
189
- vec3 direction; // TODO: just use nbl_glsl_LightSample?
201
+ vec3 direction;
190
202
float maxT;
191
- vec4 throughput = vec4(0.0,0.0,0.0,-1.0); // -1 needs to be there to ensure no backface culling on rays
203
+ vec4 throughput;
204
+ throughput.a = -1.0; // -1 needs to be there to ensure no backface culling on rays
192
205
193
206
bool validRay = false;
194
- if (alive )
207
+ if (nonBackgroudPixel )
195
208
{
196
- samplingData.sampleID = pc.cummon.samplesComputedPerPixel+i;
197
- validRay = gen_sample_ray(maxT,direction,throughput.rgb,samplingData);
209
+ nbl_glsl_xoroshiro64star_state_t scramble_state = scramble_start_state;
210
+ const uint sampleID = pc.cummon.samplesComputedPerPixel+i;
211
+ validRay = gen_sample_ray(maxT,direction,throughput.rgb,scramble_state,sampleID,precomputed,gcs,rnps);
198
212
}
199
213
200
- // TODO: repack rays in smem for coalescing
214
+ // TODO: repack rays in smem for coalescing, or optimize this somehow
201
215
const uint realOutputID = outputID+i;
202
216
if (validRay)
203
217
{
0 commit comments