1
1
#version 430 core
2
- #define WORK_GROUP_DIM 16u
3
- layout(local_size_x = WORK_GROUP_DIM, local_size_y = WORK_GROUP_DIM) in;
4
- #define WORK_GROUP_SIZE (WORK_GROUP_DIM*WORK_GROUP_DIM)
2
+ #include "raygenCommon.glsl"
5
3
6
4
7
- #include "irr/builtin/glsl/utils/NormalDecode.glsl"
8
-
9
-
10
- #include "common.glsl"
11
-
12
-
13
- // TODO transform into push constants
14
- // uniforms
15
- layout(location = 0) uniform vec3 uCameraPos;
16
- layout(location = 1) uniform float uDepthLinearizationConstant;
17
- layout(location = 2) uniform mat4 uFrustumCorners;
18
- layout(location = 3) uniform uvec2 uImageSize;
19
- layout(location = 4) uniform uvec4 uImageWidth_ImageArea_TotalImageSamples_Samples;
20
- layout(location = 5) uniform uint uSamplesComputed;
21
- layout(location = 6) uniform vec4 uImageSize2Rcp;
22
-
23
- // image views
24
- layout(set = 2, binding = 0) uniform usamplerBuffer sampleSequence;
25
- layout(set = 2, binding = 1) uniform usampler2D scramblebuf;
26
- layout(set = 2, binding = 2) uniform sampler2D depthbuf;
27
- layout(set = 2, binding = 3) uniform usampler2D objectTriangleFrontFacing;
28
- layout(set = 2, binding = 4) uniform sampler2D encodedNormal;
29
- layout(set = 2, binding = 5) uniform sampler2D uv;
30
-
31
- // SSBOs
32
- #include "irr/builtin/glsl/ext/RadeonRays/ray.glsl"
33
- layout(set = 2, binding = 6, std430) restrict writeonly buffer Rays
5
+ #include <irr/builtin/glsl/utils/normal_decode.glsl>
6
+ // per-dispatch raygen parameters; the block is declared row_major, member type RaygenShaderData_t is declared outside this file
7
+ layout(set = 2, binding = 0, row_major) uniform RaygenData
34
8
{
35
- RadeonRays_ray rays[];
36
- };
37
-
38
- layout(set = 1, binding = 0, std430) restrict readonly buffer CumulativeLightPDF
39
- {
40
- uint lightCDF[];
41
- };
42
-
43
- layout(set = 1, binding = 1, std430, row_major) restrict readonly buffer Lights
44
- {
45
- SLight light[];
9
+ RaygenShaderData_t raygenData;
46
10
};
11
+ // rng inputs: sample sequence texel buffer plus a 2D scramble texture that main() fetches at the pixel coordinate
12
+ layout(set = 2, binding = 1) uniform usamplerBuffer sampleSequence;
13
+ layout(set = 2, binding = 2) uniform usampler2D scramblebuf;
14
+ // vis buffer inputs; NOTE(review): only depthbuf is read by the main() visible here, the others look reserved for later use - confirm
15
+ layout(set = 2, binding = 3) uniform sampler2D depthbuf;
16
+ layout(set = 2, binding = 4) uniform usampler2D objectTriangleFrontFacing;
17
+ layout(set = 2, binding = 5) uniform sampler2D encodedNormal;
18
+ layout(set = 2, binding = 6) uniform sampler2D uv;
47
19
48
20
49
21
@@ -58,10 +30,12 @@ float linearizeZBufferVal(in float nonLinearZBufferVal)
58
30
// positive [0,1] Z: `B/(C-A-Cy)/(B/(C-A))`
59
31
// positive [0,1] Z: `(C-A)/(C-A-Cy)`
60
32
// positive [0,1] Z: `D/(D-Cy)`
61
- return 1.0/(uDepthLinearizationConstant *nonLinearZBufferVal+1.0);
33
+ return 1.0/(pc.data.depthLinearizationConstant *nonLinearZBufferVal+1.0);
62
34
}
63
35
64
- float maxAbs1(in float val)
36
+
37
+ /*
38
+ float maxAbs1(in float val)
65
39
{
66
40
return abs(val);
67
41
}
@@ -97,28 +71,19 @@ float ULP3(in vec3 val, in uint accuracy)
97
71
float x = maxAbs3(val);
98
72
return uintBitsToFloat(floatBitsToUint(x) + accuracy)-x;
99
73
}
74
+ */
100
75
101
76
102
-
103
- uint ugen_uniform_sample1(in uint dimension, in uint sampleIx, in uint scramble);
104
- uvec2 ugen_uniform_sample2(in uint dimension, in uint sampleIx, in uint scramble);
105
-
106
- vec2 gen_uniform_sample2(in uint dimension, in uint sampleIx, in uint scramble);
107
-
108
-
109
- uint ugen_uniform_sample1(in uint dimension, in uint sampleIx, in uint scramble)
110
- {
111
- return ugen_uniform_sample2(dimension,sampleIx,scramble).x;
112
- }
113
- uvec2 ugen_uniform_sample2(in uint dimension, in uint sampleIx, in uint scramble)
77
// State passed to the ray generator for a single sample.
struct SamplingData_t
{
	uint sampleID; // index of the sample being generated; filled in by the caller
};

// Placeholder ray generator.
//
// Outputs:
//   maxT       - ray extent, always FLT_MAX
//   direction  - fixed unit vector along (1,1,1)
//   throughput - always white (1,1,1)
// `samplingData` is accepted but not consulted yet.
// Returns true unconditionally, i.e. every request yields a valid ray.
bool gen_sample_ray(out float maxT, out vec3 direction, out vec3 throughput, in SamplingData_t samplingData)
{
	throughput = vec3(1.0);
	direction = normalize(vec3(0.5));
	maxT = FLT_MAX;
	return true;
}
123
88
124
89
#ifdef TODO
@@ -189,49 +154,71 @@ vec3 light_sample(out vec3 incoming, in uint sampleIx, in uint scramble, inout f
189
154
// Ray generation entry point: each invocation handles the pixel at
// gl_GlobalInvocationID.xy and writes pc.data.samplesPerPixelPerDispatch
// entries into `rays`.
//
// Steps:
//   1. Discard invocations outside pc.data.imageDimensions.
//   2. Reconstruct the surface position from the depth buffer by interpolating
//      the frustum corner vectors and scaling by the linearized depth.
//   3. For every sample, ask gen_sample_ray() for a ray and store it; pixels
//      whose depth sample is not > 0.0 (or whose generator reports no valid ray)
//      get an inactive entry instead.
void main()
{
	uvec2 outputLocation = gl_GlobalInvocationID.xy;
	if (all(lessThan(outputLocation,pc.data.imageDimensions)))
	{
		// named `pixelCoord` rather than `uv` so the global `uv` sampler binding is not shadowed
		const ivec2 pixelCoord = ivec2(outputLocation);
		const float revdepth = texelFetch(depthbuf,pixelCoord,0).r;

		// NOTE(review): sample `i` of this pixel is stored at `outputID+i`, which is the same
		// slot as sample 0 of the pixel `i` columns to the right whenever more than one sample
		// per pixel is dispatched - confirm the intended ray buffer layout with the consumers.
		const uint outputID = outputLocation.y*pc.data.samplesPerRowPerDispatch+outputLocation.x;

		SamplingData_t samplingData;

		// unproject
		vec3 viewDir;
		vec3 position;
		{
			const mat4x3 frustumCorners = pc.data.frustumCorners;
			const vec2 NDC = vec2(outputLocation)*pc.data.rcpPixelSize+pc.data.rcpHalfPixelSize;

			// bilinear blend of the four corner vectors: along X inside each pair, then along Y
			viewDir = mix(frustumCorners[0]*NDC.x+frustumCorners[1],frustumCorners[2]*NDC.x+frustumCorners[3],NDC.yyy);
			position = viewDir*linearizeZBufferVal(revdepth)+pc.data.cameraPosition;
		}

		// only pixels with a depth sample above zero emit rays
		bool alive = false;
		// placeholders for the remaining vis buffer attributes, not consumed yet
		uint scramble,objectID,triangleID;
		bool backfacing;
		vec3 normal = vec3(0.0);
		vec2 uvCoord;
		if (revdepth>0.0)
		{
			scramble = texelFetch(scramblebuf,pixelCoord,0).r;

			alive = true;
		}
#ifdef USE_OPTIX_DENOISER
		// TODO: translate normal into float16_t buff
#endif

		for (uint i=0u; i<pc.data.samplesPerPixelPerDispatch; i++)
		{
			vec3 direction; // TODO: just use irr_glsl_LightSample?
			float maxT;
			vec4 throughput = vec4(0.0,0.0,0.0,-1.0); // -1 needs to be there to ensure no backface culling on rays

			bool validRay = false;
			if (alive)
			{
				samplingData.sampleID = pc.data.samplesComputedPerPixel+i;
				validRay = gen_sample_ray(maxT,direction,throughput.rgb,samplingData);
			}

			// TODO: repack rays in smem for coalescing
			const uint realOutputID = outputID+i;
			if (validRay)
			{
				rays[realOutputID].origin = position;/*+newray.direction*err?; TODO */
				// store the extent reported by the generator; a hard-coded 0.0 here made every ray zero-length
				rays[realOutputID].maxT = maxT;
				rays[realOutputID].direction = direction;
				rays[realOutputID].mask = -1;
				rays[realOutputID]._active = 1;
				// throughput is smuggled through two otherwise unused int fields as packed half floats
				rays[realOutputID].backfaceCulling = int(packHalf2x16(throughput.ab));
				rays[realOutputID].useless_padding = int(packHalf2x16(throughput.gr));
			}
			else
			{
				// inactive entry: zero extent, empty mask, flagged as not active
				rays[realOutputID].maxT = 0.0;
				rays[realOutputID].mask = 0;
				rays[realOutputID]._active = 0;
			}
		}
	}
}
0 commit comments