Skip to content

Commit e6a1e82

Browse files
save the work for tomorrow
1 parent 4a8db56 commit e6a1e82

File tree

5 files changed

+105
-157
lines changed

5 files changed

+105
-157
lines changed

examples_tests/22.RaytracedAO/common.glsl

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,14 @@
77

88
#ifdef __cplusplus
99
#define uint uint32_t
10+
struct uvec2
11+
{
12+
uint32_t x,y;
13+
};
14+
struct vec2
15+
{
16+
float x,y;
17+
};
1018
struct vec3
1119
{
1220
float x,y,z;
@@ -16,6 +24,14 @@
1624
#endif
1725

1826

27+
struct RaytraceShaderCommonData_t
28+
{
29+
uvec2 imageDimensions;
30+
uint samplesPerPixelPerDispatch;
31+
uint samplesPerRowPerDispatch;
32+
};
33+
34+
1935
struct SLight
2036
{
2137
#ifdef __cplusplus

examples_tests/22.RaytracedAO/dirty_source/ExtraCrap.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1066,7 +1066,7 @@ void Renderer::render(irr::ITimer* timer)
10661066
camera->render();
10671067

10681068
const auto currentViewProj = camera->getConcatenatedMatrix();
1069-
//if (!core::equals(prevViewProj,currentViewProj,core::ROUNDING_ERROR<core::matrix4SIMD>()*1000.0))
1069+
if (!core::equals(prevViewProj,currentViewProj,core::ROUNDING_ERROR<core::matrix4SIMD>()*1000.0))
10701070
{
10711071
m_framesDone = 0u;
10721072

@@ -1142,12 +1142,12 @@ void Renderer::render(irr::ITimer* timer)
11421142
float uImageSize2Rcp[4] = {1.f/static_cast<float>(m_renderSize[0]),1.f/static_cast<float>(m_renderSize[1]),0.5f/static_cast<float>(m_renderSize[0]),0.5f/static_cast<float>(m_renderSize[1])};
11431143
COpenGLExtensionHandler::pGlProgramUniform4fv(m_raygenProgram, 6, 1, uImageSize2Rcp);
11441144
}*/
1145-
1146-
m_driver->bindDescriptorSets(EPBP_COMPUTE, m_raygenLayout.get(), 0, 1, &m_globalBackendDataDS.get(), nullptr);
1147-
m_driver->bindDescriptorSets(EPBP_COMPUTE, m_raygenLayout.get(), 2, 1, &m_raygenDS2.get(), nullptr);
1145+
#endif
1146+
IGPUDescriptorSet* descriptorSets[] = {m_globalBackendDataDS.get(),m_raygenDS.get()};
1147+
m_driver->bindDescriptorSets(EPBP_COMPUTE, m_raygenPipelineLayout.get(), 0, 2, descriptorSets, nullptr);
11481148
m_driver->bindComputePipeline(m_raygenPipeline.get());
1149+
m_driver->pushConstants(m_raygenPipelineLayout.get(),ISpecializedShader::ESS_COMPUTE,0u,sizeof(RaygenShaderData_t),&m_raygenShaderData);
11491150
m_driver->dispatch(m_raygenWorkGroups[0], m_raygenWorkGroups[1], 1);
1150-
#endif
11511151
// probably wise to flush all caches
11521152
COpenGLExtensionHandler::pGlMemoryBarrier(GL_ALL_BARRIER_BITS);
11531153
}

examples_tests/22.RaytracedAO/raygen.comp

Lines changed: 76 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -1,49 +1,21 @@
11
#version 430 core
2-
#define WORK_GROUP_DIM 16u
3-
layout(local_size_x = WORK_GROUP_DIM, local_size_y = WORK_GROUP_DIM) in;
4-
#define WORK_GROUP_SIZE (WORK_GROUP_DIM*WORK_GROUP_DIM)
2+
#include "raygenCommon.glsl"
53

64

7-
#include "irr/builtin/glsl/utils/NormalDecode.glsl"
8-
9-
10-
#include "common.glsl"
11-
12-
13-
// TODO transform into push constants
14-
// uniforms
15-
layout(location = 0) uniform vec3 uCameraPos;
16-
layout(location = 1) uniform float uDepthLinearizationConstant;
17-
layout(location = 2) uniform mat4 uFrustumCorners;
18-
layout(location = 3) uniform uvec2 uImageSize;
19-
layout(location = 4) uniform uvec4 uImageWidth_ImageArea_TotalImageSamples_Samples;
20-
layout(location = 5) uniform uint uSamplesComputed;
21-
layout(location = 6) uniform vec4 uImageSize2Rcp;
22-
23-
// image views
24-
layout(set = 2, binding = 0) uniform usamplerBuffer sampleSequence;
25-
layout(set = 2, binding = 1) uniform usampler2D scramblebuf;
26-
layout(set = 2, binding = 2) uniform sampler2D depthbuf;
27-
layout(set = 2, binding = 3) uniform usampler2D objectTriangleFrontFacing;
28-
layout(set = 2, binding = 4) uniform sampler2D encodedNormal;
29-
layout(set = 2, binding = 5) uniform sampler2D uv;
30-
31-
// SSBOs
32-
#include "irr/builtin/glsl/ext/RadeonRays/ray.glsl"
33-
layout(set = 2, binding = 6, std430) restrict writeonly buffer Rays
5+
#include <irr/builtin/glsl/utils/normal_decode.glsl>
6+
//
7+
layout(set = 2, binding = 0, row_major) uniform RaygenData
348
{
35-
RadeonRays_ray rays[];
36-
};
37-
38-
layout(set = 1, binding = 0, std430) restrict readonly buffer CumulativeLightPDF
39-
{
40-
uint lightCDF[];
41-
};
42-
43-
layout(set = 1, binding = 1, std430, row_major) restrict readonly buffer Lights
44-
{
45-
SLight light[];
9+
RaygenShaderData_t raygenData;
4610
};
11+
// rng
12+
layout(set = 2, binding = 1) uniform usamplerBuffer sampleSequence;
13+
layout(set = 2, binding = 2) uniform usampler2D scramblebuf;
14+
// vis buffer
15+
layout(set = 2, binding = 3) uniform sampler2D depthbuf;
16+
layout(set = 2, binding = 4) uniform usampler2D objectTriangleFrontFacing;
17+
layout(set = 2, binding = 5) uniform sampler2D encodedNormal;
18+
layout(set = 2, binding = 6) uniform sampler2D uv;
4719

4820

4921

@@ -58,10 +30,12 @@ float linearizeZBufferVal(in float nonLinearZBufferVal)
5830
// positive [0,1] Z: `B/(C-A-Cy)/(B/(C-A))`
5931
// positive [0,1] Z: `(C-A)/(C-A-Cy)`
6032
// positive [0,1] Z: `D/(D-Cy)`
61-
return 1.0/(uDepthLinearizationConstant*nonLinearZBufferVal+1.0);
33+
return 1.0/(pc.data.depthLinearizationConstant*nonLinearZBufferVal+1.0);
6234
}
6335

64-
float maxAbs1(in float val)
36+
37+
/*
38+
float maxAbs1(in float val)
6539
{
6640
return abs(val);
6741
}
@@ -97,28 +71,19 @@ float ULP3(in vec3 val, in uint accuracy)
9771
float x = maxAbs3(val);
9872
return uintBitsToFloat(floatBitsToUint(x) + accuracy)-x;
9973
}
74+
*/
10075

10176

102-
103-
uint ugen_uniform_sample1(in uint dimension, in uint sampleIx, in uint scramble);
104-
uvec2 ugen_uniform_sample2(in uint dimension, in uint sampleIx, in uint scramble);
105-
106-
vec2 gen_uniform_sample2(in uint dimension, in uint sampleIx, in uint scramble);
107-
108-
109-
uint ugen_uniform_sample1(in uint dimension, in uint sampleIx, in uint scramble)
110-
{
111-
return ugen_uniform_sample2(dimension,sampleIx,scramble).x;
112-
}
113-
uvec2 ugen_uniform_sample2(in uint dimension, in uint sampleIx, in uint scramble)
77+
struct SamplingData_t
11478
{
115-
uint address = (dimension>>1u)*MAX_ACCUMULATED_SAMPLES+(sampleIx&(MAX_ACCUMULATED_SAMPLES-1u));
116-
return texelFetch(sampleSequence,int(address)).xy^uvec2(scramble);
117-
}
118-
119-
vec2 gen_uniform_sample2(in uint dimension, in uint sampleIx, in uint scramble)
79+
uint sampleID;
80+
};
81+
bool gen_sample_ray(out float maxT, out vec3 direction, out vec3 throughput, in SamplingData_t samplingData)
12082
{
121-
return vec2(ugen_uniform_sample2(dimension,sampleIx,scramble))/vec2(~0u);
83+
maxT = FLT_MAX;
84+
direction = normalize(vec3(0.5,0.5,0.5));
85+
throughput = vec3(1.0);
86+
return true;
12287
}
12388

12489
#ifdef TODO
@@ -189,49 +154,71 @@ vec3 light_sample(out vec3 incoming, in uint sampleIx, in uint scramble, inout f
189154
void main()
190155
{
191156
uvec2 outputLocation = gl_GlobalInvocationID.xy;
192-
bool alive = all(lessThan(outputLocation,uImageSize));
193-
if (alive)
157+
if (all(lessThan(outputLocation,pc.data.imageDimensions)))
194158
{
195-
// TODO: accelerate texture fetching
196159
ivec2 uv = ivec2(outputLocation);
197160
float revdepth = texelFetch(depthbuf,uv,0).r;
198161

199-
uint outputID = outputLocation.x+uImageWidth_ImageArea_TotalImageSamples_Samples.x*outputLocation.y;
162+
uint outputID = outputLocation.y*pc.data.samplesPerRowPerDispatch+outputLocation.x;
200163

164+
SamplingData_t samplingData;
201165
// unproject
202166
vec3 viewDir;
203167
vec3 position;
204168
{
205-
vec2 NDC = vec2(outputLocation)*uImageSize2Rcp.xy+uImageSize2Rcp.zw;
206-
viewDir = mix(uFrustumCorners[0]*NDC.x+uFrustumCorners[1],uFrustumCorners[2]*NDC.x+uFrustumCorners[3],NDC.yyyy).xyz;
207-
position = viewDir*linearizeZBufferVal(revdepth)+uCameraPos;
208-
}
169+
const mat4x3 frustumCorners = pc.data.frustumCorners;
170+
const vec2 NDC = vec2(outputLocation)*pc.data.rcpPixelSize+pc.data.rcpHalfPixelSize;
209171

210-
alive = revdepth>0.0;
172+
viewDir = mix(frustumCorners[0]*NDC.x+frustumCorners[1],frustumCorners[2]*NDC.x+frustumCorners[3],NDC.yyy);
173+
position = viewDir*linearizeZBufferVal(revdepth)+pc.data.cameraPosition;
174+
}
175+
176+
bool alive = false;
177+
uint scramble,objectID,triangleID;
178+
bool backfacing;
179+
vec3 normal = vec3(0.0);
180+
vec2 uvCoord;
181+
if (revdepth>0.0)
182+
{
183+
scramble = texelFetch(scramblebuf,uv,0).r;
211184

212-
uint scramble = texelFetch(scramblebuf,uv,0).r;
185+
alive = true;
186+
}
187+
#ifdef USE_OPTIX_DENOISER
188+
// TODO: translate normal into float16_t buff
189+
#endif
213190

214-
RadeonRays_ray newray;
215-
newray.time = 0.0;
216-
newray.mask = alive ? -1:0;
217-
#ifdef TODO
218-
for (uint i=0u; i<uImageWidth_ImageArea_TotalImageSamples_Samples.w; i++)
191+
for (uint i=0u; i<pc.data.samplesPerPixelPerDispatch; i++)
219192
{
220-
vec4 throughput = vec4(0.0,0.0,0.0,-1.0);
221-
float error = GET_MAGNITUDE(1.0-revdepth)*0.1;
193+
vec3 direction; // TODO: just use irr_glsl_LightSample?
194+
float maxT;
195+
vec4 throughput = vec4(0.0,0.0,0.0,-1.0); // -1 needs to be there to ensure no backface culling on rays
222196

223-
newray.maxT = FLT_MAX;
197+
bool validRay = false;
224198
if (alive)
225-
throughput.rgb = light_sample(newray.direction,uSamplesComputed+i,scramble,newray.maxT,alive,position);
226-
227-
newray.origin = position+newray.direction*error/maxAbs3(newray.direction);
228-
newray._active = alive ? 1:0;
229-
newray.backfaceCulling = int(packHalf2x16(throughput.ab));
230-
newray.useless_padding = int(packHalf2x16(throughput.gr));
231-
232-
// TODO: repack rays for coalescing
233-
rays[outputID+i*uImageWidth_ImageArea_TotalImageSamples_Samples.y] = newray;
199+
{
200+
samplingData.sampleID = pc.data.samplesComputedPerPixel+i;
201+
validRay = gen_sample_ray(maxT,direction,throughput.rgb,samplingData);
202+
}
203+
204+
// TODO: repack rays in smem for coalescing
205+
const uint realOutputID = outputID+i;
206+
if (validRay)
207+
{
208+
rays[realOutputID].origin = position;/*+newray.direction*err?; TODO */
209+
rays[realOutputID].maxT = 0.0;
210+
rays[realOutputID].direction = direction;
211+
rays[realOutputID].mask = -1;
212+
rays[realOutputID]._active = 1;
213+
rays[realOutputID].backfaceCulling = int(packHalf2x16(throughput.ab));
214+
rays[realOutputID].useless_padding = int(packHalf2x16(throughput.gr));
215+
}
216+
else
217+
{
218+
rays[realOutputID].maxT = 0.0;
219+
rays[realOutputID].mask = 0;
220+
rays[realOutputID]._active = 0;
221+
}
234222
}
235-
#endif
236223
}
237224
}
Lines changed: 7 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#version 430 core
2+
23
#define WORK_GROUP_DIM 32u
34
layout(local_size_x = WORK_GROUP_DIM, local_size_y = WORK_GROUP_DIM) in;
45
#define WORK_GROUP_SIZE (WORK_GROUP_DIM*WORK_GROUP_DIM)
@@ -7,48 +8,16 @@ layout(local_size_x = WORK_GROUP_DIM, local_size_y = WORK_GROUP_DIM) in;
78
#include "common.glsl"
89

910

10-
// TODO translate into push contants
11-
// uniforms
12-
layout(location = 0) uniform uvec2 uImageSize;
13-
layout(location = 1) uniform uvec4 uImageWidth_ImageArea_TotalImageSamples_Samples;
14-
layout(location = 2) uniform float uRcpFramesDone;
15-
layout(location = 3) uniform mat3 uNormalMatrix;
16-
17-
// image views
18-
layout(set = 2, binding = 0) uniform usampler2D lightIndex;
19-
layout(set = 2, binding = 1) uniform sampler2D albedobuf;
20-
layout(set = 2, binding = 2) uniform sampler2D normalbuf;
21-
layout(set = 2, binding = 3, rgba32f) restrict uniform image2D framebuffer;
22-
23-
// SSBOs
24-
layout(set = 2, binding = 4, std430) restrict readonly buffer Rays
25-
{
26-
RadeonRays_ray rays[];
27-
};
28-
layout(set = 2, binding = 5, std430) restrict buffer Queries
11+
layout(push_constant, row_major) uniform PushConstants
2912
{
30-
int hit[];
31-
};
13+
ResolveShaderData_t data;
14+
} pc;
3215

33-
layout(set = 2, binding = 6, std430, row_major) restrict readonly buffer LightRadiances
34-
{
35-
vec3 lightRadiance[]; // Watts / steriadian / steradian
36-
};
3716

38-
#ifdef USE_OPTIX_DENOISER
39-
layout(set = 2, binding = 7, std430) restrict writeonly buffer DenoiserColorInput
17+
layout(set = 2, binding = 0, std430) restrict buffer Queries
4018
{
41-
float16_t colorOutput[];
42-
};
43-
layout(set = 2, binding = 8, std430) restrict writeonly buffer DenoiserAlbedoInput
44-
{
45-
float16_t albedoOutput[];
46-
};
47-
layout(set = 2, binding = 9, std430) restrict writeonly buffer DenoiserNormalInput
48-
{
49-
float16_t normalOutput[];
19+
int hit[];
5020
};
51-
#endif
5221

5322

5423

@@ -70,10 +39,6 @@ void main()
7039
uint baseID = gl_GlobalInvocationID.x+uImageWidth_ImageArea_TotalImageSamples_Samples.x*gl_GlobalInvocationID.y;
7140
bool alive = all(lessThan(gl_GlobalInvocationID.xy,uImageSize));
7241

73-
vec3 normal;
74-
if (alive)
75-
normal = irr_glsl_NormalDecode_signedSpherical(texelFetch(normalbuf,pixelCoord,0).rg);
76-
7742
vec4 acc = vec4(0.0);
7843
if (uRcpFramesDone<1.0 && alive)
7944
acc = imageLoad(framebuffer,pixelCoord);
@@ -107,8 +72,6 @@ void main()
10772
for (uint j=localID; j<CACHE_SIZE; j+=CACHE_DIM*CACHE_DIM)
10873
{
10974
vec3 raydiance = vec4(unpackHalf2x16(rayScratch0[j]),unpackHalf2x16(rayScratch1[j])).gra;
110-
// TODO: sophisticated BSDF eval
111-
raydiance *= max(dot(vec3(rayScratch2[j],rayScratch3[j],rayScratch4[j]),normal),0.0)/kPI;
11275
color += raydiance;
11376
}
11477

@@ -122,29 +85,11 @@ void main()
12285

12386
if (alive)
12487
{
125-
// TODO: sophisticated BSDF eval
126-
vec3 albedo = texelFetch(albedobuf,pixelCoord,0).rgb;
127-
color *= albedo;
128-
129-
// TODO: move ray gen, for fractional sampling
88+
// TODO: move to ray gen, for fractional sampling?
13089
color *= 1.0/float(uImageWidth_ImageArea_TotalImageSamples_Samples.w);
13190

132-
uint lightID = texelFetch(lightIndex,pixelCoord,0)[0];
133-
if (lightID!=0xdeadbeefu)
134-
color += lightRadiance[lightID];
135-
13691
// TODO: optimize the color storage (RGB9E5/RGB19E7 anyone?)
13792
acc.rgb += (color-acc.rgb)*uRcpFramesDone;
13893
imageStore(framebuffer,pixelCoord,acc);
139-
#ifdef USE_OPTIX_DENOISER
140-
for (uint i=0u; i<3u; i++)
141-
colorOutput[baseID*3+i] = float16_t(acc[i]);
142-
//colorOutput[baseID*3+i] = float16_t(clamp(acc[i],0.0001,10000.0));
143-
for (uint i=0u; i<3u; i++)
144-
albedoOutput[baseID*3+i] = float16_t(albedo[i]);
145-
normal = uNormalMatrix*normal;
146-
for (uint i=0u; i<3u; i++)
147-
normalOutput[baseID*3+i] = float16_t(normal[i]);
148-
#endif
14994
}
15095
}

include/irr/builtin/glsl/ext/RadeonRays/ray.glsl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ struct irr_glsl_ext_RadeonRays_ray
1515

1616
irr_glsl_ext_RadeonRays_ray irr_glsl_ext_RadeonRays_constructDefaultRay(in vec3 origin, in vec3 direction, in float maxLen, in int userData)
1717
{
18-
RadeonRays_ray retval;
18+
irr_glsl_ext_RadeonRays_ray retval;
1919
retval.origin = origin;
2020
retval.maxT = maxLen;
2121
retval.direction = direction;

0 commit comments

Comments
 (0)