Skip to content

Commit f1c9d93

Browse files
cleanup shared memory
1 parent 1a8202d commit f1c9d93

File tree

2 files changed

+15
-21
lines changed

2 files changed

+15
-21
lines changed

examples_tests/39.DenoiserTonemapper/ShaderCommon.glsl

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,13 @@ layout(push_constant, row_major) uniform PushConstants{
1919

2020

2121
#define SHARED_CHANNELS 3
22+
// One pixel stored as three individual half-float scalars (x, y, z).
// Used as the element type of the std430 buffer arrays, so a pixel is addressed with a single index.
// NOTE(review): presumably scalars rather than an f16vec3 to keep a tight 6-byte element stride under std430 - confirm.
struct f16vec3_packed
23+
{
24+
float16_t x;
25+
float16_t y;
26+
float16_t z;
27+
};
28+
2229
// the amount of memory needed for luma metering is bigger than interleaving
// NOTE(review): the interleaving writes to repackBuffer are removed from main.cpp in this commit -
// confirm whether the comparison above (and the buffer's name) is still accurate.
2330
#define _NBL_GLSL_SCRATCH_SHARED_SIZE_DEFINED_ ((COMPUTE_WG_SIZE+1)*8)
2431
// Workgroup-shared scratch array of uints; its element count is the define above.
shared uint repackBuffer[_NBL_GLSL_SCRATCH_SHARED_SIZE_DEFINED_];

examples_tests/39.DenoiserTonemapper/main.cpp

Lines changed: 8 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ layout(binding = 0, std430) restrict readonly buffer ImageInputBuffer
194194
} inBuffers[EII_COUNT];
195195
layout(binding = 1, std430) restrict writeonly buffer ImageOutputBuffer
196196
{
197-
float16_t data[];
197+
f16vec3_packed data[];
198198
} outBuffers[EII_COUNT];
199199
vec3 fetchData(in uvec3 texCoord)
200200
{
@@ -217,21 +217,10 @@ void main()
217217
nbl_glsl_ext_LumaMeter(colorLayer && gl_GlobalInvocationID.x<pc.data.imageWidth);
218218
barrier();
219219
}
220-
repackBuffer[gl_LocalInvocationIndex*SHARED_CHANNELS+0u] = floatBitsToUint(globalPixelData[0u]);
221-
repackBuffer[gl_LocalInvocationIndex*SHARED_CHANNELS+1u] = floatBitsToUint(globalPixelData[1u]);
222-
repackBuffer[gl_LocalInvocationIndex*SHARED_CHANNELS+2u] = floatBitsToUint(globalPixelData[2u]);
223-
barrier();
224-
const uint outImagePitch = pc.data.imageWidth*SHARED_CHANNELS;
225-
uint rowOffset = gl_GlobalInvocationID.y*outImagePitch;
226-
uint lineOffset = gl_WorkGroupID.x*COMPUTE_WG_SIZE*SHARED_CHANNELS+gl_LocalInvocationIndex;
227-
if (lineOffset<outImagePitch)
228-
outBuffers[gl_GlobalInvocationID.z].data[rowOffset+lineOffset] = float16_t(uintBitsToFloat(repackBuffer[gl_LocalInvocationIndex+COMPUTE_WG_SIZE*0u]));
229-
lineOffset += COMPUTE_WG_SIZE;
230-
if (lineOffset<outImagePitch)
231-
outBuffers[gl_GlobalInvocationID.z].data[rowOffset+lineOffset] = float16_t(uintBitsToFloat(repackBuffer[gl_LocalInvocationIndex+COMPUTE_WG_SIZE*1u]));
232-
lineOffset += COMPUTE_WG_SIZE;
233-
if (lineOffset<outImagePitch)
234-
outBuffers[gl_GlobalInvocationID.z].data[rowOffset+lineOffset] = float16_t(uintBitsToFloat(repackBuffer[gl_LocalInvocationIndex+COMPUTE_WG_SIZE*2u]));
220+
const uint addr = gl_GlobalInvocationID.y*pc.data.imageWidth+gl_GlobalInvocationID.x;
221+
outBuffers[gl_GlobalInvocationID.z].data[addr].x = float16_t(globalPixelData.x);
222+
outBuffers[gl_GlobalInvocationID.z].data[addr].y = float16_t(globalPixelData.y);
223+
outBuffers[gl_GlobalInvocationID.z].data[addr].z = float16_t(globalPixelData.z);
235224
}
236225
)==="));
237226
auto intensityShader = driver->createGPUShader(core::make_smart_refctd_ptr<ICPUShader>(R"===(
@@ -290,18 +279,16 @@ void main()
290279
#include "../ShaderCommon.glsl"
291280
layout(binding = 0, std430) restrict readonly buffer ImageInputBuffer
292281
{
293-
float16_t inBuffer[];
282+
f16vec3_packed inBuffer[];
294283
};
295284
// Write-only std430 storage buffers at binding 1, declared as an array of EII_COUNT blocks.
// Each block exposes a flat, runtime-sized array of float16_t values.
layout(binding = 1, std430) restrict writeonly buffer ImageOutputBuffer
296285
{
297286
float16_t data[];
298287
} outBuffers[EII_COUNT]; // TODO: do FFT
299288
void main()
300289
{
301-
const uint dataOffset = (gl_GlobalInvocationID.y*pc.data.imageWidth+gl_GlobalInvocationID.x)*SHARED_CHANNELS;
302-
303-
// TODO: Optimize this fetch
304-
globalPixelData = vec3(inBuffer[dataOffset+0u],inBuffer[dataOffset+1u],inBuffer[dataOffset+2u]);
290+
const uint dataOffset = gl_GlobalInvocationID.y*pc.data.imageWidth+gl_GlobalInvocationID.x;
291+
globalPixelData = vec3(inBuffer[dataOffset].x,inBuffer[dataOffset].y,inBuffer[dataOffset].z);
305292
306293
nbl_glsl_ext_LumaMeter(gl_GlobalInvocationID.x<pc.data.imageWidth);
307294
barrier();

0 commit comments

Comments
 (0)