@@ -194,7 +194,7 @@ layout(binding = 0, std430) restrict readonly buffer ImageInputBuffer
194
194
} inBuffers[EII_COUNT];
195
195
// Write-only std430 storage block at binding 1, declared as an array of EII_COUNT blocks (outBuffers[]).
// This change retypes the unsized `data` array from scalar float16_t to f16vec3_packed, so that
// main() can address one element per pixel and store its .x/.y/.z members directly.
layout(binding = 1, std430) restrict writeonly buffer ImageOutputBuffer
196
196
{
197
- float16_t data[];
197
+ f16vec3_packed data[];
198
198
} outBuffers[EII_COUNT];
199
199
vec3 fetchData(in uvec3 texCoord)
200
200
{
@@ -217,21 +217,10 @@ void main()
217
217
nbl_glsl_ext_LumaMeter(colorLayer && gl_GlobalInvocationID.x<pc.data.imageWidth);
218
218
barrier();
219
219
}
220
- repackBuffer[gl_LocalInvocationIndex*SHARED_CHANNELS+0u] = floatBitsToUint(globalPixelData[0u]);
221
- repackBuffer[gl_LocalInvocationIndex*SHARED_CHANNELS+1u] = floatBitsToUint(globalPixelData[1u]);
222
- repackBuffer[gl_LocalInvocationIndex*SHARED_CHANNELS+2u] = floatBitsToUint(globalPixelData[2u]);
223
- barrier();
224
- const uint outImagePitch = pc.data.imageWidth*SHARED_CHANNELS;
225
- uint rowOffset = gl_GlobalInvocationID.y*outImagePitch;
226
- uint lineOffset = gl_WorkGroupID.x*COMPUTE_WG_SIZE*SHARED_CHANNELS+gl_LocalInvocationIndex;
227
- if (lineOffset<outImagePitch)
228
- outBuffers[gl_GlobalInvocationID.z].data[rowOffset+lineOffset] = float16_t(uintBitsToFloat(repackBuffer[gl_LocalInvocationIndex+COMPUTE_WG_SIZE*0u]));
229
- lineOffset += COMPUTE_WG_SIZE;
230
- if (lineOffset<outImagePitch)
231
- outBuffers[gl_GlobalInvocationID.z].data[rowOffset+lineOffset] = float16_t(uintBitsToFloat(repackBuffer[gl_LocalInvocationIndex+COMPUTE_WG_SIZE*1u]));
232
- lineOffset += COMPUTE_WG_SIZE;
233
- if (lineOffset<outImagePitch)
234
- outBuffers[gl_GlobalInvocationID.z].data[rowOffset+lineOffset] = float16_t(uintBitsToFloat(repackBuffer[gl_LocalInvocationIndex+COMPUTE_WG_SIZE*2u]));
220
+ // Only invocations inside the image row may store. The LumaMeter call above already tests
+ // gl_GlobalInvocationID.x<pc.data.imageWidth, so out-of-row invocations exist; for those,
+ // y*imageWidth+x would land in the next row (and may run past the end of the buffer on the last row).
+ // The removed repack path had the equivalent `lineOffset<outImagePitch` guard on every store.
+ if (gl_GlobalInvocationID.x<pc.data.imageWidth)
+ {
+ 	// one f16vec3_packed element per pixel, rows tightly packed at imageWidth elements
+ 	const uint addr = gl_GlobalInvocationID.y*pc.data.imageWidth+gl_GlobalInvocationID.x;
+ 	outBuffers[gl_GlobalInvocationID.z].data[addr].x = float16_t(globalPixelData.x);
+ 	outBuffers[gl_GlobalInvocationID.z].data[addr].y = float16_t(globalPixelData.y);
+ 	outBuffers[gl_GlobalInvocationID.z].data[addr].z = float16_t(globalPixelData.z);
+ }
235
224
}
236
225
)===" ));
237
226
auto intensityShader = driver->createGPUShader (core::make_smart_refctd_ptr<ICPUShader>(R"===(
@@ -290,18 +279,16 @@ void main()
290
279
#include "../ShaderCommon.glsl"
291
280
// Read-only std430 storage block at binding 0. It has no instance name, so `inBuffer` is referenced directly.
// This change retypes the unsized array from scalar float16_t to f16vec3_packed; main() below
// reads the .x/.y/.z members of inBuffer[dataOffset] instead of three consecutive scalars.
layout(binding = 0, std430) restrict readonly buffer ImageInputBuffer
292
281
{
293
- float16_t inBuffer[];
282
+ f16vec3_packed inBuffer[];
294
283
};
295
284
// Write-only std430 storage block at binding 1, declared as an array of EII_COUNT blocks.
// Not touched by this change: `data` stays a scalar float16_t array here, unlike the input block
// above, which is now f16vec3_packed.
layout(binding = 1, std430) restrict writeonly buffer ImageOutputBuffer
296
285
{
297
286
float16_t data[];
298
287
} outBuffers[EII_COUNT]; // TODO: do FFT
299
288
void main()
300
289
{
301
- const uint dataOffset = (gl_GlobalInvocationID.y*pc.data.imageWidth+gl_GlobalInvocationID.x)*SHARED_CHANNELS;
302
-
303
- // TODO: Optimize this fetch
304
- globalPixelData = vec3(inBuffer[dataOffset+0u],inBuffer[dataOffset+1u],inBuffer[dataOffset+2u]);
290
+ const uint dataOffset = gl_GlobalInvocationID.y*pc.data.imageWidth+gl_GlobalInvocationID.x;
291
+ globalPixelData = vec3(inBuffer[dataOffset].x,inBuffer[dataOffset].y,inBuffer[dataOffset].z);
305
292
306
293
nbl_glsl_ext_LumaMeter(gl_GlobalInvocationID.x<pc.data.imageWidth);
307
294
barrier();
0 commit comments