@@ -39,11 +39,13 @@ struct irr_glsl_ext_LumaMeter_Uniforms_t
39
39
#else
40
40
#define _IRR_GLSL_EXT_LUMA_METER_SHARED_SIZE_NEEDED_IMPL_ (_IRR_GLSL_EXT_LUMA_METER_BIN_COUNT* 2 )
41
41
#endif
42
+
43
+ #include < irr/ builtin/ glsl/ workgroup/ shared_arithmetic.glsl>
42
44
// make sure the shared memory is also large enough for the workgroup arithmetic (subgroup emulation) scratch space
43
- #if IRR_GLSL_GREATER(_IRR_GLSL_EXT_LUMA_METER_SHARED_SIZE_NEEDED_IMPL_,(_IRR_GLSL_WORKGROUP_SIZE_ << 1 ) )
45
+ #if IRR_GLSL_GREATER(_IRR_GLSL_EXT_LUMA_METER_SHARED_SIZE_NEEDED_IMPL_,_IRR_GLSL_WORKGROUP_ARITHMETIC_SHARED_SIZE_NEEDED_ )
44
46
#define _IRR_GLSL_EXT_LUMA_METER_SHARED_SIZE_NEEDED_ _IRR_GLSL_EXT_LUMA_METER_SHARED_SIZE_NEEDED_IMPL_
45
47
#else
46
- #define _IRR_GLSL_EXT_LUMA_METER_SHARED_SIZE_NEEDED_ (_IRR_GLSL_WORKGROUP_SIZE_ << 1 )
48
+ #define _IRR_GLSL_EXT_LUMA_METER_SHARED_SIZE_NEEDED_ _IRR_GLSL_WORKGROUP_ARITHMETIC_SHARED_SIZE_NEEDED_
47
49
#endif
48
50
49
51
#if IRR_GLSL_NOT_EQUAL(IRR_GLSL_AND(IRR_GLSL_SUB(_IRR_GLSL_EXT_LUMA_METER_MAX_LUMA_DEFINED_,_IRR_GLSL_EXT_LUMA_METER_MIN_LUMA_DEFINED_),_IRR_GLSL_EXT_LUMA_METER_BIN_COUNT- 1 ),0 )
@@ -276,42 +278,12 @@ float irr_glsl_ext_LumaMeter_impl_getMeasuredLumaLog2(in irr_glsl_ext_LumaMeter_
276
278
#if IRR_GLSL_EQUAL(_IRR_GLSL_EXT_LUMA_METER_MODE_DEFINED_,_IRR_GLSL_EXT_LUMA_METER_MODE_MEDIAN)
277
279
#include < irr/ builtin/ glsl/ workgroup/ arithmetic.glsl>
278
280
#include < irr/ builtin/ glsl/ workgroup/ shuffle.glsl>
279
- // TODO: figure out why the irr_glsl_workgroupExclusiveAdd function doesn't work; until then this hand-rolled stand-in is used (the library call sits in the compiled-out #else branch below)
280
- uint irr_glsl_workgroupExclusiveAdd2(uint val) // workaround meant to yield the workgroup-wide exclusive add of `val`; invocation 0 always returns 0u
281
- {
282
- #if 1 // always take the hand-rolled path; the #else branch is compiled out
283
- barrier();
284
- memoryBarrierShared();
285
- SUBGROUP_SCRATCH_INITIALIZE(val,_IRR_GLSL_WORKGROUP_SIZE_,0u,irr_glsl_identityFunction) // NOTE(review): macro defined elsewhere; presumably seeds the shared scratch and declares `loMask` used below - confirm
286
- uint firstLevelScan = irr_glsl_subgroupInclusiveAdd_impl(false,val); // first level: subgroup-scoped inclusive add of val (going by the helper's name; defined elsewhere)
287
- barrier();
288
- memoryBarrierShared();
289
- const bool propagateReduction = (gl_LocalInvocationIndex& loMask)== loMask; // true when every loMask bit of the invocation index is set - presumably the last invocation of each subgroup (TODO confirm loMask)
290
- // uint firstLevelScan = INVCONV(FIRST_SUBGROUP_OP(false,VALUE));
291
- // uint lastInvocationInLevel = lastInvocation;
292
- const uint lowerIndex = gl_LocalInvocationIndex>> irr_glsl_SubgroupSizeLog2; // invocation index divided by 2^irr_glsl_SubgroupSizeLog2; doubles as the scratch slot to write and the count of lower slots to sum
293
-
294
- if (propagateReduction)
295
- _IRR_GLSL_SCRATCH_SHARED_DEFINED_[lowerIndex] = firstLevelScan; // publish this invocation's first-level result into its slot
296
-
297
- barrier(); // synchronize so the slots written above can be read below
298
- memoryBarrierShared();
299
- for (uint i= 0u; i< lowerIndex; i++ ) // serially add the values stored in all lower slots
300
- {
301
- firstLevelScan += _IRR_GLSL_SCRATCH_SHARED_DEFINED_[i];
302
- }
303
- firstLevelScan = irr_glsl_workgroupShuffle(firstLevelScan, gl_LocalInvocationIndex != 0u ? (gl_LocalInvocationIndex - 1u) : 0u); // fetch the running total held by the previous invocation (invocation 0 reads its own value, discarded below)
304
- return gl_LocalInvocationIndex != 0u ? firstLevelScan : 0u; // shift by one: invocation 0 has nothing before it
305
- #else
306
- return irr_glsl_workgroupExclusiveAdd(val); // disabled: the library implementation this function stands in for
307
- #endif
308
- }
309
281
#endif
310
282
311
283
float irr_glsl_ext_LumaMeter_impl_getMeasuredLumaLog2(in irr_glsl_ext_LumaMeter_output_SPIRV_CROSS_is_dumb_t firstPassOutput, in irr_glsl_ext_LumaMeter_PassInfo_t info)
312
284
{
313
285
#if IRR_GLSL_EQUAL(_IRR_GLSL_EXT_LUMA_METER_MODE_DEFINED_,_IRR_GLSL_EXT_LUMA_METER_MODE_MEDIAN)
314
- uint histogramPrefix = irr_glsl_workgroupExclusiveAdd2 (firstPassOutput);
286
+ uint histogramPrefix = irr_glsl_workgroupExclusiveAdd (firstPassOutput);
315
287
316
288
// TODO: We can do this better. Right now the workgroup size must equal _IRR_GLSL_EXT_LUMA_METER_BIN_COUNT; it would be good to lift that restriction (we could carry out many prefix sums in serial).
317
289
// Assign a whole subgroup to do a subgroup_uniform_upper_bound on the lower percentile, then do the subgroup_uniform_upper_bound again, but only over the [previousFound,end) range.
0 commit comments