@@ -33,20 +33,12 @@ vec3 nbl_glsl_ext_LumaMeter_getColor(bool wgExecutionMask);
33
33
34
34
// Declare-once guard: the prototype of the luma meter entry point is only emitted while
// _NBL_GLSL_EXT_LUMA_METER_IMPL_DECLARED_ is still undefined.
#ifndef _NBL_GLSL_EXT_LUMA_METER_IMPL_DECLARED_
35
35
#define _NBL_GLSL_EXT_LUMA_METER_IMPL_DECLARED_
36
- void nbl_glsl_ext_LumaMeter(bool wgExecutionMask);
36
// the added declaration spells out the `in` qualifier on the parameter
+ void nbl_glsl_ext_LumaMeter(in bool wgExecutionMask);
37
37
#endif
38
38
39
- #ifndef _NBL_GLSL_EXT_LUMA_METER_IMPL_DEFINED_
40
- #define _NBL_GLSL_EXT_LUMA_METER_IMPL_DEFINED_
41
- void nbl_glsl_ext_LumaMeter(bool wgExecutionMask)
39
+ float nbl_glsl_ext_LumaMeter_local_process(in bool wgExecutionMask, in vec3 color)
42
40
{
43
- vec3 color = nbl_glsl_ext_LumaMeter_getColor(wgExecutionMask);
44
- #if _NBL_GLSL_EXT_LUMA_METER_MODE_DEFINED_== _NBL_GLSL_EXT_LUMA_METER_MODE_MEDIAN
45
- nbl_glsl_ext_LumaMeter_clearHistogram();
46
- #endif
47
- nbl_glsl_ext_LumaMeter_clearFirstPassOutput();
48
-
49
- float logLuma;
41
+ float scaledLogLuma;
50
42
// linearize
51
43
if (wgExecutionMask)
52
44
{
@@ -58,36 +50,42 @@ void nbl_glsl_ext_LumaMeter(bool wgExecutionMask)
58
50
const float MaxLuma = intBitsToFloat(_NBL_GLSL_EXT_LUMA_METER_MAX_LUMA_DEFINED_);
59
51
luma = clamp (luma,MinLuma,MaxLuma);
60
52
61
- logLuma = log2 (luma/ MinLuma)/ log2 (MaxLuma/ MinLuma);
53
+ scaledLogLuma = log2 (luma/ MinLuma)/ log2 (MaxLuma/ MinLuma);
62
54
}
63
55
64
56
#if _NBL_GLSL_EXT_LUMA_METER_MODE_DEFINED_== _NBL_GLSL_EXT_LUMA_METER_MODE_MEDIAN
65
57
// compute histogram index
66
58
int histogramIndex;
67
59
if (wgExecutionMask)
68
60
{
69
- histogramIndex = int (logLuma * float (_NBL_GLSL_EXT_LUMA_METER_BIN_COUNT- 1u)+ 0.5 );
61
+ histogramIndex = int (scaledLogLuma * float (_NBL_GLSL_EXT_LUMA_METER_BIN_COUNT- 1u)+ 0.5 );
70
62
histogramIndex += int (gl_LocalInvocationIndex& uint (_NBL_GLSL_EXT_LUMA_METER_LOCAL_REPLICATION- 1 ))* _NBL_GLSL_EXT_LUMA_METER_PADDED_BIN_COUNT;
71
63
}
72
64
// barrier so we "see" the cleared histogram
73
65
barrier();
74
- memoryBarrierShared();
75
66
if (wgExecutionMask)
76
67
atomicAdd(_NBL_GLSL_SCRATCH_SHARED_DEFINED_[histogramIndex],1u);
77
-
78
- // no barrier on shared memory cause if we use it with atomics the writes and reads be coherent
68
+ // only an execution barrier() here, no separate memoryBarrierShared(): we read the histogram later and need all atomics to be done before we read
79
69
barrier();
70
+ #endif
80
71
72
+ return scaledLogLuma;
73
+ }
74
+
75
+ nbl_glsl_ext_LumaMeter_WriteOutValue_t nbl_glsl_ext_LumaMeter_workgroup_process(in bool wgExecutionMask, in float scaledLogLuma)
76
+ {
77
+ #if _NBL_GLSL_EXT_LUMA_METER_MODE_DEFINED_== _NBL_GLSL_EXT_LUMA_METER_MODE_MEDIAN
81
78
// join the locally replicated histograms held in shared memory
82
79
uint writeOutVal = _NBL_GLSL_SCRATCH_SHARED_DEFINED_[gl_LocalInvocationIndex];
83
80
for (int i= 1 ; i< _NBL_GLSL_EXT_LUMA_METER_LOCAL_REPLICATION; i++ )
84
81
writeOutVal += _NBL_GLSL_SCRATCH_SHARED_DEFINED_[gl_LocalInvocationIndex+ i* _NBL_GLSL_EXT_LUMA_METER_PADDED_BIN_COUNT];
82
+ return writeOutVal;
85
83
#elif _NBL_GLSL_EXT_LUMA_METER_MODE_DEFINED_== _NBL_GLSL_EXT_LUMA_METER_MODE_GEOM_MEAN
86
- _NBL_GLSL_SCRATCH_SHARED_DEFINED_[gl_LocalInvocationIndex] = wgExecutionMask ? floatBitsToUint(logLuma): 0u;
84
+ // TODO: use nbl_glsl_workgroupAdd reduction
85
+ _NBL_GLSL_SCRATCH_SHARED_DEFINED_[gl_LocalInvocationIndex] = wgExecutionMask ? floatBitsToUint(scaledLogLuma): 0u;
87
86
for (int i= NBL_GLSL_WORKGROUP_SIZE_>> 1 ; i> 1 ; i>>= 1 )
88
87
{
89
88
barrier();
90
- memoryBarrierShared();
91
89
if (gl_LocalInvocationIndex< i)
92
90
{
93
91
_NBL_GLSL_SCRATCH_SHARED_DEFINED_[gl_LocalInvocationIndex] = floatBitsToUint
@@ -98,9 +96,22 @@ void nbl_glsl_ext_LumaMeter(bool wgExecutionMask)
98
96
}
99
97
}
100
98
barrier();
101
- memoryBarrierShared();
102
- float writeOutVal = uintBitsToFloat(_NBL_GLSL_SCRATCH_SHARED_DEFINED_[0 ])+ uintBitsToFloat(_NBL_GLSL_SCRATCH_SHARED_DEFINED_[1 ]);
99
+ return uintBitsToFloat(_NBL_GLSL_SCRATCH_SHARED_DEFINED_[0 ])+ uintBitsToFloat(_NBL_GLSL_SCRATCH_SHARED_DEFINED_[1 ]);
100
+ #endif
101
+ }
102
+
103
+ #ifndef _NBL_GLSL_EXT_LUMA_METER_IMPL_DEFINED_
104
+ #define _NBL_GLSL_EXT_LUMA_METER_IMPL_DEFINED_
105
// Entry point of the luma meter: obtains the color via nbl_glsl_ext_LumaMeter_getColor,
// calls the clear hooks, runs the per-invocation stage followed by the workgroup stage,
// and hands the workgroup stage's result to nbl_glsl_ext_LumaMeter_setFirstPassOutput.
// `wgExecutionMask` is forwarded unchanged to getColor and to both processing stages.
+ void nbl_glsl_ext_LumaMeter(in bool wgExecutionMask)
106
+ {
107
+ vec3 color = nbl_glsl_ext_LumaMeter_getColor(wgExecutionMask);
108
// the histogram clear is only compiled in for MEDIAN mode
+ #if _NBL_GLSL_EXT_LUMA_METER_MODE_DEFINED_== _NBL_GLSL_EXT_LUMA_METER_MODE_MEDIAN
109
+ nbl_glsl_ext_LumaMeter_clearHistogram();
103
110
#endif
111
+ nbl_glsl_ext_LumaMeter_clearFirstPassOutput();
112
+
113
// per-invocation stage: returns the scaled log-luma that feeds the workgroup stage
+ const float scaledLogLuma = nbl_glsl_ext_LumaMeter_local_process(wgExecutionMask,color);
114
// workgroup stage: produces the value this invocation writes out
+ const nbl_glsl_ext_LumaMeter_WriteOutValue_t writeOutVal = nbl_glsl_ext_LumaMeter_workgroup_process(wgExecutionMask,scaledLogLuma);
104
115
105
116
nbl_glsl_ext_LumaMeter_setFirstPassOutput(writeOutVal);
106
117
}
0 commit comments