Skip to content

Commit 1bf8bc2

Browse files
try to refactor autoexposure
1 parent 708da4e commit 1bf8bc2

File tree

3 files changed

+70
-52
lines changed

3 files changed

+70
-52
lines changed

examples_tests/39.DenoiserTonemapper/main.cpp

Lines changed: 32 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -332,37 +332,44 @@ void nbl_glsl_ext_FFT_setData(in uvec3 coordinate, in uint channel, in nbl_glsl_
332332
#define _NBL_GLSL_EXT_FFT_SET_DATA_DEFINED_
333333
334334
335+
#define _NBL_GLSL_EXT_FFT_MAIN_DEFINED_
336+
#include "nbl/builtin/glsl/ext/FFT/default_compute_fft.comp"
335337
336-
void main()
337-
{
338-
const uint dataOffset = gl_GlobalInvocationID.y*pc.data.imageWidth+gl_GlobalInvocationID.x;
339-
globalPixelData = vec3(inBuffer[dataOffset].x,inBuffer[dataOffset].y,inBuffer[dataOffset].z);
340-
341-
nbl_glsl_ext_LumaMeter(gl_GlobalInvocationID.x<pc.data.imageWidth);
342-
barrier();
343-
}
344338
345339
nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channel)
346340
{
347-
#if 0
348-
if (!nbl_glsl_ext_FFT_wrap_coord(coordinate))
349-
return nbl_glsl_complex(0.f,0.f);
350-
#endif
341+
ivec3 oldCoord = coordinate;
342+
nbl_glsl_ext_FFT_wrap_coord(coordinate);
343+
351344
const uint index = coordinate.y*pc.data.imageWidth+coordinate.x;
352-
float data;
345+
346+
nbl_glsl_complex retval;
353347
switch (channel)
354348
{
355349
case 2u:
356-
data = float(inBuffer[index].z);
357-
break;
350+
retval.z = float(inBuffer[index].z);
358351
case 1u:
359-
data = float(inBuffer[index].y);
360-
break;
352+
retval.y = float(inBuffer[index].y);
361353
default:
362-
data = float(inBuffer[index].x);
363-
break;
354+
retval.x = float(inBuffer[index].x);
364355
}
365-
return nbl_glsl_complex(data,0.f);
356+
return retval;
357+
}
358+
359+
void main()
360+
{
361+
#if _NBL_GLSL_EXT_LUMA_METER_MODE_DEFINED_==_NBL_GLSL_EXT_LUMA_METER_MODE_MEDIAN
362+
nbl_glsl_ext_LumaMeter_clearHistogram();
363+
#endif
364+
nbl_glsl_ext_LumaMeter_clearFirstPassOutput();
365+
366+
//
367+
368+
// prevent overlap between different usages of shared memory
369+
barrier();
370+
371+
for(uint ch=0u; ch<=nbl_glsl_ext_FFT_Parameters_t_getMaxChannel(); ++ch)
372+
nbl_glsl_ext_FFT(nbl_glsl_ext_FFT_Parameters_t_getIsInverse(),ch);
366373
}
367374
)==="));
368375
auto interleaveAndLastFFTShader = driver->createGPUShader(core::make_smart_refctd_ptr<ICPUShader>(R"===(
@@ -390,9 +397,13 @@ layout(binding = 3, std430) restrict readonly buffer IntensityBuffer
390397
nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channel);
391398
#define _NBL_GLSL_EXT_FFT_GET_PADDED_DATA_DEFINED_
392399
400+
uint nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize()
401+
{
402+
return max(findMSB(pc.data.imageWidth-1u),_NBL_GLSL_WORKGROUP_SIZE_LOG2_)+1u;
403+
}
393404
uvec3 nbl_glsl_ext_FFT_Parameters_t_getDimensions()
394405
{
395-
return uvec3(pc.data.imageWidth,pc.data.imageHeight,1u);
406+
return uvec3(0x1u<<nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize(),pc.data.imageHeight,1u);
396407
}
397408
bool nbl_glsl_ext_FFT_Parameters_t_getIsInverse()
398409
{
@@ -406,10 +417,6 @@ uint nbl_glsl_ext_FFT_Parameters_t_getMaxChannel()
406417
{
407418
return 2u;
408419
}
409-
uint nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize()
410-
{
411-
return 10u;
412-
}
413420
uint nbl_glsl_ext_FFT_Parameters_t_getPaddingType()
414421
{
415422
return 3u; // _NBL_GLSL_EXT_FFT_PAD_MIRROR_;

include/nbl/builtin/glsl/ext/LumaMeter/common.glsl

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#define _NBL_GLSL_EXT_LUMA_METER_COMMON_INCLUDED_
77

88
#include <nbl/builtin/glsl/macros.glsl>
9+
#include <nbl/builtin/glsl/math/functions.glsl>
910

1011
#ifndef _NBL_GLSL_EXT_LUMA_METER_UNIFORMS_DEFINED_
1112
#define _NBL_GLSL_EXT_LUMA_METER_UNIFORMS_DEFINED_
@@ -201,6 +202,7 @@ layout(set=_NBL_GLSL_EXT_LUMA_METER_INPUT_IMAGE_SET_DEFINED_, binding=_NBL_GLSL_
201202
#if NBL_GLSL_EQUAL(_NBL_GLSL_EXT_LUMA_METER_MODE_DEFINED_,_NBL_GLSL_EXT_LUMA_METER_MODE_MEDIAN)
202203
void nbl_glsl_ext_LumaMeter_clearHistogram()
203204
{
205+
// TODO: redo how we clear
204206
for (int i=0; i<_NBL_GLSL_EXT_LUMA_METER_LOCAL_REPLICATION; i++)
205207
_NBL_GLSL_SCRATCH_SHARED_DEFINED_[gl_LocalInvocationIndex+i*_NBL_GLSL_WORKGROUP_SIZE_] = 0u;
206208
#if NBL_GLSL_GREATER(_NBL_GLSL_EXT_LUMA_METER_LOCAL_REPLICATION_POW_DEFINED_,0)
@@ -221,7 +223,7 @@ layout(set=_NBL_GLSL_EXT_LUMA_METER_INPUT_IMAGE_SET_DEFINED_, binding=_NBL_GLSL_
221223
void nbl_glsl_ext_LumaMeter_clearFirstPassOutput()
222224
{
223225
#if NBL_GLSL_EQUAL(_NBL_GLSL_EXT_LUMA_METER_MODE_DEFINED_,_NBL_GLSL_EXT_LUMA_METER_MODE_MEDIAN)
224-
uint globalIndex = gl_LocalInvocationIndex+gl_WorkGroupID.x*_NBL_GLSL_EXT_LUMA_METER_BIN_COUNT;
226+
uint globalIndex = nbl_glsl_dot(uvec3(gl_LocalInvocationIndex,gl_WorkGroupID.xy),uvec3(1u,_NBL_GLSL_EXT_LUMA_METER_BIN_COUNT,gl_NumWorkGroups.x*_NBL_GLSL_EXT_LUMA_METER_BIN_COUNT));
225227
if (globalIndex<_NBL_GLSL_EXT_LUMA_METER_BIN_GLOBAL_COUNT)
226228
{
227229
outParams[nbl_glsl_ext_LumaMeter_getNextLumaOutputOffset()].packedHistogram[globalIndex] = 0u;
@@ -235,19 +237,19 @@ layout(set=_NBL_GLSL_EXT_LUMA_METER_INPUT_IMAGE_SET_DEFINED_, binding=_NBL_GLSL_
235237

236238

237239
#if NBL_GLSL_EQUAL(_NBL_GLSL_EXT_LUMA_METER_MODE_DEFINED_,_NBL_GLSL_EXT_LUMA_METER_MODE_MEDIAN)
238-
#define WriteOutValue_t uint
240+
#define nbl_glsl_ext_LumaMeter_WriteOutValue_t uint
239241
#else
240-
#define WriteOutValue_t float
242+
#define nbl_glsl_ext_LumaMeter_WriteOutValue_t float
241243
#endif
242244

243245
#ifndef _NBL_GLSL_EXT_LUMA_METER_SET_FIRST_OUTPUT_FUNC_DECLARED_
244246
#define _NBL_GLSL_EXT_LUMA_METER_SET_FIRST_OUTPUT_FUNC_DECLARED_
245-
void nbl_glsl_ext_LumaMeter_setFirstPassOutput(in WriteOutValue_t writeOutVal);
247+
void nbl_glsl_ext_LumaMeter_setFirstPassOutput(in nbl_glsl_ext_LumaMeter_WriteOutValue_t writeOutVal);
246248
#endif
247249

248250
#ifndef _NBL_GLSL_EXT_LUMA_METER_SET_FIRST_OUTPUT_FUNC_DEFINED_
249251
#define _NBL_GLSL_EXT_LUMA_METER_SET_FIRST_OUTPUT_FUNC_DEFINED_
250-
void nbl_glsl_ext_LumaMeter_setFirstPassOutput(in WriteOutValue_t writeOutVal)
252+
void nbl_glsl_ext_LumaMeter_setFirstPassOutput(in nbl_glsl_ext_LumaMeter_WriteOutValue_t writeOutVal)
251253
{
252254
int layerIndex = nbl_glsl_ext_LumaMeter_getCurrentLumaOutputOffset();
253255
#if NBL_GLSL_EQUAL(_NBL_GLSL_EXT_LUMA_METER_MODE_DEFINED_,_NBL_GLSL_EXT_LUMA_METER_MODE_MEDIAN)
@@ -264,8 +266,6 @@ layout(set=_NBL_GLSL_EXT_LUMA_METER_INPUT_IMAGE_SET_DEFINED_, binding=_NBL_GLSL_
264266
#endif
265267
}
266268
#endif
267-
268-
#undef WriteOutValue_t
269269
#endif // _NBL_GLSL_EXT_LUMA_METER_FIRST_PASS_DEFINED_
270270

271271

include/nbl/builtin/glsl/ext/LumaMeter/impl.glsl

Lines changed: 31 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -33,20 +33,12 @@ vec3 nbl_glsl_ext_LumaMeter_getColor(bool wgExecutionMask);
3333

3434
#ifndef _NBL_GLSL_EXT_LUMA_METER_IMPL_DECLARED_
3535
#define _NBL_GLSL_EXT_LUMA_METER_IMPL_DECLARED_
36-
void nbl_glsl_ext_LumaMeter(bool wgExecutionMask);
36+
void nbl_glsl_ext_LumaMeter(in bool wgExecutionMask);
3737
#endif
3838

39-
#ifndef _NBL_GLSL_EXT_LUMA_METER_IMPL_DEFINED_
40-
#define _NBL_GLSL_EXT_LUMA_METER_IMPL_DEFINED_
41-
void nbl_glsl_ext_LumaMeter(bool wgExecutionMask)
39+
float nbl_glsl_ext_LumaMeter_local_process(in bool wgExecutionMask, in vec3 color)
4240
{
43-
vec3 color = nbl_glsl_ext_LumaMeter_getColor(wgExecutionMask);
44-
#if _NBL_GLSL_EXT_LUMA_METER_MODE_DEFINED_==_NBL_GLSL_EXT_LUMA_METER_MODE_MEDIAN
45-
nbl_glsl_ext_LumaMeter_clearHistogram();
46-
#endif
47-
nbl_glsl_ext_LumaMeter_clearFirstPassOutput();
48-
49-
float logLuma;
41+
float scaledLogLuma;
5042
// linearize
5143
if (wgExecutionMask)
5244
{
@@ -58,36 +50,42 @@ void nbl_glsl_ext_LumaMeter(bool wgExecutionMask)
5850
const float MaxLuma = intBitsToFloat(_NBL_GLSL_EXT_LUMA_METER_MAX_LUMA_DEFINED_);
5951
luma = clamp(luma,MinLuma,MaxLuma);
6052

61-
logLuma = log2(luma/MinLuma)/log2(MaxLuma/MinLuma);
53+
scaledLogLuma = log2(luma/MinLuma)/log2(MaxLuma/MinLuma);
6254
}
6355

6456
#if _NBL_GLSL_EXT_LUMA_METER_MODE_DEFINED_==_NBL_GLSL_EXT_LUMA_METER_MODE_MEDIAN
6557
// compute histogram index
6658
int histogramIndex;
6759
if (wgExecutionMask)
6860
{
69-
histogramIndex = int(logLuma*float(_NBL_GLSL_EXT_LUMA_METER_BIN_COUNT-1u)+0.5);
61+
histogramIndex = int(scaledLogLuma *float(_NBL_GLSL_EXT_LUMA_METER_BIN_COUNT-1u)+0.5);
7062
histogramIndex += int(gl_LocalInvocationIndex&uint(_NBL_GLSL_EXT_LUMA_METER_LOCAL_REPLICATION-1))*_NBL_GLSL_EXT_LUMA_METER_PADDED_BIN_COUNT;
7163
}
7264
// barrier so we "see" the cleared histogram
7365
barrier();
74-
memoryBarrierShared();
7566
if (wgExecutionMask)
7667
atomicAdd(_NBL_GLSL_SCRATCH_SHARED_DEFINED_[histogramIndex],1u);
77-
78-
// no barrier on shared memory cause if we use it with atomics the writes and reads be coherent
68+
// no memoryBarrierShared() here, but the barrier() below is still required: the histogram is read later and all atomics must be done before that read
7969
barrier();
70+
#endif
8071

72+
return scaledLogLuma;
73+
}
74+
75+
nbl_glsl_ext_LumaMeter_WriteOutValue_t nbl_glsl_ext_LumaMeter_workgroup_process(in bool wgExecutionMask, in float scaledLogLuma)
76+
{
77+
#if _NBL_GLSL_EXT_LUMA_METER_MODE_DEFINED_==_NBL_GLSL_EXT_LUMA_METER_MODE_MEDIAN
8178
// sum the replicated histogram copies (one per local replication) from the scratch array into a single per-bin value for write-out
8279
uint writeOutVal = _NBL_GLSL_SCRATCH_SHARED_DEFINED_[gl_LocalInvocationIndex];
8380
for (int i=1; i<_NBL_GLSL_EXT_LUMA_METER_LOCAL_REPLICATION; i++)
8481
writeOutVal += _NBL_GLSL_SCRATCH_SHARED_DEFINED_[gl_LocalInvocationIndex+i*_NBL_GLSL_EXT_LUMA_METER_PADDED_BIN_COUNT];
82+
return writeOutVal;
8583
#elif _NBL_GLSL_EXT_LUMA_METER_MODE_DEFINED_==_NBL_GLSL_EXT_LUMA_METER_MODE_GEOM_MEAN
86-
_NBL_GLSL_SCRATCH_SHARED_DEFINED_[gl_LocalInvocationIndex] = wgExecutionMask ? floatBitsToUint(logLuma):0u;
84+
// TODO: use nbl_glsl_workgroupAdd reduction
85+
_NBL_GLSL_SCRATCH_SHARED_DEFINED_[gl_LocalInvocationIndex] = wgExecutionMask ? floatBitsToUint(scaledLogLuma):0u;
8786
for (int i=NBL_GLSL_WORKGROUP_SIZE_>>1; i>1; i>>=1)
8887
{
8988
barrier();
90-
memoryBarrierShared();
9189
if (gl_LocalInvocationIndex<i)
9290
{
9391
_NBL_GLSL_SCRATCH_SHARED_DEFINED_[gl_LocalInvocationIndex] = floatBitsToUint
@@ -98,9 +96,22 @@ void nbl_glsl_ext_LumaMeter(bool wgExecutionMask)
9896
}
9997
}
10098
barrier();
101-
memoryBarrierShared();
102-
float writeOutVal = uintBitsToFloat(_NBL_GLSL_SCRATCH_SHARED_DEFINED_[0])+uintBitsToFloat(_NBL_GLSL_SCRATCH_SHARED_DEFINED_[1]);
99+
return uintBitsToFloat(_NBL_GLSL_SCRATCH_SHARED_DEFINED_[0])+uintBitsToFloat(_NBL_GLSL_SCRATCH_SHARED_DEFINED_[1]);
100+
#endif
101+
}
102+
103+
#ifndef _NBL_GLSL_EXT_LUMA_METER_IMPL_DEFINED_
104+
#define _NBL_GLSL_EXT_LUMA_METER_IMPL_DEFINED_
105+
void nbl_glsl_ext_LumaMeter(in bool wgExecutionMask)
106+
{
107+
vec3 color = nbl_glsl_ext_LumaMeter_getColor(wgExecutionMask);
108+
#if _NBL_GLSL_EXT_LUMA_METER_MODE_DEFINED_==_NBL_GLSL_EXT_LUMA_METER_MODE_MEDIAN
109+
nbl_glsl_ext_LumaMeter_clearHistogram();
103110
#endif
111+
nbl_glsl_ext_LumaMeter_clearFirstPassOutput();
112+
113+
const float scaledLogLuma = nbl_glsl_ext_LumaMeter_local_process(wgExecutionMask,color);
114+
const nbl_glsl_ext_LumaMeter_WriteOutValue_t writeOutVal = nbl_glsl_ext_LumaMeter_workgroup_process(wgExecutionMask,scaledLogLuma);
104115

105116
nbl_glsl_ext_LumaMeter_setFirstPassOutput(writeOutVal);
106117
}

0 commit comments

Comments
 (0)