Skip to content

Commit fe3450d

Browse files
Second tonemap still seems to work
1 parent 00dfb4c commit fe3450d

File tree

2 files changed

+42
-10
lines changed

2 files changed

+42
-10
lines changed

examples_tests/39.DenoiserTonemapper/ShaderCommon.glsl

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,9 @@ layout(push_constant, row_major) uniform PushConstants{
1919
#define _NBL_GLSL_EXT_FFT_PUSH_CONSTANTS_DEFINED_
2020
#define _NBL_GLSL_EXT_FFT_GET_PARAMETERS_DEFINED_
2121

22+
// kinda bad overdeclaration but oh well
23+
#define _NBL_GLSL_EXT_FFT_MAX_DIM_SIZE_ 16384
24+
2225

2326
#define SHARED_CHANNELS 3
2427
struct f16vec3_packed

examples_tests/39.DenoiserTonemapper/main.cpp

Lines changed: 39 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -336,22 +336,28 @@ void nbl_glsl_ext_FFT_setData(in uvec3 coordinate, in uint channel, in nbl_glsl_
336336
#include "nbl/builtin/glsl/ext/FFT/default_compute_fft.comp"
337337
338338
339+
float scaledLogLuma;
339340
nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channel)
340341
{
341342
ivec3 oldCoord = coordinate;
342343
nbl_glsl_ext_FFT_wrap_coord(coordinate);
343344
344345
const uint index = coordinate.y*pc.data.imageWidth+coordinate.x;
345346
347+
// rewrite this fetch at some point
346348
nbl_glsl_complex retval;
347349
switch (channel)
348350
{
349351
case 2u:
350-
retval.z = float(inBuffer[index].z);
352+
retval[0] = float(inBuffer[index].z);
353+
break;
351354
case 1u:
352-
retval.y = float(inBuffer[index].y);
355+
retval[0] = float(inBuffer[index].y);
356+
break;
353357
default:
354-
retval.x = float(inBuffer[index].x);
358+
scaledLogLuma += nbl_glsl_ext_LumaMeter_local_process(all(equal(coordinate,oldCoord)),vec3(inBuffer[index].x,inBuffer[index].y,inBuffer[index].z));
359+
retval[0] = float(inBuffer[index].x);
360+
break;
355361
}
356362
return retval;
357363
}
@@ -363,13 +369,36 @@ void main()
363369
#endif
364370
nbl_glsl_ext_LumaMeter_clearFirstPassOutput();
365371
366-
//
367372
368-
// prevent overlap between different usages of shared memory
369-
barrier();
373+
for(uint channel=0u; channel<3u; channel++)
374+
{
375+
// Virtual Threads Calculation
376+
const uint log2FFTSize = nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize();
377+
const uint item_per_thread_count = 0x1u<<(log2FFTSize-_NBL_GLSL_WORKGROUP_SIZE_LOG2_);
370378
371-
for(uint ch=0u; ch<=nbl_glsl_ext_FFT_Parameters_t_getMaxChannel(); ++ch)
372-
nbl_glsl_ext_FFT(nbl_glsl_ext_FFT_Parameters_t_getIsInverse(),ch);
379+
scaledLogLuma = 0.f;
380+
// Load Values into local memory
381+
for(uint t=0u; t<item_per_thread_count; t++)
382+
{
383+
const uint tid = (t<<_NBL_GLSL_WORKGROUP_SIZE_LOG2_)|gl_LocalInvocationIndex;
384+
const uint trueDim = nbl_glsl_ext_FFT_Parameters_t_getDimensions()[nbl_glsl_ext_FFT_Parameters_t_getDirection()];
385+
nbl_glsl_ext_FFT_impl_values[t] = nbl_glsl_ext_FFT_getPaddedData(nbl_glsl_ext_FFT_getPaddedCoordinates(tid,log2FFTSize,trueDim),channel);
386+
}
387+
if (channel==0u)
388+
{
389+
nbl_glsl_ext_LumaMeter_setFirstPassOutput(nbl_glsl_ext_LumaMeter_workgroup_process(scaledLogLuma));
390+
// prevent overlap between different usages of shared memory
391+
barrier();
392+
}
393+
// do FFT
394+
nbl_glsl_ext_FFT_preloaded(false,log2FFTSize);
395+
// write out to main memory
396+
for(uint t=0u; t<item_per_thread_count; t++)
397+
{
398+
const uint tid = (t<<_NBL_GLSL_WORKGROUP_SIZE_LOG2_)|gl_LocalInvocationIndex;
399+
nbl_glsl_ext_FFT_setData(nbl_glsl_ext_FFT_getCoordinates(tid),channel,nbl_glsl_ext_FFT_impl_values[t]);
400+
}
401+
}
373402
}
374403
)==="));
375404
auto interleaveAndLastFFTShader = driver->createGPUShader(core::make_smart_refctd_ptr<ICPUShader>(R"===(
@@ -970,7 +999,7 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
970999
{
9711000
uint64_t deinterleavedPixelBytesize = getTexelOrBlockBytesize<EF_R16G16B16A16_SFLOAT>(); // TODO do it with EF_R16G16B16_SFLOAT
9721001
outImageByteOffset[j] = j*param.width*param.height*deinterleavedPixelBytesize;
973-
attachBufferImageRange(EII_COUNT+j,temporaryPixelBuffer.getObject(),outImageByteOffset[j],deinterleavedPixelBytesize);
1002+
attachBufferImageRange(EII_COUNT+j,temporaryPixelBuffer.getObject(),outImageByteOffset[j],j ? deinterleavedPixelBytesize:fftScratchSize);
9741003
}
9751004
attachWholeBuffer(EII_COUNT*2u,histogramBuffer.get());
9761005
attachWholeBuffer(EII_COUNT*2u+1u,intensityBuffer.getObject());
@@ -1107,7 +1136,7 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
11071136

11081137
driver->bindComputePipeline(secondLumaMeterAndFirstFFTPipeline.get());
11091138
// dispatch
1110-
driver->dispatch(workgroupCounts[0],workgroupCounts[1],1u);
1139+
driver->dispatch(param.fftDispatchInfo[0].workGroupCount[0],param.fftDispatchInfo[0].workGroupCount[1],1u);
11111140
COpenGLExtensionHandler::extGlMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
11121141

11131142
// TODO: do X-axis pass of the DFFT

0 commit comments

Comments
 (0)