Skip to content

Commit 36aeb6b

Browse files
FFT integrated and works, now time for the other axis
1 parent fe3450d commit 36aeb6b

File tree

1 file changed

+90
-52
lines changed
  • examples_tests/39.DenoiserTonemapper

1 file changed

+90
-52
lines changed

examples_tests/39.DenoiserTonemapper/main.cpp

Lines changed: 90 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -284,9 +284,9 @@ layout(binding = 0, std430) restrict readonly buffer ImageInputBuffer
284284
f16vec3_packed inBuffer[];
285285
};
286286
#define _NBL_GLSL_EXT_FFT_INPUT_DESCRIPTOR_DEFINED_
287-
layout(binding = 1, std430) restrict writeonly buffer ImageOutputBuffer
287+
layout(binding = 1, std430) restrict writeonly buffer SpectrumOutputBuffer
288288
{
289-
f16vec2 outBuffer[];
289+
f16vec2 outSpectrum[];
290290
};
291291
#define _NBL_GLSL_EXT_FFT_OUTPUT_DESCRIPTOR_DEFINED_
292292
@@ -326,8 +326,8 @@ uint nbl_glsl_ext_FFT_Parameters_t_getPaddingType()
326326
327327
void nbl_glsl_ext_FFT_setData(in uvec3 coordinate, in uint channel, in nbl_glsl_complex complex_value) // FFT output hook: stores one complex sample of `channel` into the spectrum buffer
328328
{
329-
const uint index = ((pc.data.imageHeight*channel+coordinate.x)<<nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize())+coordinate.y;
330-
outBuffer[index] = f16vec2(complex_value);
329+
const uint index = ((channel<<nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize())+coordinate.x)*pc.data.imageHeight+coordinate.y; // channel-major: each channel owns (1<<log2FFTSize) x-slots of imageHeight entries, y varies fastest
330+
outSpectrum[index] = f16vec2(complex_value); // value is converted to f16vec2 for storage
331331
}
332332
#define _NBL_GLSL_EXT_FFT_SET_DATA_DEFINED_
333333
@@ -345,7 +345,7 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
345345
const uint index = coordinate.y*pc.data.imageWidth+coordinate.x;
346346
347347
// rewrite this fetch at some point
348-
nbl_glsl_complex retval;
348+
nbl_glsl_complex retval; retval.y = 0.0;
349349
switch (channel)
350350
{
351351
case 2u:
@@ -370,12 +370,11 @@ void main()
370370
nbl_glsl_ext_LumaMeter_clearFirstPassOutput();
371371
372372
373+
// Virtual Threads Calculation
374+
const uint log2FFTSize = nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize();
375+
const uint item_per_thread_count = 0x1u<<(log2FFTSize-_NBL_GLSL_WORKGROUP_SIZE_LOG2_);
373376
for(uint channel=0u; channel<3u; channel++)
374377
{
375-
// Virtual Threads Calculation
376-
const uint log2FFTSize = nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize();
377-
const uint item_per_thread_count = 0x1u<<(log2FFTSize-_NBL_GLSL_WORKGROUP_SIZE_LOG2_);
378-
379378
scaledLogLuma = 0.f;
380379
// Load Values into local memory
381380
for(uint t=0u; t<item_per_thread_count; t++)
@@ -404,18 +403,25 @@ void main()
404403
auto interleaveAndLastFFTShader = driver->createGPUShader(core::make_smart_refctd_ptr<ICPUShader>(R"===(
405404
#version 450 core
406405
#extension GL_EXT_shader_16bit_storage : require
406+
407+
// nasty and ugly but oh well
408+
#define _NBL_GLSL_SCRATCH_SHARED_DEFINED_ sharedScratch
409+
#define _NBL_GLSL_SCRATCH_SHARED_SIZE_DEFINED_ 1024
410+
shared uint _NBL_GLSL_SCRATCH_SHARED_DEFINED_[_NBL_GLSL_SCRATCH_SHARED_SIZE_DEFINED_];
411+
412+
407413
#include "../ShaderCommon.glsl"
408414
#include "nbl/builtin/glsl/ext/ToneMapper/operators.glsl"
409-
layout(binding = 0, std430) restrict readonly buffer ImageInputBuffer
410-
{
411-
f16vec3_packed inBuffer[];
412-
};
413-
#define _NBL_GLSL_EXT_FFT_INPUT_DESCRIPTOR_DEFINED_
414-
layout(binding = 1, std430) restrict writeonly buffer ImageOutputBuffer
415+
layout(binding = 0, std430) restrict buffer ImageOutputBuffer
415416
{
416417
f16vec4 outBuffer[];
417418
};
418419
#define _NBL_GLSL_EXT_FFT_OUTPUT_DESCRIPTOR_DEFINED_
420+
layout(binding = 1, std430) restrict readonly buffer SpectrumInputBuffer
421+
{
422+
f16vec2 inSpectrum[];
423+
};
424+
#define _NBL_GLSL_EXT_FFT_INPUT_DESCRIPTOR_DEFINED_
419425
layout(binding = 3, std430) restrict readonly buffer IntensityBuffer
420426
{
421427
float intensity[];
@@ -452,52 +458,88 @@ uint nbl_glsl_ext_FFT_Parameters_t_getPaddingType()
452458
}
453459
#define _NBL_GLSL_EXT_FFT_PARAMETERS_METHODS_DECLARED_
454460
455-
void main()
461+
462+
void nbl_glsl_ext_FFT_setData(in uvec3 coordinate, in uint channel, in nbl_glsl_complex complex_value) // FFT output hook: un-pads x, merges the real part into the channel's slot of the output texel, tonemaps on the last channel
456463
{
457-
// TODO: compute iFFT of the image
458-
const uint inAddr = gl_GlobalInvocationID.y*pc.data.imageWidth+gl_GlobalInvocationID.x;
459-
bool alive = gl_GlobalInvocationID.x<pc.data.imageWidth;
460-
vec3 color = vec3(inBuffer[inAddr].x,inBuffer[inAddr].y,inBuffer[inAddr].z);
464+
ivec2 coords = ivec2(coordinate.xy);
465+
const uint padding_size = (0x1u<<nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize())-pc.data.imageWidth;
466+
coords.x -= int(padding_size>>1u); // shift by half the padding so x is relative to the unpadded image
467+
if (coords.x<0 || coords.x>=int(pc.data.imageWidth)) // valid columns are [0,imageWidth); x==imageWidth is padding and would land one texel past the row
468+
return;
461469
462-
color = _NBL_GLSL_EXT_LUMA_METER_XYZ_CONVERSION_MATRIX_DEFINED_*color;
463-
color *= intensity[pc.data.intensityBufferDWORDOffset]; // *= 0.18/AvgLuma
464-
switch (pc.data.tonemappingOperator)
470+
uint dataOffset = coords.y*pc.data.inImageTexelPitch[EII_COLOR]+coords.x;
471+
vec3 color = vec4(outBuffer[dataOffset]).xyz; // read the texel back so the other components already stored there are kept
472+
color[channel] = complex_value.x; // only the real part is stored
473+
if (channel==nbl_glsl_ext_FFT_Parameters_t_getMaxChannel()) // finish the pixel only when the last channel arrives
465474
{
466-
case _NBL_GLSL_EXT_TONE_MAPPER_REINHARD_OPERATOR:
475+
color = _NBL_GLSL_EXT_LUMA_METER_XYZ_CONVERSION_MATRIX_DEFINED_*color;
476+
color *= intensity[pc.data.intensityBufferDWORDOffset]; // *= 0.18/AvgLuma
477+
switch (pc.data.tonemappingOperator)
467478
{
468-
nbl_glsl_ext_ToneMapper_ReinhardParams_t tonemapParams;
469-
tonemapParams.keyAndManualLinearExposure = pc.data.tonemapperParams[0];
470-
tonemapParams.rcpWhite2 = pc.data.tonemapperParams[1];
471-
color = nbl_glsl_ext_ToneMapper_Reinhard(tonemapParams,color);
472-
break;
479+
case _NBL_GLSL_EXT_TONE_MAPPER_REINHARD_OPERATOR:
480+
{
481+
nbl_glsl_ext_ToneMapper_ReinhardParams_t tonemapParams;
482+
tonemapParams.keyAndManualLinearExposure = pc.data.tonemapperParams[0];
483+
tonemapParams.rcpWhite2 = pc.data.tonemapperParams[1];
484+
color = nbl_glsl_ext_ToneMapper_Reinhard(tonemapParams,color);
485+
break;
486+
}
487+
case _NBL_GLSL_EXT_TONE_MAPPER_ACES_OPERATOR:
488+
{
489+
nbl_glsl_ext_ToneMapper_ACESParams_t tonemapParams;
490+
tonemapParams.gamma = pc.data.tonemapperParams[0];
491+
tonemapParams.exposure = pc.data.tonemapperParams[1];
492+
color = nbl_glsl_ext_ToneMapper_ACES(tonemapParams,color);
493+
break;
494+
}
495+
default:
496+
{
497+
color *= pc.data.tonemapperParams[0]; // any other operator value: plain scale by the first tonemapper parameter
498+
break;
499+
}
473500
}
474-
case _NBL_GLSL_EXT_TONE_MAPPER_ACES_OPERATOR:
501+
color = nbl_glsl_XYZtosRGB*color;
502+
}
503+
outBuffer[dataOffset] = f16vec4(vec4(color,1.f)); // written on every channel; alpha is always 1
504+
}
505+
#define _NBL_GLSL_EXT_FFT_SET_DATA_DEFINED_
506+
507+
508+
#define _NBL_GLSL_EXT_FFT_MAIN_DEFINED_
509+
#include "nbl/builtin/glsl/ext/FFT/default_compute_fft.comp"
510+
511+
512+
void main() // per channel: load padded spectrum samples, run the FFT on the preloaded values, then hand every result to nbl_glsl_ext_FFT_setData
513+
{
514+
// Virtual Threads Calculation
515+
const uint log2FFTSize = nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize();
516+
const uint item_per_thread_count = 0x1u<<(log2FFTSize-_NBL_GLSL_WORKGROUP_SIZE_LOG2_); // FFT size divided by workgroup size: elements each invocation handles
517+
for(uint channel=0u; channel<3u; channel++)
518+
{
519+
// Load Values into local memory
520+
for(uint t=0u; t<item_per_thread_count; t++)
475521
{
476-
nbl_glsl_ext_ToneMapper_ACESParams_t tonemapParams;
477-
tonemapParams.gamma = pc.data.tonemapperParams[0];
478-
tonemapParams.exposure = pc.data.tonemapperParams[1];
479-
color = nbl_glsl_ext_ToneMapper_ACES(tonemapParams,color);
480-
break;
522+
const uint tid = (t<<_NBL_GLSL_WORKGROUP_SIZE_LOG2_)|gl_LocalInvocationIndex; // element index: t-th workgroup-sized stripe plus this invocation's index
523+
const uint trueDim = nbl_glsl_ext_FFT_Parameters_t_getDimensions()[nbl_glsl_ext_FFT_Parameters_t_getDirection()]; // unpadded extent along the FFT direction
524+
nbl_glsl_ext_FFT_impl_values[t] = nbl_glsl_ext_FFT_getPaddedData(nbl_glsl_ext_FFT_getPaddedCoordinates(tid,log2FFTSize,trueDim),channel);
481525
}
482-
default:
526+
// do FFT
527+
nbl_glsl_ext_FFT_preloaded(true,log2FFTSize); // NOTE(review): first argument presumably selects the inverse transform; confirm against default_compute_fft.comp
528+
// write out to main memory
529+
for(uint t=0u; t<item_per_thread_count; t++)
483530
{
484-
color *= pc.data.tonemapperParams[0];
485-
break;
531+
const uint tid = (t<<_NBL_GLSL_WORKGROUP_SIZE_LOG2_)|gl_LocalInvocationIndex; // same element index as in the load loop
532+
nbl_glsl_ext_FFT_setData(nbl_glsl_ext_FFT_getCoordinates(tid),channel,nbl_glsl_ext_FFT_impl_values[t]);
486533
}
487534
}
488-
color = nbl_glsl_XYZtosRGB*color;
489-
uint dataOffset = gl_GlobalInvocationID.y*pc.data.inImageTexelPitch[EII_COLOR]+gl_GlobalInvocationID.x;
490-
if (alive)
491-
outBuffer[dataOffset] = f16vec4(vec4(color,1.0));
492535
}
493536
494537
nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channel) // FFT input hook: fetches one complex sample of `channel` from the spectrum buffer
495538
{
496-
#if 0
497539
if (!nbl_glsl_ext_FFT_wrap_coord(coordinate)) // coordinates the wrap helper rejects read as complex zero
498540
return nbl_glsl_complex(0.f,0.f);
499-
#endif
500-
return nbl_glsl_complex(0.f,0.f);
541+
const uint index = ((channel<<nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize())+coordinate.x)*pc.data.imageHeight+coordinate.y; // channel-major: each channel owns (1<<log2FFTSize) x-slots of imageHeight entries, y varies fastest
542+
return nbl_glsl_complex(inSpectrum[index]);
501543
}
502544
)==="));
503545
struct SpecializationConstants
@@ -1139,11 +1181,7 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
11391181
driver->dispatch(param.fftDispatchInfo[0].workGroupCount[0],param.fftDispatchInfo[0].workGroupCount[1],1u);
11401182
COpenGLExtensionHandler::extGlMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
11411183

1142-
// TODO: do X-axis pass of the DFFT
1143-
1144-
// TODO: multiply the spectra together
1145-
1146-
// TODO: perform inverse Y-axis DFFT and interleave the results
1184+
// TODO: Y-axis FFT, multiply the spectra together, y-axis iFFT
11471185

11481186
// bind intensity pipeline
11491187
driver->bindComputePipeline(intensityPipeline.get());
@@ -1154,7 +1192,7 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
11541192
// Tonemap and interleave the output
11551193
{
11561194
driver->bindComputePipeline(interleaveAndLastFFTPipeline.get());
1157-
driver->dispatch(workgroupCounts[0],workgroupCounts[1],1u);
1195+
driver->dispatch(param.fftDispatchInfo[2].workGroupCount[0],param.fftDispatchInfo[2].workGroupCount[1],1u);
11581196
// issue a full memory barrier (or at least all buffer read/write barrier)
11591197
COpenGLExtensionHandler::extGlMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
11601198
}
@@ -1198,7 +1236,7 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
11981236
continue;
11991237
}
12001238

1201-
driver->copyBuffer(temporaryPixelBuffer.getObject(),downloadStagingArea->getBuffer(),0u,address,colorBufferBytesize);
1239+
driver->copyBuffer(colorPixelBuffer.getObject(),downloadStagingArea->getBuffer(),0u,address,colorBufferBytesize);
12021240
}
12031241
auto downloadFence = driver->placeFence(true);
12041242

0 commit comments

Comments
 (0)