@@ -284,9 +284,9 @@ layout(binding = 0, std430) restrict readonly buffer ImageInputBuffer
284
284
f16vec3_packed inBuffer[];
285
285
};
286
286
#define _NBL_GLSL_EXT_FFT_INPUT_DESCRIPTOR_DEFINED_
287
// Write-only SSBO at binding 1 that receives the spectrum as packed half-float pairs (f16vec2).
// It is filled by nbl_glsl_ext_FFT_setData in this shader via `outSpectrum[index]`.
- layout(binding = 1, std430) restrict writeonly buffer ImageOutputBuffer
287
+ layout(binding = 1, std430) restrict writeonly buffer SpectrumOutputBuffer
288
288
{
289
- f16vec2 outBuffer [];
289
+ f16vec2 outSpectrum [];
290
290
};
291
291
#define _NBL_GLSL_EXT_FFT_OUTPUT_DESCRIPTOR_DEFINED_
292
292
@@ -326,8 +326,8 @@ uint nbl_glsl_ext_FFT_Parameters_t_getPaddingType()
326
326
327
327
// Stores one complex FFT result into the spectrum buffer as an f16vec2.
//   coordinate    - only .x and .y are used to build the linear index
//   channel       - selects the per-channel slab of the buffer
//   complex_value - value converted to f16vec2 and written out
// Addressing: index = ((channel << log2FFTSize) + coordinate.x) * imageHeight + coordinate.y,
// i.e. coordinate.y is the fastest-varying term, then coordinate.x, then channel.
void nbl_glsl_ext_FFT_setData(in uvec3 coordinate, in uint channel, in nbl_glsl_complex complex_value)
328
328
{
329
- const uint index = ((pc.data.imageHeight* channel+coordinate.x) <<nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize())+coordinate.y;
330
- outBuffer [index] = f16vec2(complex_value);
329
+ const uint index = ((channel<<nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize())+coordinate.x)*pc.data.imageHeight +coordinate.y;
330
+ outSpectrum [index] = f16vec2(complex_value);
331
331
}
332
332
#define _NBL_GLSL_EXT_FFT_SET_DATA_DEFINED_
333
333
@@ -345,7 +345,7 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
345
345
const uint index = coordinate.y*pc.data.imageWidth+coordinate.x;
346
346
347
347
// rewrite this fetch at some point
348
- nbl_glsl_complex retval;
348
+ nbl_glsl_complex retval; retval.y = 0.0;
349
349
switch (channel)
350
350
{
351
351
case 2u:
@@ -370,12 +370,11 @@ void main()
370
370
nbl_glsl_ext_LumaMeter_clearFirstPassOutput();
371
371
372
372
373
+ // Virtual Threads Calculation
374
+ const uint log2FFTSize = nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize();
375
+ const uint item_per_thread_count = 0x1u<<(log2FFTSize-_NBL_GLSL_WORKGROUP_SIZE_LOG2_);
373
376
for(uint channel=0u; channel<3u; channel++)
374
377
{
375
- // Virtual Threads Calculation
376
- const uint log2FFTSize = nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize();
377
- const uint item_per_thread_count = 0x1u<<(log2FFTSize-_NBL_GLSL_WORKGROUP_SIZE_LOG2_);
378
-
379
378
scaledLogLuma = 0.f;
380
379
// Load Values into local memory
381
380
for(uint t=0u; t<item_per_thread_count; t++)
@@ -404,18 +403,25 @@ void main()
404
403
auto interleaveAndLastFFTShader = driver->createGPUShader (core::make_smart_refctd_ptr<ICPUShader>(R"===(
405
404
#version 450 core
406
405
#extension GL_EXT_shader_16bit_storage : require
406
+
407
+ // nasty and ugly but oh well
408
+ #define _NBL_GLSL_SCRATCH_SHARED_DEFINED_ sharedScratch
409
+ #define _NBL_GLSL_SCRATCH_SHARED_SIZE_DEFINED_ 1024
410
+ shared uint _NBL_GLSL_SCRATCH_SHARED_DEFINED_[_NBL_GLSL_SCRATCH_SHARED_SIZE_DEFINED_];
411
+
412
+
407
413
#include "../ShaderCommon.glsl"
408
414
#include "nbl/builtin/glsl/ext/ToneMapper/operators.glsl"
409
- layout(binding = 0, std430) restrict readonly buffer ImageInputBuffer
410
- {
411
- f16vec3_packed inBuffer[];
412
- };
413
- #define _NBL_GLSL_EXT_FFT_INPUT_DESCRIPTOR_DEFINED_
414
- layout(binding = 1, std430) restrict writeonly buffer ImageOutputBuffer
415
// SSBO at binding 0 holding the interleaved colour output as f16vec4 texels.
// It carries neither `readonly` nor `writeonly`: nbl_glsl_ext_FFT_setData in this shader
// both loads from and stores to `outBuffer`.
+ layout(binding = 0, std430) restrict buffer ImageOutputBuffer
415
416
{
416
417
f16vec4 outBuffer[];
417
418
};
418
419
#define _NBL_GLSL_EXT_FFT_OUTPUT_DESCRIPTOR_DEFINED_
420
// Read-only SSBO at binding 1 holding the spectrum as f16vec2 values.
// It is read by nbl_glsl_ext_FFT_getPaddedData in this shader via `inSpectrum[index]`.
+ layout(binding = 1, std430) restrict readonly buffer SpectrumInputBuffer
421
+ {
422
+ f16vec2 inSpectrum[];
423
+ };
424
+ #define _NBL_GLSL_EXT_FFT_INPUT_DESCRIPTOR_DEFINED_
419
425
layout(binding = 3, std430) restrict readonly buffer IntensityBuffer
420
426
{
421
427
float intensity[];
@@ -452,52 +458,88 @@ uint nbl_glsl_ext_FFT_Parameters_t_getPaddingType()
452
458
}
453
459
#define _NBL_GLSL_EXT_FFT_PARAMETERS_METHODS_DECLARED_
454
460
455
- void main()
461
+
462
// Write-back hook of the last FFT pass: merges one channel of one result into `outBuffer`.
//   coordinate    - position in the padded FFT domain; only .xy is used
//   channel       - colour component (index into the vec3) that receives the value
//   complex_value - FFT result; only its .x component is stored (presumably the real part - confirm)
// The texel is read-modify-written: the current f16vec4 is loaded, component `channel` is replaced,
// and the texel is stored back with alpha forced to 1.
// When `channel` equals nbl_glsl_ext_FFT_Parameters_t_getMaxChannel() the colour is additionally
// multiplied by the XYZ conversion matrix and the intensity factor, tone mapped according to
// pc.data.tonemappingOperator, and multiplied by nbl_glsl_XYZtosRGB before the store.
+ void nbl_glsl_ext_FFT_setData(in uvec3 coordinate, in uint channel, in nbl_glsl_complex complex_value)
456
463
{
457
- // TODO: compute iFFT of the image
458
- const uint inAddr = gl_GlobalInvocationID.y*pc.data.imageWidth+gl_GlobalInvocationID.x;
459
- bool alive = gl_GlobalInvocationID.x<pc.data.imageWidth;
460
- vec3 color = vec3(inBuffer[inAddr].x,inBuffer[inAddr].y,inBuffer[inAddr].z);
464
+ ivec2 coords = ivec2(coordinate.xy);
465
+ const uint padding_size = (0x1u<<nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize())-pc.data.imageWidth;
// Shift from the padded FFT domain to image space by removing half of the padding from x.
466
+ coords.x -= int(padding_size>>1u);
// Skip columns that fall into the padding. Valid columns are [0,imageWidth), so the upper
// bound must reject x==imageWidth as well; otherwise a texel one past the end of the row
// would be read-modify-written.
// NOTE(review): coords.y is not range-checked here - presumably the dispatch covers exactly
// the image rows; confirm against the host-side dispatch.
467
+ if (coords.x<0 || coords.x>=int(pc.data.imageWidth))
468
+ return;
461
469
462
- color = _NBL_GLSL_EXT_LUMA_METER_XYZ_CONVERSION_MATRIX_DEFINED_*color;
463
- color *= intensity[pc.data.intensityBufferDWORDOffset]; // *= 0.18/AvgLuma
464
- switch (pc.data.tonemappingOperator)
470
+ uint dataOffset = coords.y*pc.data.inImageTexelPitch[EII_COLOR]+coords.x;
// Load what earlier channel passes stored, then overwrite only this channel.
471
+ vec3 color = vec4(outBuffer[dataOffset]).xyz;
472
+ color[channel] = complex_value.x;
// Only after the last channel is in place is the full colour available for tone mapping.
473
+ if (channel==nbl_glsl_ext_FFT_Parameters_t_getMaxChannel())
465
474
{
466
- case _NBL_GLSL_EXT_TONE_MAPPER_REINHARD_OPERATOR:
475
+ color = _NBL_GLSL_EXT_LUMA_METER_XYZ_CONVERSION_MATRIX_DEFINED_*color;
476
+ color *= intensity[pc.data.intensityBufferDWORDOffset]; // *= 0.18/AvgLuma
477
+ switch (pc.data.tonemappingOperator)
467
478
{
468
- nbl_glsl_ext_ToneMapper_ReinhardParams_t tonemapParams;
469
- tonemapParams.keyAndManualLinearExposure = pc.data.tonemapperParams[0];
470
- tonemapParams.rcpWhite2 = pc.data.tonemapperParams[1];
471
- color = nbl_glsl_ext_ToneMapper_Reinhard(tonemapParams,color);
472
- break;
479
+ case _NBL_GLSL_EXT_TONE_MAPPER_REINHARD_OPERATOR:
480
+ {
481
+ nbl_glsl_ext_ToneMapper_ReinhardParams_t tonemapParams;
482
+ tonemapParams.keyAndManualLinearExposure = pc.data.tonemapperParams[0];
483
+ tonemapParams.rcpWhite2 = pc.data.tonemapperParams[1];
484
+ color = nbl_glsl_ext_ToneMapper_Reinhard(tonemapParams,color);
485
+ break;
486
+ }
487
+ case _NBL_GLSL_EXT_TONE_MAPPER_ACES_OPERATOR:
488
+ {
489
+ nbl_glsl_ext_ToneMapper_ACESParams_t tonemapParams;
490
+ tonemapParams.gamma = pc.data.tonemapperParams[0];
491
+ tonemapParams.exposure = pc.data.tonemapperParams[1];
492
+ color = nbl_glsl_ext_ToneMapper_ACES(tonemapParams,color);
493
+ break;
494
+ }
// Any other operator value: plain scale by the first tone mapper parameter.
495
+ default:
496
+ {
497
+ color *= pc.data.tonemapperParams[0];
498
+ break;
499
+ }
473
500
}
474
- case _NBL_GLSL_EXT_TONE_MAPPER_ACES_OPERATOR:
501
+ color = nbl_glsl_XYZtosRGB*color;
502
+ }
503
+ outBuffer[dataOffset] = f16vec4(vec4(color,1.f));
504
+ }
505
+ #define _NBL_GLSL_EXT_FFT_SET_DATA_DEFINED_
506
+
507
+
508
+ #define _NBL_GLSL_EXT_FFT_MAIN_DEFINED_
509
+ #include "nbl/builtin/glsl/ext/FFT/default_compute_fft.comp"
510
+
511
+
512
// Entry point of the last-FFT-and-interleave shader.
// For each of the three channels it (1) loads this invocation's share of the input through
// nbl_glsl_ext_FFT_getPaddedData, (2) runs nbl_glsl_ext_FFT_preloaded on the loaded values, and
// (3) hands every result to nbl_glsl_ext_FFT_setData.
+ void main()
513
+ {
514
+ // Virtual Threads Calculation
// Each invocation handles 2^(log2FFTSize - workgroup size log2) elements of the FFT.
515
+ const uint log2FFTSize = nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize();
516
+ const uint item_per_thread_count = 0x1u<<(log2FFTSize-_NBL_GLSL_WORKGROUP_SIZE_LOG2_);
517
+ for(uint channel=0u; channel<3u; channel++)
518
+ {
519
+ // Load Values into local memory
520
+ for(uint t=0u; t<item_per_thread_count; t++)
475
521
{
476
- nbl_glsl_ext_ToneMapper_ACESParams_t tonemapParams;
477
- tonemapParams.gamma = pc.data.tonemapperParams[0];
478
- tonemapParams.exposure = pc.data.tonemapperParams[1];
479
- color = nbl_glsl_ext_ToneMapper_ACES(tonemapParams,color);
480
- break;
// Virtual thread id: the iteration index goes in the high bits, the local invocation index in the low bits.
522
+ const uint tid = (t<<_NBL_GLSL_WORKGROUP_SIZE_LOG2_)|gl_LocalInvocationIndex;
// Extent along the FFT direction as reported by the parameter getters.
523
+ const uint trueDim = nbl_glsl_ext_FFT_Parameters_t_getDimensions()[nbl_glsl_ext_FFT_Parameters_t_getDirection()];
524
+ nbl_glsl_ext_FFT_impl_values[t] = nbl_glsl_ext_FFT_getPaddedData(nbl_glsl_ext_FFT_getPaddedCoordinates(tid,log2FFTSize,trueDim),channel);
481
525
}
482
- default:
526
+ // do FFT
// NOTE(review): the first argument `true` presumably selects the inverse transform - confirm against the FFT extension.
527
+ nbl_glsl_ext_FFT_preloaded(true,log2FFTSize);
528
+ // write out to main memory
529
+ for(uint t=0u; t<item_per_thread_count; t++)
483
530
{
484
- color *= pc.data.tonemapperParams[0] ;
485
- break ;
531
+ const uint tid = (t<<_NBL_GLSL_WORKGROUP_SIZE_LOG2_)|gl_LocalInvocationIndex ;
532
+ nbl_glsl_ext_FFT_setData(nbl_glsl_ext_FFT_getCoordinates(tid),channel,nbl_glsl_ext_FFT_impl_values[t]) ;
486
533
}
487
534
}
488
- color = nbl_glsl_XYZtosRGB*color;
489
- uint dataOffset = gl_GlobalInvocationID.y*pc.data.inImageTexelPitch[EII_COLOR]+gl_GlobalInvocationID.x;
490
- if (alive)
491
- outBuffer[dataOffset] = f16vec4(vec4(color,1.0));
492
535
}
493
536
494
537
// Input fetch hook of the last FFT pass: returns the spectrum value for `coordinate` and `channel`.
// Returns a zero complex value when nbl_glsl_ext_FFT_wrap_coord(coordinate) reports false;
// otherwise reads `inSpectrum` at
//   index = ((channel << log2FFTSize) + coordinate.x) * imageHeight + coordinate.y.
nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channel)
495
538
{
496
- #if 0
497
539
if (!nbl_glsl_ext_FFT_wrap_coord(coordinate))
498
540
return nbl_glsl_complex(0.f,0.f);
499
- #endif
500
- return nbl_glsl_complex(0.f,0.f );
// NOTE(review): coordinate is passed to nbl_glsl_ext_FFT_wrap_coord before use - presumably
// it is wrapped/validated in place so the index below stays in range; confirm.
541
+ const uint index = ((channel<<nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize())+coordinate.x)*pc.data.imageHeight+coordinate.y;
542
+ return nbl_glsl_complex(inSpectrum[index] );
501
543
}
502
544
)===" ));
503
545
struct SpecializationConstants
@@ -1139,11 +1181,7 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
1139
1181
driver->dispatch (param.fftDispatchInfo [0 ].workGroupCount [0 ],param.fftDispatchInfo [0 ].workGroupCount [1 ],1u );
1140
1182
COpenGLExtensionHandler::extGlMemoryBarrier (GL_SHADER_STORAGE_BARRIER_BIT);
1141
1183
1142
- // TODO: do X-axis pass of the DFFT
1143
-
1144
- // TODO: multiply the spectra together
1145
-
1146
- // TODO: perform inverse Y-axis DFFT and interleave the results
1184
+ // TODO: Y-axis FFT, multiply the spectra together, y-axis iFFT
1147
1185
1148
1186
// bind intensity pipeline
1149
1187
driver->bindComputePipeline (intensityPipeline.get ());
@@ -1154,7 +1192,7 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
1154
1192
// Tonemap and interleave the output
1155
1193
{
1156
1194
driver->bindComputePipeline (interleaveAndLastFFTPipeline.get ());
1157
- driver->dispatch (workgroupCounts[ 0 ],workgroupCounts [1 ],1u );
1195
+ driver->dispatch (param. fftDispatchInfo [ 2 ]. workGroupCount [ 0 ],param. fftDispatchInfo [ 2 ]. workGroupCount [1 ],1u );
1158
1196
// issue a full memory barrier (or at least all buffer read/write barrier)
1159
1197
COpenGLExtensionHandler::extGlMemoryBarrier (GL_BUFFER_UPDATE_BARRIER_BIT);
1160
1198
}
@@ -1198,7 +1236,7 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
1198
1236
continue ;
1199
1237
}
1200
1238
1201
- driver->copyBuffer (temporaryPixelBuffer .getObject (),downloadStagingArea->getBuffer (),0u ,address,colorBufferBytesize);
1239
+ driver->copyBuffer (colorPixelBuffer .getObject (),downloadStagingArea->getBuffer (),0u ,address,colorBufferBytesize);
1202
1240
}
1203
1241
auto downloadFence = driver->placeFence (true );
1204
1242
0 commit comments