@@ -241,7 +241,7 @@ layout(binding = 3, std430) restrict writeonly buffer IntensityBuffer
241
241
242
242
int nbl_glsl_ext_LumaMeter_getCurrentLumaOutputOffset() // returns 0 or 1 depending on bit 0 of the push-constant flags
243
243
{
244
- return pc.data.beforeDenoise!=0u ? 0:1 ;
244
+ return int((~ pc.data.flags)&0x1u) ; // bit 0 set -> 0, bit 0 clear -> 1; bit 0 is read as beforeDenoise in main(), so the mapping matches the removed line
245
245
}
246
246
nbl_glsl_ext_LumaMeter_output_SPIRV_CROSS_is_dumb_t nbl_glsl_ext_ToneMapper_getLumaMeterOutput()
247
247
{
@@ -254,18 +254,17 @@ nbl_glsl_ext_LumaMeter_output_SPIRV_CROSS_is_dumb_t nbl_glsl_ext_ToneMapper_getL
254
254
void main()
255
255
{
256
256
const bool firstInvocation = all(equal(uvec3(0,0,0),gl_GlobalInvocationID));
257
- const bool beforeDenoise = pc.data.beforeDenoise!=0u;
258
- const bool autoexposureOn = pc.data.autoexposureOff==0u;
259
257
260
258
float optixIntensity = 1.0;
261
- if (beforeDenoise||autoexposureOn )
259
+ if (bool(pc.data.flags&0x2u) )
262
260
{
263
261
nbl_glsl_ext_LumaMeter_PassInfo_t lumaPassInfo;
264
262
lumaPassInfo.percentileRange[0] = pc.data.percentileRange[0];
265
263
lumaPassInfo.percentileRange[1] = pc.data.percentileRange[1];
266
264
float measuredLumaLog2 = nbl_glsl_ext_LumaMeter_getMeasuredLumaLog2(nbl_glsl_ext_ToneMapper_getLumaMeterOutput(),lumaPassInfo);
267
265
if (firstInvocation)
268
266
{
267
+ const bool beforeDenoise = bool(pc.data.flags&0x1u);
269
268
measuredLumaLog2 += beforeDenoise ? pc.data.denoiserExposureBias:0.0;
270
269
optixIntensity = nbl_glsl_ext_LumaMeter_getOptiXIntensity(measuredLumaLog2);
271
270
}
@@ -284,10 +283,56 @@ layout(binding = 0, std430) restrict readonly buffer ImageInputBuffer
284
283
{
285
284
f16vec3_packed inBuffer[];
286
285
};
286
+ #define _NBL_GLSL_EXT_FFT_INPUT_DESCRIPTOR_DEFINED_
287
287
layout(binding = 1, std430) restrict writeonly buffer ImageOutputBuffer // write-only SSBO at binding 1 of this shader
288
288
{
289
- float16_t data[];
290
- } outBuffers[EII_COUNT]; // TODO: do FFT
289
+ f16vec2 outBuffer[]; // one half-float pair per element; nbl_glsl_ext_FFT_setData stores each complex value here as f16vec2
290
+ };
291
+ #define _NBL_GLSL_EXT_FFT_OUTPUT_DESCRIPTOR_DEFINED_
292
+
293
+
294
+
295
+ #include <nbl/builtin/glsl/math/complex.glsl>
296
+ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channel);
297
+ #define _NBL_GLSL_EXT_FFT_GET_PADDED_DATA_DEFINED_
298
+
299
+
300
+ uvec3 nbl_glsl_ext_FFT_Parameters_t_getDimensions() // returns (imageWidth, imageHeight, 1) taken from the push constants
301
+ {
302
+ return uvec3(pc.data.imageWidth,pc.data.imageHeight,1u);
303
+ }
304
+ bool nbl_glsl_ext_FFT_Parameters_t_getIsInverse() // constant for this shader
305
+ {
306
+ return false; // NOTE(review): presumably selects the forward transform - confirm against the FFT extension
307
+ }
308
+ uint nbl_glsl_ext_FFT_Parameters_t_getDirection() // constant for this shader
309
+ {
310
+ return 0u; // NOTE(review): presumably axis index 0 (X) - confirm against the FFT extension
311
+ }
312
+ uint nbl_glsl_ext_FFT_Parameters_t_getMaxChannel() // constant for this shader
313
+ {
314
+ return 2u; // presumably the highest channel index, not a count: nbl_glsl_ext_FFT_getPaddedData below handles channels 0, 1 and 2 - TODO confirm
315
+ }
316
+ uint nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize() // log2 of the FFT length, derived from the image width with a workgroup-size floor
317
+ {
318
+ return max(findMSB(pc.data.imageWidth-1u),_NBL_GLSL_WORKGROUP_SIZE_LOG2_)+1u; // findMSB(w-1)+1 is ceil(log2(w)) for w>=2; max() keeps the result at least _NBL_GLSL_WORKGROUP_SIZE_LOG2_+1
319
+ }
320
+ uint nbl_glsl_ext_FFT_Parameters_t_getPaddingType() // constant for this shader; the literal is used instead of the named macro
321
+ {
322
+ return 3u; // _NBL_GLSL_EXT_FFT_PAD_MIRROR_;
323
+ }
324
+ #define _NBL_GLSL_EXT_FFT_PARAMETERS_METHODS_DECLARED_
325
+
326
+
327
+ void nbl_glsl_ext_FFT_setData(in uvec3 coordinate, in uint channel, in nbl_glsl_complex complex_value) // stores one complex value into outBuffer as a half-float pair
328
+ {
329
+ const uint index = ((pc.data.imageHeight*channel+coordinate.x)<<nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize())+coordinate.y; // NOTE(review): row term is imageHeight*channel+coordinate.x, shifted by the width-derived log2 size, with coordinate.y as the column - looks like a transposed layout whose strides may not match; confirm intended addressing
330
+ outBuffer[index] = f16vec2(complex_value); // narrows the complex value to 16-bit floats on store
331
+ }
332
+ #define _NBL_GLSL_EXT_FFT_SET_DATA_DEFINED_
333
+
334
+
335
+
291
336
void main()
292
337
{
293
338
const uint dataOffset = gl_GlobalInvocationID.y*pc.data.imageWidth+gl_GlobalInvocationID.x;
@@ -296,6 +341,29 @@ void main()
296
341
nbl_glsl_ext_LumaMeter(gl_GlobalInvocationID.x<pc.data.imageWidth);
297
342
barrier();
298
343
}
344
+
345
+ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channel) // reads one colour channel of inBuffer and returns it as a complex number with zero imaginary part
346
+ {
347
+ #if 0 // disabled: no coordinate wrapping or rejection is performed below
348
+ if (!nbl_glsl_ext_FFT_wrap_coord(coordinate))
349
+ return nbl_glsl_complex(0.f,0.f);
350
+ #endif
351
+ const uint index = coordinate.y*pc.data.imageWidth+coordinate.x; // row-major pixel index; NOTE(review): coordinate is used unchecked - confirm callers never pass values outside the image
352
+ float data;
353
+ switch (channel)
354
+ {
355
+ case 2u:
356
+ data = float(inBuffer[index].z);
357
+ break;
358
+ case 1u:
359
+ data = float(inBuffer[index].y);
360
+ break;
361
+ default: // channel 0 and any value other than 1 or 2 read the .x component
362
+ data = float(inBuffer[index].x);
363
+ break;
364
+ }
365
+ return nbl_glsl_complex(data,0.f); // real part = sample, imaginary part = 0
366
+ }
299
367
)===" ));
300
368
auto interleaveAndLastFFTShader = driver->createGPUShader (core::make_smart_refctd_ptr<ICPUShader>(R"===(
301
369
#version 450 core
@@ -306,14 +374,48 @@ layout(binding = 0, std430) restrict readonly buffer ImageInputBuffer
306
374
{
307
375
f16vec3_packed inBuffer[];
308
376
};
377
+ #define _NBL_GLSL_EXT_FFT_INPUT_DESCRIPTOR_DEFINED_
309
378
layout(binding = 1, std430) restrict writeonly buffer ImageOutputBuffer // write-only SSBO at binding 1 of this shader
310
379
{
311
380
f16vec4 outBuffer[]; // one half-float vec4 per element; main() writes f16vec4(vec4(color,1.0)) at dataOffset
312
381
};
382
+ #define _NBL_GLSL_EXT_FFT_OUTPUT_DESCRIPTOR_DEFINED_
313
383
layout(binding = 3, std430) restrict readonly buffer IntensityBuffer // read-only here; an earlier shader in this file declares binding 3 IntensityBuffer as writeonly
314
384
{
315
385
float intensity[]; // unsized float array; how it is indexed is not visible in this excerpt
316
386
};
387
+
388
+
389
+ #include <nbl/builtin/glsl/math/complex.glsl>
390
+ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channel);
391
+ #define _NBL_GLSL_EXT_FFT_GET_PADDED_DATA_DEFINED_
392
+
393
+ uvec3 nbl_glsl_ext_FFT_Parameters_t_getDimensions() // returns (imageWidth, imageHeight, 1) taken from the push constants
394
+ {
395
+ return uvec3(pc.data.imageWidth,pc.data.imageHeight,1u);
396
+ }
397
+ bool nbl_glsl_ext_FFT_Parameters_t_getIsInverse() // constant for this shader
398
+ {
399
+ return true; // this shader's main() carries a TODO to compute the iFFT, so this presumably selects the inverse transform - confirm
400
+ }
401
+ uint nbl_glsl_ext_FFT_Parameters_t_getDirection() // constant for this shader
402
+ {
403
+ return 0u; // NOTE(review): presumably axis index 0 (X) - confirm against the FFT extension
404
+ }
405
+ uint nbl_glsl_ext_FFT_Parameters_t_getMaxChannel() // constant for this shader
406
+ {
407
+ return 2u; // NOTE(review): presumably the highest channel index (0..2), not a count - confirm
408
+ }
409
+ uint nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize() // log2 of the FFT length for this shader
410
+ {
411
+ return 10u; // hard-coded: 2^10 = 1024 points regardless of the image size in the push constants - NOTE(review): looks like a placeholder, confirm
412
+ }
413
+ uint nbl_glsl_ext_FFT_Parameters_t_getPaddingType() // constant for this shader; the literal is used instead of the named macro
414
+ {
415
+ return 3u; // _NBL_GLSL_EXT_FFT_PAD_MIRROR_;
416
+ }
417
+ #define _NBL_GLSL_EXT_FFT_PARAMETERS_METHODS_DECLARED_
418
+
317
419
void main()
318
420
{
319
421
// TODO: compute iFFT of the image
@@ -352,6 +454,15 @@ void main()
352
454
if (alive)
353
455
outBuffer[dataOffset] = f16vec4(vec4(color,1.0));
354
456
}
457
+
458
+ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channel) // stub: ignores both arguments and always returns complex zero
459
+ {
460
+ #if 0 // disabled: no coordinate wrapping or rejection is performed
461
+ if (!nbl_glsl_ext_FFT_wrap_coord(coordinate))
462
+ return nbl_glsl_complex(0.f,0.f);
463
+ #endif
464
+ return nbl_glsl_complex(0.f,0.f); // placeholder value; no input buffer is read yet
465
+ }
355
466
)===" ));
356
467
struct SpecializationConstants
357
468
{
@@ -586,7 +697,7 @@ void main()
586
697
auto * fftPushConstants = outParam.fftPushConstants ;
587
698
auto * fftDispatchInfo = outParam.fftDispatchInfo ;
588
699
const ISampler::E_TEXTURE_CLAMP fftPadding[2 ] = {ISampler::ETC_MIRROR,ISampler::ETC_MIRROR};
589
- const auto passes = FFTClass::buildParameters (false ,colorChannelsFFT,extent,fftPushConstants,fftDispatchInfo,fftPadding,marginSrcDim);
700
+ const auto passes = FFTClass::buildParameters< false > (false ,colorChannelsFFT,extent,fftPushConstants,fftDispatchInfo,fftPadding,marginSrcDim);
590
701
{
591
702
// override for less work and storage (dont need to store the extra padding of the last axis after iFFT)
592
703
fftPushConstants[1 ].output_strides .x = fftPushConstants[0 ].input_strides .x ;
@@ -697,7 +808,6 @@ void main()
697
808
temporaryPixelBuffer = driver->createDeviceLocalGPUBufferOnDedMem (tempBufferSize);
698
809
if (check_error (!cuda::CCUDAHandler::defaultHandleResult (cuda::CCUDAHandler::registerBuffer (&temporaryPixelBuffer))," Could not register buffer for Denoiser scratch memory!" ))
699
810
return error_code;
700
- // TODO: allocate scratch with Nabla again
701
811
scratch = driver->createDeviceLocalGPUBufferOnDedMem (scratchBufferSize);
702
812
if (check_error (!cuda::CCUDAHandler::defaultHandleResult (cuda::CCUDAHandler::registerBuffer (&scratch)), " Could not register buffer for Denoiser temporary memory with CUDA natively!" ))
703
813
return error_code;
@@ -717,13 +827,13 @@ void main()
717
827
CommonPushConstants shaderConstants;
718
828
{
719
829
shaderConstants.imageWidth = param.width ;
830
+ shaderConstants.imageHeight = param.height ;
720
831
assert (intensityBufferOffset%IntensityValuesSize==0u );
721
- shaderConstants.beforeDenoise = 1u ;
722
832
723
833
shaderConstants.intensityBufferDWORDOffset = intensityBufferOffset/IntensityValuesSize;
724
834
shaderConstants.denoiserExposureBias = denoiserExposureBiasBundle[i].value ();
725
835
726
- shaderConstants.autoexposureOff = 0u ;
836
+ shaderConstants.flags = 0b11u ; // (autoexposureOn<<1)|beforeDenoise
727
837
switch (tonemapperBundle[i].first )
728
838
{
729
839
case DTEA_TONEMAPPER_REINHARD:
@@ -764,7 +874,7 @@ void main()
764
874
if (core::isnan (key))
765
875
{
766
876
shaderConstants.tonemapperParams [0 ] = 0.18 ;
767
- shaderConstants.autoexposureOff = 1u ;
877
+ shaderConstants.flags &= 0b01u ; // ~(autoexposureOn<<1)
768
878
}
769
879
else
770
880
shaderConstants.tonemapperParams [0 ] = key;
@@ -961,8 +1071,8 @@ void main()
961
1071
// compute post-processing
962
1072
{
963
1073
// let the shaders know we're in the second phase now
964
- shaderConstants.beforeDenoise = 0u ;
965
- driver->pushConstants (sharedPipelineLayout.get (), video::IGPUSpecializedShader::ESS_COMPUTE, offsetof (CommonPushConstants,beforeDenoise ), sizeof (uint32_t ), &shaderConstants.beforeDenoise );
1074
+ shaderConstants.flags &= 0b10u ;
1075
+ driver->pushConstants (sharedPipelineLayout.get (), video::IGPUSpecializedShader::ESS_COMPUTE, offsetof (CommonPushConstants,flags ), sizeof (uint32_t ), &shaderConstants.flags );
966
1076
// Bloom
967
1077
uint32_t workgroupCounts[2 ] = { (param.width + kComputeWGSize - 1u ) / kComputeWGSize ,param.height }; // TODO: change
968
1078
{
@@ -989,7 +1099,6 @@ void main()
989
1099
}
990
1100
991
1101
driver->bindComputePipeline (secondLumaMeterAndFirstFFTPipeline.get ());
992
- // FFTClass::dispatchHelper(driver, imageFirstFFTPipelineLayout.get(), fftPushConstants[0], fftDispatchInfo[0]);
993
1102
// dispatch
994
1103
driver->dispatch (workgroupCounts[0 ],workgroupCounts[1 ],1u );
995
1104
COpenGLExtensionHandler::extGlMemoryBarrier (GL_SHADER_STORAGE_BARRIER_BIT);
0 commit comments