@@ -43,6 +43,7 @@ struct ImageToDenoise
43
43
uint32_t width = 0u , height = 0u ;
44
44
uint32_t colorTexelSize = 0u ;
45
45
E_IMAGE_INPUT denoiserType = EII_COUNT;
46
+ VkExtent3D scaledKernelExtent;
46
47
float bloomScale;
47
48
};
48
49
struct DenoiserToUse
@@ -220,13 +221,15 @@ nbl_glsl_ext_FFT_Parameters_t nbl_glsl_ext_FFT_getParameters()
220
221
}
221
222
#define _NBL_GLSL_EXT_FFT_GET_PARAMETERS_DEFINED_
222
223
224
+ #include "nbl/builtin/glsl/ext/FFT/parameters.glsl"
225
+
223
226
#include <nbl/builtin/glsl/math/complex.glsl>
224
227
// Fetches one channel of `inputImage` as the real part of a complex FFT input sample.
//  coordinate - integer position in the FFT domain (only .xy is used)
//  channel    - index of the colour channel to read from the sampled texel
// The image is addressed through normalized UVs derived from the FFT dimensions
// (not from the texture's own size) and is always sampled at mip level 0.
// How UVs outside [0,1] resolve depends on the sampler bound to `inputImage`, which is not visible here.
nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(in ivec3 coordinate, in uint channel)
{
	const vec2 fftExtent = vec2(nbl_glsl_ext_FFT_Parameters_t_getDimensions().xy);
	const vec2 fftCentre = fftExtent*0.5;
	// position of the requested element relative to the middle of the FFT domain
	const vec2 centred = vec2(coordinate.xy)-fftCentre;
	// +0.5 texel to hit the texel centre, then +0.5 in UV space to re-centre around the image middle
	const vec2 uv = (centred+vec2(0.5))/fftExtent+vec2(0.5);
	const vec4 sampledTexel = textureLod(inputImage,uv,0.0);
	// imaginary part of the input is always zero
	return nbl_glsl_complex(sampledTexel[channel], 0.0f);
}
232
235
#define _NBL_GLSL_EXT_FFT_GET_PADDED_DATA_DEFINED_
@@ -1032,25 +1035,38 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
1032
1035
assert (region.bufferRowLength );
1033
1036
outParam.colorTexelSize = asset::getTexelOrBlockBytesize (colorCreationParams.format );
1034
1037
}
1035
-
1036
- const auto & kerDim = outParam.kernel ->getCreationParameters ().extent ;
1037
- const float bloomScale = core::min (float (extent.width ) / float (kerDim.width ), float (extent.height ) / float (kerDim.height ))* bloomScaleBundle[i].value ();
1038
- if (bloomScale>1 .f )
1039
- os::Printer::log (imageIDString + " Bloom Kernel loose sharpness, increase resolution of bloom kernel!" , ELL_WARNING);
1040
- const auto marginSrcDim = [extent,kerDim,bloomScale]() -> auto
1038
+
1039
+ const float bloomRelativeScale = bloomScaleBundle[i].value ();
1040
+ const auto kernelScale = [&outParam,&extent,bloomRelativeScale]() -> auto
1041
+ {
1042
+ auto kerDim = outParam.kernel ->getCreationParameters ().extent ;
1043
+ float kernelScale;
1044
+ if (extent.width <extent.height )
1045
+ kernelScale = float (extent.width )*bloomRelativeScale/float (kerDim.width );
1046
+ else
1047
+ kernelScale = float (extent.height )*bloomRelativeScale/float (kerDim.height );
1048
+ outParam.scaledKernelExtent .width = core::ceil (float (kerDim.width )*kernelScale);
1049
+ outParam.scaledKernelExtent .height = core::ceil (float (kerDim.height )*kernelScale);
1050
+ outParam.scaledKernelExtent .depth = 1u ;
1051
+ return kernelScale;
1052
+ }();
1053
+ if (kernelScale>1 .f )
1054
+ os::Printer::log (imageIDString + " Bloom Kernel loose sharpness, increase resolution of bloom kernel or reduce its relative scale!" , ELL_WARNING);
1055
+ const auto marginSrcDim = [extent,outParam]() -> auto
1041
1056
{
1042
1057
auto tmp = extent;
1043
1058
for (auto i=0u ; i<3u ; i++)
1044
1059
{
1045
- const auto coord = (&kerDim .width )[i];
1060
+ const auto coord = (&outParam. scaledKernelExtent .width )[i];
1046
1061
if (coord>1u )
1047
- (&tmp.width )[i] += core::max ( coord*bloomScale, 1u ) -1u ;
1062
+ (&tmp.width )[i] += coord-1u ;
1048
1063
}
1049
1064
return tmp;
1050
1065
}();
1051
- outParam.bloomScale = bloomScale ;
1052
- fftScratchSize = core::max (FFTClass::getOutputBufferSize (usingHalfFloatFFTStorage,kerDim ,colorChannelsFFT)*2u ,fftScratchSize);
1066
+ outParam.bloomScale = kernelScale ;
1067
+ fftScratchSize = core::max (FFTClass::getOutputBufferSize (usingHalfFloatFFTStorage,outParam. scaledKernelExtent ,colorChannelsFFT)*2u ,fftScratchSize);
1053
1068
fftScratchSize = core::max (FFTClass::getOutputBufferSize (usingHalfFloatFFTStorage,marginSrcDim,colorChannelsFFT),fftScratchSize);
1069
+ // TODO: maybe move them to nested loop and compute JIT
1054
1070
{
1055
1071
auto * fftPushConstants = outParam.fftPushConstants ;
1056
1072
auto * fftDispatchInfo = outParam.fftDispatchInfo ;
@@ -1319,15 +1335,14 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
1319
1335
}
1320
1336
1321
1337
// kernel outputs
1322
- const auto kerDim = kerImageView->getCreationParameters ().image ->getCreationParameters ().extent ;
1323
- const VkExtent3D paddedKerDim = FFTClass::padDimensions (kerImageView->getCreationParameters ().image ->getCreationParameters ().extent );
1338
+ auto paddedKernelExtent = FFTClass::padDimensions (param.scaledKernelExtent );
1324
1339
for (uint32_t i=0u ; i<colorChannelsFFT; i++)
1325
1340
{
1326
1341
video::IGPUImage::SCreationParams imageParams;
1327
1342
imageParams.flags = static_cast <asset::IImage::E_CREATE_FLAGS>(0u );
1328
1343
imageParams.type = asset::IImage::ET_2D;
1329
1344
imageParams.format = EF_R32G32_SFLOAT;
1330
- imageParams.extent = { paddedKerDim .width ,paddedKerDim .height ,1u };
1345
+ imageParams.extent = {paddedKernelExtent .width ,paddedKernelExtent .height ,1u };
1331
1346
imageParams.mipLevels = 1u ;
1332
1347
imageParams.arrayLayers = 1u ;
1333
1348
imageParams.samples = asset::IImage::ESCF_1_BIT;
@@ -1348,7 +1363,7 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
1348
1363
FFTClass::Parameters_t fftPushConstants[2 ];
1349
1364
FFTClass::DispatchInfo_t fftDispatchInfo[2 ];
1350
1365
const ISampler::E_TEXTURE_CLAMP fftPadding[2 ] = { ISampler::ETC_CLAMP_TO_BORDER,ISampler::ETC_CLAMP_TO_BORDER };
1351
- const auto passes = FFTClass::buildParameters (false ,colorChannelsFFT,kerDim ,fftPushConstants,fftDispatchInfo,fftPadding);
1366
+ const auto passes = FFTClass::buildParameters (false ,colorChannelsFFT,param. scaledKernelExtent ,fftPushConstants,fftDispatchInfo,fftPadding);
1352
1367
1353
1368
// the kernel's FFTs
1354
1369
{
@@ -1391,12 +1406,12 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
1391
1406
{
	// Kernel normalization pass: one invocation grid over the padded kernel extent, one Z-slice per colour channel.
	constexpr uint32_t kWorkgroupDim = 16u;
	// number of workgroups needed to cover `texels` along one axis
	const auto workgroupsFor = [kWorkgroupDim](const auto texels) -> uint32_t
	{
		return (texels-1u)/kWorkgroupDim+1u;
	};
	// shift derived from the MSB of the padded extent along one axis
	const auto bitreverseShiftFor = [](const auto paddedDim)
	{
		return 32-core::findMSB(paddedDim);
	};

	NormalizationPushConstants normalizationPC;
	// strides come from the second entry of the FFT push constants built above
	normalizationPC.stride = fftPushConstants[1].output_strides;
	normalizationPC.bitreverse_shift.x = bitreverseShiftFor(paddedKernelExtent.width);
	normalizationPC.bitreverse_shift.y = bitreverseShiftFor(paddedKernelExtent.height);
	normalizationPC.bitreverse_shift.z = 0;
	driver->pushConstants(kernelNormalizationPipeline->getLayout(),ICPUSpecializedShader::ESS_COMPUTE,0u,sizeof(normalizationPC),&normalizationPC);

	const uint32_t groupCountX = workgroupsFor(paddedKernelExtent.width);
	const uint32_t groupCountY = workgroupsFor(paddedKernelExtent.height);
	driver->dispatch(groupCountX,groupCountY,colorChannelsFFT);
}
1402
1417
FFTClass::defaultBarrier ();
0 commit comments