Skip to content

Commit f1c60ea

Browse files
new algorithm of kernel computation almost works
1 parent 33aaf7d commit f1c60ea

File tree

1 file changed

+37
-22
lines changed
  • examples_tests/39.DenoiserTonemapper

1 file changed

+37
-22
lines changed

examples_tests/39.DenoiserTonemapper/main.cpp

Lines changed: 37 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ struct ImageToDenoise
4343
uint32_t width = 0u, height = 0u;
4444
uint32_t colorTexelSize = 0u;
4545
E_IMAGE_INPUT denoiserType = EII_COUNT;
46+
VkExtent3D scaledKernelExtent;
4647
float bloomScale;
4748
};
4849
struct DenoiserToUse
@@ -220,13 +221,15 @@ nbl_glsl_ext_FFT_Parameters_t nbl_glsl_ext_FFT_getParameters()
220221
}
221222
#define _NBL_GLSL_EXT_FFT_GET_PARAMETERS_DEFINED_
222223
224+
#include "nbl/builtin/glsl/ext/FFT/parameters.glsl"
225+
223226
#include <nbl/builtin/glsl/math/complex.glsl>
224227
nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(in ivec3 coordinate, in uint channel)
225228
{
226-
const ivec2 inputImageSize = textureSize(inputImage,0);
227-
const ivec2 halfInputImageSize = inputImageSize>>1;
228-
const vec2 relativeCoords = vec2(coordinate.xy-halfInputImageSize)/pc.kernelScale;
229-
const vec4 texelValue = textureLod(inputImage,(relativeCoords+vec2(0.5))/vec2(inputImageSize)+vec2(0.5),-log2(pc.kernelScale));
229+
const vec2 inputSize = vec2(nbl_glsl_ext_FFT_Parameters_t_getDimensions().xy);
230+
const vec2 halfInputSize = inputSize*0.5;
231+
const vec2 relativeCoords = vec2(coordinate.xy)-halfInputSize;
232+
const vec4 texelValue = textureLod(inputImage,(relativeCoords+vec2(0.5))/inputSize+vec2(0.5),0.0);
230233
return nbl_glsl_complex(texelValue[channel], 0.0f);
231234
}
232235
#define _NBL_GLSL_EXT_FFT_GET_PADDED_DATA_DEFINED_
@@ -1032,25 +1035,38 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
10321035
assert(region.bufferRowLength);
10331036
outParam.colorTexelSize = asset::getTexelOrBlockBytesize(colorCreationParams.format);
10341037
}
1035-
1036-
const auto& kerDim = outParam.kernel->getCreationParameters().extent;
1037-
const float bloomScale = core::min(float(extent.width) / float(kerDim.width), float(extent.height) / float(kerDim.height))* bloomScaleBundle[i].value();
1038-
if (bloomScale>1.f)
1039-
os::Printer::log(imageIDString + "Bloom Kernel loose sharpness, increase resolution of bloom kernel!", ELL_WARNING);
1040-
const auto marginSrcDim = [extent,kerDim,bloomScale]() -> auto
1038+
1039+
const float bloomRelativeScale = bloomScaleBundle[i].value();
1040+
const auto kernelScale = [&outParam,&extent,bloomRelativeScale]() -> auto
1041+
{
1042+
auto kerDim = outParam.kernel->getCreationParameters().extent;
1043+
float kernelScale;
1044+
if (extent.width<extent.height)
1045+
kernelScale = float(extent.width)*bloomRelativeScale/float(kerDim.width);
1046+
else
1047+
kernelScale = float(extent.height)*bloomRelativeScale/float(kerDim.height);
1048+
outParam.scaledKernelExtent.width = core::ceil(float(kerDim.width)*kernelScale);
1049+
outParam.scaledKernelExtent.height = core::ceil(float(kerDim.height)*kernelScale);
1050+
outParam.scaledKernelExtent.depth = 1u;
1051+
return kernelScale;
1052+
}();
1053+
if (kernelScale>1.f)
1054+
os::Printer::log(imageIDString + "Bloom Kernel loose sharpness, increase resolution of bloom kernel or reduce its relative scale!", ELL_WARNING);
1055+
const auto marginSrcDim = [extent,outParam]() -> auto
10411056
{
10421057
auto tmp = extent;
10431058
for (auto i=0u; i<3u; i++)
10441059
{
1045-
const auto coord = (&kerDim.width)[i];
1060+
const auto coord = (&outParam.scaledKernelExtent.width)[i];
10461061
if (coord>1u)
1047-
(&tmp.width)[i] += core::max(coord*bloomScale,1u)-1u;
1062+
(&tmp.width)[i] += coord-1u;
10481063
}
10491064
return tmp;
10501065
}();
1051-
outParam.bloomScale = bloomScale;
1052-
fftScratchSize = core::max(FFTClass::getOutputBufferSize(usingHalfFloatFFTStorage,kerDim,colorChannelsFFT)*2u,fftScratchSize);
1066+
outParam.bloomScale = kernelScale;
1067+
fftScratchSize = core::max(FFTClass::getOutputBufferSize(usingHalfFloatFFTStorage,outParam.scaledKernelExtent,colorChannelsFFT)*2u,fftScratchSize);
10531068
fftScratchSize = core::max(FFTClass::getOutputBufferSize(usingHalfFloatFFTStorage,marginSrcDim,colorChannelsFFT),fftScratchSize);
1069+
// TODO: maybe move them into the nested loop and compute them just-in-time (JIT)
10541070
{
10551071
auto* fftPushConstants = outParam.fftPushConstants;
10561072
auto* fftDispatchInfo = outParam.fftDispatchInfo;
@@ -1319,15 +1335,14 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
13191335
}
13201336

13211337
// kernel outputs
1322-
const auto kerDim = kerImageView->getCreationParameters().image->getCreationParameters().extent;
1323-
const VkExtent3D paddedKerDim = FFTClass::padDimensions(kerImageView->getCreationParameters().image->getCreationParameters().extent);
1338+
auto paddedKernelExtent = FFTClass::padDimensions(param.scaledKernelExtent);
13241339
for (uint32_t i=0u; i<colorChannelsFFT; i++)
13251340
{
13261341
video::IGPUImage::SCreationParams imageParams;
13271342
imageParams.flags = static_cast<asset::IImage::E_CREATE_FLAGS>(0u);
13281343
imageParams.type = asset::IImage::ET_2D;
13291344
imageParams.format = EF_R32G32_SFLOAT;
1330-
imageParams.extent = { paddedKerDim.width,paddedKerDim.height,1u};
1345+
imageParams.extent = {paddedKernelExtent.width,paddedKernelExtent.height,1u};
13311346
imageParams.mipLevels = 1u;
13321347
imageParams.arrayLayers = 1u;
13331348
imageParams.samples = asset::IImage::ESCF_1_BIT;
@@ -1348,7 +1363,7 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
13481363
FFTClass::Parameters_t fftPushConstants[2];
13491364
FFTClass::DispatchInfo_t fftDispatchInfo[2];
13501365
const ISampler::E_TEXTURE_CLAMP fftPadding[2] = { ISampler::ETC_CLAMP_TO_BORDER,ISampler::ETC_CLAMP_TO_BORDER };
1351-
const auto passes = FFTClass::buildParameters(false,colorChannelsFFT,kerDim,fftPushConstants,fftDispatchInfo,fftPadding);
1366+
const auto passes = FFTClass::buildParameters(false,colorChannelsFFT,param.scaledKernelExtent,fftPushConstants,fftDispatchInfo,fftPadding);
13521367

13531368
// the kernel's FFTs
13541369
{
@@ -1391,12 +1406,12 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
13911406
{
13921407
NormalizationPushConstants normalizationPC;
13931408
normalizationPC.stride = fftPushConstants[1].output_strides;
1394-
normalizationPC.bitreverse_shift.x = 32-core::findMSB(paddedKerDim.width);
1395-
normalizationPC.bitreverse_shift.y = 32-core::findMSB(paddedKerDim.height);
1409+
normalizationPC.bitreverse_shift.x = 32-core::findMSB(paddedKernelExtent.width);
1410+
normalizationPC.bitreverse_shift.y = 32-core::findMSB(paddedKernelExtent.height);
13961411
normalizationPC.bitreverse_shift.z = 0;
13971412
driver->pushConstants(kernelNormalizationPipeline->getLayout(),ICPUSpecializedShader::ESS_COMPUTE,0u,sizeof(normalizationPC),&normalizationPC);
1398-
const uint32_t dispatchSizeX = (paddedKerDim.width-1u)/16u+1u;
1399-
const uint32_t dispatchSizeY = (paddedKerDim.height-1u)/16u+1u;
1413+
const uint32_t dispatchSizeX = (paddedKernelExtent.width-1u)/16u+1u;
1414+
const uint32_t dispatchSizeY = (paddedKernelExtent.height-1u)/16u+1u;
14001415
driver->dispatch(dispatchSizeX,dispatchSizeY,colorChannelsFFT);
14011416
}
14021417
FFTClass::defaultBarrier();

0 commit comments

Comments
 (0)