Skip to content

Commit 33aaf7d

Browse files
fix FFT expansion handling bug (and a few bugs on Debug builds)
1 parent 043aa92 commit 33aaf7d

File tree

5 files changed

+38
-22
lines changed

5 files changed

+38
-22
lines changed

examples_tests/39.DenoiserTonemapper/CommandLineHandler.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -325,7 +325,7 @@ nbl::core::matrix3x4SIMD CommandLineHandler::getCameraTransform(uint64_t id)
325325

326326
auto startTime = std::chrono::steady_clock::now();
327327
auto meshes_bundle = assetManager->getAsset(filePath.data(), mitsubaLoaderParams);
328-
assert(!meshes_bundle.isEmpty(), ("ERROR (" + std::to_string(__LINE__) + " line): The xml file is invalid! Id of input stride: " + std::to_string(id)).c_str());
328+
// NOTE(review): the standard `assert` macro takes a single expression, but the top-level comma
// below hands it a second operand (the message string). Confirm the project's `assert` accepts
// this form on every toolchain that builds this file.
assert(!meshes_bundle.getContents().empty(), ("ERROR (" + std::to_string(__LINE__) + " line): The xml file is invalid! Id of input stride: " + std::to_string(id)).c_str());
329329
auto endTime = std::chrono::steady_clock::now();
330330
elapsedTimeXmls += (endTime - startTime);
331331

examples_tests/39.DenoiserTonemapper/CommonPushConstants.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ struct CommonPushConstants
1313
uint inImageTexelPitch[3];
1414
uint imageWidth;
1515
uint imageHeight;
16-
uint fftSizeLog2; // TODO: use this
16+
uint padding;
1717
vec2 kernel_half_pixel_size;
1818

1919
// luma meter and tonemapping var but also for denoiser

examples_tests/39.DenoiserTonemapper/ShaderCommon.glsl

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,9 @@ layout(push_constant, row_major) uniform PushConstants{
1919
#define _NBL_GLSL_EXT_LUMA_METER_PUSH_CONSTANTS_DEFINED_
2020
#define _NBL_GLSL_EXT_FFT_PUSH_CONSTANTS_DEFINED_
2121

22-
23-
uint nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize()
22+
// Returns the 5-bit field of `pc.data.flags` that starts at bit `_pass*5+2`
// (bits 2..6 for `_pass==0`, bits 7..11 for `_pass==1`).
// The line marked `25-` is the removed body of the function this one replaces.
uint CommonPushConstants_getPassLog2FFTSize(in int _pass)
2423
{
25-
return max(findMSB(pc.data.imageWidth-1u),_NBL_GLSL_WORKGROUP_SIZE_LOG2_)+1u;
24+
return bitfieldExtract(pc.data.flags,_pass*5+2,5);
2625
}
2726
uint nbl_glsl_ext_FFT_Parameters_t_getMaxChannel()
2827
{

examples_tests/39.DenoiserTonemapper/main.cpp

Lines changed: 33 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -223,9 +223,10 @@ nbl_glsl_ext_FFT_Parameters_t nbl_glsl_ext_FFT_getParameters()
223223
#include <nbl/builtin/glsl/math/complex.glsl>
224224
// Samples `inputImage` for the FFT input and returns the requested channel as a complex
// number with a zero imaginary part.
// Added (`+`) version: the coordinate is taken relative to half the image size, divided by
// `pc.kernelScale`, then mapped back to normalized texture coordinates around 0.5; the LOD
// passed to `textureLod` is `-log2(pc.kernelScale)`.
// The lines marked `-` are the pre-commit computation kept by the diff view.
nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(in ivec3 coordinate, in uint channel)
225225
{
226-
ivec2 inputImageSize = textureSize(inputImage,0);
227-
vec2 normalizedCoords = (vec2(coordinate.xy)+vec2(0.5f))/(vec2(inputImageSize)*pc.kernelScale);
228-
vec4 texelValue = textureLod(inputImage, normalizedCoords+vec2(0.5-0.5/pc.kernelScale), -log2(pc.kernelScale));
226+
const ivec2 inputImageSize = textureSize(inputImage,0);
227+
const ivec2 halfInputImageSize = inputImageSize>>1;
228+
const vec2 relativeCoords = vec2(coordinate.xy-halfInputImageSize)/pc.kernelScale;
229+
const vec4 texelValue = textureLod(inputImage,(relativeCoords+vec2(0.5))/vec2(inputImageSize)+vec2(0.5),-log2(pc.kernelScale));
229230
return nbl_glsl_complex(texelValue[channel], 0.0f);
230231
}
231232
#define _NBL_GLSL_EXT_FFT_GET_PADDED_DATA_DEFINED_
@@ -269,7 +270,8 @@ layout(set=0, binding=3, rg32f) uniform image2D NormalizedKernel[3];
269270
270271
// Push constants for this shader. The added (`+`) lines replace the former `uvec4 strides`
// with `uvec3 strides` followed by `float bloomIntensity`.
// NOTE(review): the `main()` hunk shown after this block declares a local
// `const float bloomIntensity = 0.95;` — confirm whether it is meant to read this member instead.
layout(push_constant) uniform PushConstants
271272
{
272-
uvec4 strides;
273+
uvec3 strides;
274+
float bloomIntensity;
273275
uvec4 bitreverse_shift;
274276
} pc;
275277
@@ -288,6 +290,8 @@ void main()
288290
const uvec2 coord = bitfieldReverse(gl_GlobalInvocationID.xy)>>pc.bitreverse_shift.xy;
289291
const nbl_glsl_complex shift = nbl_glsl_expImaginary(-nbl_glsl_PI*float(coord.x+coord.y));
290292
value = nbl_glsl_complex_mul(value,shift)/power;
293+
const float bloomIntensity = 0.95;
294+
value = value*bloomIntensity+nbl_glsl_complex(1.0-bloomIntensity,0.0);
291295
imageStore(NormalizedKernel[gl_WorkGroupID.z],ivec2(coord),vec4(value,0.0,0.0));
292296
}
293297
)==="));
@@ -446,6 +450,10 @@ uvec3 nbl_glsl_ext_FFT_Parameters_t_getDimensions()
446450
{
447451
return uvec3(pc.data.imageWidth,pc.data.imageHeight,1u);
448452
}
453+
// log2 of the FFT size for this shader: forwards to the shared push-constant accessor with pass index 0.
uint nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize()
454+
{
455+
return CommonPushConstants_getPassLog2FFTSize(0);
456+
}
449457
bool nbl_glsl_ext_FFT_Parameters_t_getIsInverse()
450458
{
451459
return false;
@@ -459,7 +467,7 @@ uint nbl_glsl_ext_FFT_Parameters_t_getDirection()
459467
460468
// Stores `complex_value` into `outSpectrum` at
//   ((channel << log2FFTSize(pass 0)) + coordinate.x) * imageHeight + coordinate.y
// The `-` line is the pre-commit index computation, which used
// `nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize()` for the shift.
void nbl_glsl_ext_FFT_setData(in uvec3 coordinate, in uint channel, in nbl_glsl_complex complex_value)
461469
{
462-
const uint index = ((channel<<nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize())+coordinate.x)*pc.data.imageHeight+coordinate.y;
470+
const uint index = ((channel<<CommonPushConstants_getPassLog2FFTSize(0))+coordinate.x)*pc.data.imageHeight+coordinate.y;
463471
outSpectrum[index] = complex_value;
464472
}
465473
#define _NBL_GLSL_EXT_FFT_SET_DATA_DEFINED_
@@ -559,7 +567,11 @@ layout(binding=4) uniform sampler2D NormalizedKernel[3];
559567
560568
// Returns (1 << pass-0 log2 FFT size, pc.data.imageHeight, 1).
// The added (`+`) line reads the pass-0 size explicitly rather than calling this shader's own
// `nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize()`, which the lines after this block define as pass 1.
uvec3 nbl_glsl_ext_FFT_Parameters_t_getDimensions()
561569
{
562-
return uvec3(0x1u<<nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize(),pc.data.imageHeight,1u);
570+
return uvec3(0x1u<<CommonPushConstants_getPassLog2FFTSize(0),pc.data.imageHeight,1u);
571+
}
572+
// log2 of the FFT size for this shader: forwards to the shared push-constant accessor with pass index 1.
uint nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize()
573+
{
574+
return CommonPushConstants_getPassLog2FFTSize(1);
563575
}
564576
bool nbl_glsl_ext_FFT_Parameters_t_getIsInverse()
565577
{
@@ -576,7 +588,7 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
576588
#define _NBL_GLSL_EXT_FFT_GET_PADDED_DATA_DEFINED_
577589
// Stores `complex_value` into `spectrum` at
//   ((channel << log2FFTSize(pass 0)) + coordinate.x) * imageHeight + coordinate.y
// The `-` line is the pre-commit index computation, which used
// `nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize()` for the shift.
void nbl_glsl_ext_FFT_setData(in uvec3 coordinate, in uint channel, in nbl_glsl_complex complex_value)
578590
{
579-
const uint index = ((channel<<nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize())+coordinate.x)*pc.data.imageHeight+coordinate.y;
591+
const uint index = ((channel<<CommonPushConstants_getPassLog2FFTSize(0))+coordinate.x)*pc.data.imageHeight+coordinate.y;
580592
spectrum[index] = complex_value;
581593
}
582594
#define _NBL_GLSL_EXT_FFT_SET_DATA_DEFINED_
@@ -642,7 +654,7 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
642654
{
643655
if (!nbl_glsl_ext_FFT_wrap_coord(coordinate))
644656
return nbl_glsl_complex(0.f,0.f);
645-
const uint index = ((channel<<nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize())+coordinate.x)*pc.data.imageHeight+coordinate.y;
657+
const uint index = ((channel<<CommonPushConstants_getPassLog2FFTSize(0))+coordinate.x)*pc.data.imageHeight+coordinate.y;
646658
return spectrum[index];
647659
}
648660
)==="));
@@ -680,7 +692,11 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
680692
681693
// Returns (1 << pass-0 log2 FFT size, pc.data.imageHeight, 1).
// The `-` line is the pre-commit version that called `nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize()`.
uvec3 nbl_glsl_ext_FFT_Parameters_t_getDimensions()
682694
{
683-
return uvec3(0x1u<<nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize(),pc.data.imageHeight,1u);
695+
return uvec3(0x1u<<CommonPushConstants_getPassLog2FFTSize(0),pc.data.imageHeight,1u);
696+
}
697+
// log2 of the FFT size for this shader: forwards to the shared push-constant accessor with pass index 0.
uint nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize()
698+
{
699+
return CommonPushConstants_getPassLog2FFTSize(0);
684700
}
685701
bool nbl_glsl_ext_FFT_Parameters_t_getIsInverse()
686702
{
@@ -772,7 +788,7 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
772788
{
773789
if (!nbl_glsl_ext_FFT_wrap_coord(coordinate))
774790
return nbl_glsl_complex(0.f,0.f);
775-
const uint index = ((channel<<nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize())+coordinate.x)*pc.data.imageHeight+coordinate.y;
791+
const uint index = ((channel<<CommonPushConstants_getPassLog2FFTSize(0))+coordinate.x)*pc.data.imageHeight+coordinate.y;
776792
return inSpectrum[index];
777793
}
778794
)==="));
@@ -1018,9 +1034,9 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
10181034
}
10191035

10201036
const auto& kerDim = outParam.kernel->getCreationParameters().extent;
1021-
const float bloomScale = core::min(float(extent.width)/float(kerDim.width),float(extent.height)/float(kerDim.height))*bloomScaleBundle[i].value();
1037+
const float bloomScale = core::min(float(extent.width) / float(kerDim.width), float(extent.height) / float(kerDim.height))* bloomScaleBundle[i].value();
10221038
if (bloomScale>1.f)
1023-
os::Printer::log(imageIDString + "Bloom Kernel will Clip and loose sharpness, increase resolution of bloom kernel!", ELL_WARNING);
1039+
os::Printer::log(imageIDString + "Bloom Kernel loose sharpness, increase resolution of bloom kernel!", ELL_WARNING);
10241040
const auto marginSrcDim = [extent,kerDim,bloomScale]() -> auto
10251041
{
10261042
auto tmp = extent;
@@ -1170,12 +1186,13 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
11701186
{
11711187
shaderConstants.imageWidth = param.width;
11721188
shaderConstants.imageHeight = param.height;
1173-
assert(intensityBufferOffset%IntensityValuesSize==0u);
11741189

1190+
assert(intensityBufferOffset%IntensityValuesSize==0u);
11751191
shaderConstants.intensityBufferDWORDOffset = intensityBufferOffset/IntensityValuesSize;
11761192
shaderConstants.denoiserExposureBias = denoiserExposureBiasBundle[i].value();
11771193

1178-
shaderConstants.flags = 0b11u; // (autoexposureOn<<1)|beforeDenoise
1194+
assert(param.fftPushConstants[0].getLog2FFTSize()==param.fftPushConstants[2].getLog2FFTSize());
1195+
// Packed layout written here:
//   bit 0      : beforeDenoise
//   bit 1      : autoexposureOn
//   bits 2 up  : param.fftPushConstants[0].getLog2FFTSize()
//   bits 7 up  : param.fftPushConstants[1].getLog2FFTSize()
// The GLSL accessor CommonPushConstants_getPassLog2FFTSize reads 5 bits at offset `_pass*5+2`,
// which matches these two shifts; no masking is applied on this side.
shaderConstants.flags = (param.fftPushConstants[1].getLog2FFTSize()<<7u)|(param.fftPushConstants[0].getLog2FFTSize()<<2u)|0b11u; // low two bits: (autoexposureOn<<1)|beforeDenoise
11791196
switch (tonemapperBundle[i].first)
11801197
{
11811198
case DTEA_TONEMAPPER_REINHARD:
@@ -1216,7 +1233,7 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
12161233
if (core::isnan(key))
12171234
{
12181235
shaderConstants.tonemapperParams[0] = 0.18;
1219-
shaderConstants.flags &= 0b01u; // ~(autoexposureOn<<1)
1236+
shaderConstants.flags &= ~0b10u; // ~(autoexposureOn<<1)
12201237
}
12211238
else
12221239
shaderConstants.tonemapperParams[0] = key;
@@ -1532,7 +1549,7 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe
15321549
// compute post-processing
15331550
{
15341551
// let the shaders know we're in the second phase now
1535-
shaderConstants.flags &= 0b10u;
1552+
shaderConstants.flags &= ~0b01u;
15361553
driver->pushConstants(sharedPipelineLayout.get(), video::IGPUSpecializedShader::ESS_COMPUTE, offsetof(CommonPushConstants,flags), sizeof(uint32_t), &shaderConstants.flags);
15371554
// Bloom
15381555
uint32_t workgroupCounts[2] = { (param.width+kComputeWGSize-1u)/kComputeWGSize,param.height };

src/nbl/video/CCUDAHandler.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -342,7 +342,7 @@ class CCUDAHandler
342342
//
343343
// Returns the range [begin, begin+headers.size()) with elements viewed as `const io::IReadFile* const`.
// The `345-` line is the pre-commit version, which always indexed `headers[0]`.
// The `345+` line uses a null `begin` when `headers` is empty.
static core::SRange<const io::IReadFile* const> getCUDASTDHeaders()
344344
{
345-
auto begin = reinterpret_cast<const io::IReadFile* const*>(&headers[0].get());
345+
auto begin = headers.empty() ? nullptr:reinterpret_cast<const io::IReadFile* const*>(&headers[0].get());
346346
return {begin,begin+headers.size()};
347347
}
348348
// Returns a const reference to `headerContents`.
static const auto& getCUDASTDHeaderContents() { return headerContents; }

0 commit comments

Comments
 (0)