Skip to content

Commit 195d3d3

Browse files
use Nabla for allocating scratch again
1 parent 5007a81 commit 195d3d3

File tree

1 file changed

+20
-21
lines changed
  • examples_tests/39.DenoiserTonemapper

1 file changed

+20
-21
lines changed

examples_tests/39.DenoiserTonemapper/main.cpp

Lines changed: 20 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -29,17 +29,21 @@ enum E_IMAGE_INPUT : uint32_t
2929
};
3030
constexpr uint32_t calcDenoiserBuffersNeeded(E_IMAGE_INPUT denoiserType)
3131
{
32-
return 3u+denoiserType;
32+
return 4u+denoiserType;
3333
}
3434

35+
using FFTClass = ext::FFT::FFT;
36+
3537
struct ImageToDenoise
3638
{
39+
FFTClass::Parameters_t fftPushConstants[3];
40+
FFTClass::DispatchInfo_t fftDispatchInfo[3];
41+
core::smart_refctd_ptr<asset::ICPUImage> image[EII_COUNT] = { nullptr,nullptr,nullptr };
42+
core::smart_refctd_ptr<asset::ICPUImage> kernel = nullptr;
3743
uint32_t width = 0u, height = 0u;
3844
uint32_t colorTexelSize = 0u;
3945
E_IMAGE_INPUT denoiserType = EII_COUNT;
4046
float bloomScale;
41-
core::smart_refctd_ptr<asset::ICPUImage> image[EII_COUNT] = { nullptr,nullptr,nullptr };
42-
core::smart_refctd_ptr<asset::ICPUImage> kernel = nullptr;
4347
};
4448
struct DenoiserToUse
4549
{
@@ -160,7 +164,6 @@ int main(int argc, char* argv[])
160164
}
161165

162166

163-
using FFTClass = ext::FFT::FFT;
164167
using LumaMeterClass = ext::LumaMeter::CLumaMeter;
165168
using ToneMapperClass = ext::ToneMapper::CToneMapper;
166169

@@ -428,7 +431,7 @@ void main()
428431
uint32_t fftScratchSize = 0u;
429432
{
430433
asset::IAssetLoader::SAssetLoadParams lp(0ull,nullptr);
431-
auto default_kernel_image_bundle = am->getAsset("../../media/kernels/physical_flare_512.exr",lp); // TODO: use builtins?
434+
auto default_kernel_image_bundle = am->getAsset("../../media/kernels/physical_flare_512.exr",lp); // TODO: make it a builtins?
432435

433436
for (size_t i=0; i < inputFilesAmount; i++)
434437
{
@@ -580,9 +583,8 @@ void main()
580583
}();
581584
fftScratchSize = core::max(FFTClass::getOutputBufferSize(usingHalfFloatFFTStorage,marginSrcDim,colorChannelsFFT),fftScratchSize);
582585
{
583-
// TODO: store these
584-
FFTClass::Parameters_t fftPushConstants[3];
585-
FFTClass::DispatchInfo_t fftDispatchInfo[3];
586+
auto* fftPushConstants = outParam.fftPushConstants;
587+
auto* fftDispatchInfo = outParam.fftDispatchInfo;
586588
const ISampler::E_TEXTURE_CLAMP fftPadding[2] = {ISampler::ETC_MIRROR,ISampler::ETC_MIRROR};
587589
const auto passes = FFTClass::buildParameters(false,colorChannelsFFT,extent,fftPushConstants,fftDispatchInfo,fftPadding,marginSrcDim);
588590
{
@@ -648,20 +650,19 @@ void main()
648650
// keep all CUDA links in an array (less code to map/unmap)
649651
constexpr uint32_t kMaxDenoiserBuffers = calcDenoiserBuffersNeeded(EII_NORMAL);
650652
cuda::CCUDAHandler::GraphicsAPIObjLink<video::IGPUBuffer> bufferLinks[kMaxDenoiserBuffers];
651-
// except for the scratch CUDA buffer which can and will be ENORMOUS
652-
CUdeviceptr denoiserScratch = 0ull; // TODO: allocate scratch with Nabla
653653
// set-up denoisers
654654
constexpr size_t IntensityValuesSize = sizeof(float);
655655
auto& intensityBuffer = bufferLinks[0];
656656
auto& denoiserState = bufferLinks[0];
657-
auto& temporaryPixelBuffer = bufferLinks[1];
658-
auto& colorPixelBuffer = bufferLinks[2];
659-
auto& albedoPixelBuffer = bufferLinks[3];
660-
auto& normalPixelBuffer = bufferLinks[4];
657+
auto& scratch = bufferLinks[1];
658+
auto& temporaryPixelBuffer = bufferLinks[2];
659+
auto& colorPixelBuffer = bufferLinks[3];
660+
auto& albedoPixelBuffer = bufferLinks[4];
661+
auto& normalPixelBuffer = bufferLinks[5];
661662
//auto denoised;
662663
size_t denoiserStateBufferSize = 0ull;
663664
{
664-
size_t scratchBufferSize = 0ull;
665+
size_t scratchBufferSize = fftScratchSize;
665666
size_t tempBufferSize = fftScratchSize;
666667
for (uint32_t i=0u; i<EII_COUNT; i++)
667668
{
@@ -697,7 +698,8 @@ void main()
697698
if (check_error(!cuda::CCUDAHandler::defaultHandleResult(cuda::CCUDAHandler::registerBuffer(&temporaryPixelBuffer)),"Could not register buffer for Denoiser scratch memory!"))
698699
return error_code;
699700
// TODO: allocate scratch with Nabla again
700-
if (check_error(!cuda::CCUDAHandler::defaultHandleResult(cuda::CCUDAHandler::cuda.pcuMemAlloc_v2(&denoiserScratch,scratchBufferSize)), "Could not register buffer for Denoiser temporary memory with CUDA natively!"))
701+
scratch = driver->createDeviceLocalGPUBufferOnDedMem(scratchBufferSize);
702+
if (check_error(!cuda::CCUDAHandler::defaultHandleResult(cuda::CCUDAHandler::registerBuffer(&scratch)), "Could not register buffer for Denoiser temporary memory with CUDA natively!"))
701703
return error_code;
702704
}
703705
const auto intensityBufferOffset = denoiserStateBufferSize;
@@ -898,12 +900,9 @@ void main()
898900
};
899901
core::SRAIIBasedExiter<decltype(unmapBuffers)> exitRoutine(unmapBuffers);
900902

901-
cuda::CCUDAHandler::GraphicsAPIObjLink<video::IGPUBuffer> fakeScratchLink; // TODO: undo this
902-
fakeScratchLink.asBuffer.pointer = denoiserScratch;
903-
904903
// set up denoiser
905904
auto& denoiser = denoisers[param.denoiserType];
906-
if (denoiser.m_denoiser->setup(m_cudaStream, denoiseTileDimsWithOverlap, denoiserState, denoiser.stateSize, fakeScratchLink, denoiser.scratchSize, denoiser.stateOffset) != OPTIX_SUCCESS)
905+
if (denoiser.m_denoiser->setup(m_cudaStream, denoiseTileDimsWithOverlap, denoiserState, denoiser.stateSize, scratch, denoiser.scratchSize, denoiser.stateOffset) != OPTIX_SUCCESS)
907906
{
908907
os::Printer::log(makeImageIDString(i) + "Could not setup the denoiser for the image resolution and denoiser buffers, skipping image!", ELL_ERROR);
909908
continue;
@@ -944,7 +943,7 @@ void main()
944943
denoiserInputs,
945944
denoiserInputCount,
946945
&denoiserOutput,
947-
fakeScratchLink,
946+
scratch,
948947
denoiser.scratchSize,
949948
overlap,
950949
tileWidth,

0 commit comments

Comments
 (0)