@@ -29,17 +29,21 @@ enum E_IMAGE_INPUT : uint32_t
29
29
};
30
30
// Returns how many buffer links are needed for the given `denoiserType`:
// a fixed base count plus the numeric value of the enum.
// This patch raises the base from 3 to 4. In the same patch the denoiser scratch
// buffer becomes a regular entry of `bufferLinks` (slot 1, shifting the pixel
// buffers up by one), and that array is sized by calcDenoiserBuffersNeeded(EII_NORMAL).
// NOTE(review): presumably the enum value equals the number of optional extra
// input images (albedo, normal) - the enum body is not visible here; confirm.
constexpr uint32_t calcDenoiserBuffersNeeded (E_IMAGE_INPUT denoiserType)
31
31
{
32
- return 3u +denoiserType;
32
+ return 4u +denoiserType;
33
33
}
34
34
35
+ using FFTClass = ext::FFT::FFT;
36
+
35
37
// Aggregate of per-image data: the input images, an optional kernel image,
// dimensions, texel size, the denoiser type to use, a bloom scale and (new in
// this patch) the FFT parameters computed for the image.
// NOTE(review): presumably one instance exists per input file - confirm against the code that fills it.
struct ImageToDenoise
36
38
{
39
// FFT push constants and dispatch info, stored here instead of in function-local
// arrays; pointers to these arrays are passed to FFTClass::buildParameters.
// NOTE(review): the fixed size 3 is carried over from the former local arrays; its meaning is not shown here - confirm.
+ FFTClass::Parameters_t fftPushConstants[3 ];
40
+ FFTClass::DispatchInfo_t fftDispatchInfo[3 ];
41
// `image` and `kernel` are moved to the top of the struct by this patch (declarations otherwise unchanged).
+ core::smart_refctd_ptr<asset::ICPUImage> image[EII_COUNT] = { nullptr ,nullptr ,nullptr };
42
+ core::smart_refctd_ptr<asset::ICPUImage> kernel = nullptr ;
37
43
uint32_t width = 0u , height = 0u ;
38
44
uint32_t colorTexelSize = 0u ;
39
45
// Defaults to EII_COUNT; NOTE(review): presumably a "not set yet" sentinel - confirm.
E_IMAGE_INPUT denoiserType = EII_COUNT;
40
46
// NOTE(review): unlike the other members, bloomScale has no default initializer,
// so it holds an indeterminate value until assigned - confirm every path sets it.
float bloomScale;
41
- core::smart_refctd_ptr<asset::ICPUImage> image[EII_COUNT] = { nullptr ,nullptr ,nullptr };
42
- core::smart_refctd_ptr<asset::ICPUImage> kernel = nullptr ;
43
47
};
44
48
struct DenoiserToUse
45
49
{
@@ -160,7 +164,6 @@ int main(int argc, char* argv[])
160
164
}
161
165
162
166
163
- using FFTClass = ext::FFT::FFT;
164
167
using LumaMeterClass = ext::LumaMeter::CLumaMeter;
165
168
using ToneMapperClass = ext::ToneMapper::CToneMapper;
166
169
@@ -428,7 +431,7 @@ void main()
428
431
uint32_t fftScratchSize = 0u ;
429
432
{
430
433
asset::IAssetLoader::SAssetLoadParams lp (0ull ,nullptr );
431
- auto default_kernel_image_bundle = am->getAsset (" ../../media/kernels/physical_flare_512.exr" ,lp); // TODO: use builtins?
434
+ auto default_kernel_image_bundle = am->getAsset (" ../../media/kernels/physical_flare_512.exr" ,lp); // TODO: make it a builtins?
432
435
433
436
for (size_t i=0 ; i < inputFilesAmount; i++)
434
437
{
@@ -580,9 +583,8 @@ void main()
580
583
}();
581
584
fftScratchSize = core::max (FFTClass::getOutputBufferSize (usingHalfFloatFFTStorage,marginSrcDim,colorChannelsFFT),fftScratchSize);
582
585
{
583
- // TODO: store these
584
- FFTClass::Parameters_t fftPushConstants[3 ];
585
- FFTClass::DispatchInfo_t fftDispatchInfo[3 ];
586
+ auto * fftPushConstants = outParam.fftPushConstants ;
587
+ auto * fftDispatchInfo = outParam.fftDispatchInfo ;
586
588
const ISampler::E_TEXTURE_CLAMP fftPadding[2 ] = {ISampler::ETC_MIRROR,ISampler::ETC_MIRROR};
587
589
const auto passes = FFTClass::buildParameters (false ,colorChannelsFFT,extent,fftPushConstants,fftDispatchInfo,fftPadding,marginSrcDim);
588
590
{
@@ -648,20 +650,19 @@ void main()
648
650
// keep all CUDA links in an array (less code to map/unmap)
649
651
constexpr uint32_t kMaxDenoiserBuffers = calcDenoiserBuffersNeeded (EII_NORMAL);
650
652
cuda::CCUDAHandler::GraphicsAPIObjLink<video::IGPUBuffer> bufferLinks[kMaxDenoiserBuffers ];
651
- // except for the scratch CUDA buffer which can and will be ENORMOUS
652
- CUdeviceptr denoiserScratch = 0ull ; // TODO: allocate scratch with Nabla
653
653
// set-up denoisers
654
654
constexpr size_t IntensityValuesSize = sizeof (float );
655
655
auto & intensityBuffer = bufferLinks[0 ];
656
656
auto & denoiserState = bufferLinks[0 ];
657
- auto & temporaryPixelBuffer = bufferLinks[1 ];
658
- auto & colorPixelBuffer = bufferLinks[2 ];
659
- auto & albedoPixelBuffer = bufferLinks[3 ];
660
- auto & normalPixelBuffer = bufferLinks[4 ];
657
+ auto & scratch = bufferLinks[1 ];
658
+ auto & temporaryPixelBuffer = bufferLinks[2 ];
659
+ auto & colorPixelBuffer = bufferLinks[3 ];
660
+ auto & albedoPixelBuffer = bufferLinks[4 ];
661
+ auto & normalPixelBuffer = bufferLinks[5 ];
661
662
// auto denoised;
662
663
size_t denoiserStateBufferSize = 0ull ;
663
664
{
664
- size_t scratchBufferSize = 0ull ;
665
+ size_t scratchBufferSize = fftScratchSize ;
665
666
size_t tempBufferSize = fftScratchSize;
666
667
for (uint32_t i=0u ; i<EII_COUNT; i++)
667
668
{
@@ -697,7 +698,8 @@ void main()
697
698
if (check_error (!cuda::CCUDAHandler::defaultHandleResult (cuda::CCUDAHandler::registerBuffer (&temporaryPixelBuffer))," Could not register buffer for Denoiser scratch memory!" ))
698
699
return error_code;
699
700
// TODO: allocate scratch with Nabla again
700
- if (check_error (!cuda::CCUDAHandler::defaultHandleResult (cuda::CCUDAHandler::cuda.pcuMemAlloc_v2 (&denoiserScratch,scratchBufferSize)), " Could not register buffer for Denoiser temporary memory with CUDA natively!" ))
701
+ scratch = driver->createDeviceLocalGPUBufferOnDedMem (scratchBufferSize);
702
+ if (check_error (!cuda::CCUDAHandler::defaultHandleResult (cuda::CCUDAHandler::registerBuffer (&scratch)), " Could not register buffer for Denoiser temporary memory with CUDA natively!" ))
701
703
return error_code;
702
704
}
703
705
const auto intensityBufferOffset = denoiserStateBufferSize;
@@ -898,12 +900,9 @@ void main()
898
900
};
899
901
core::SRAIIBasedExiter<decltype (unmapBuffers)> exitRoutine (unmapBuffers);
900
902
901
- cuda::CCUDAHandler::GraphicsAPIObjLink<video::IGPUBuffer> fakeScratchLink; // TODO: undo this
902
- fakeScratchLink.asBuffer .pointer = denoiserScratch;
903
-
904
903
// set up denoiser
905
904
auto & denoiser = denoisers[param.denoiserType ];
906
- if (denoiser.m_denoiser ->setup (m_cudaStream, denoiseTileDimsWithOverlap, denoiserState, denoiser.stateSize , fakeScratchLink , denoiser.scratchSize , denoiser.stateOffset ) != OPTIX_SUCCESS)
905
+ if (denoiser.m_denoiser ->setup (m_cudaStream, denoiseTileDimsWithOverlap, denoiserState, denoiser.stateSize , scratch , denoiser.scratchSize , denoiser.stateOffset ) != OPTIX_SUCCESS)
907
906
{
908
907
os::Printer::log (makeImageIDString (i) + " Could not setup the denoiser for the image resolution and denoiser buffers, skipping image!" , ELL_ERROR);
909
908
continue ;
@@ -944,7 +943,7 @@ void main()
944
943
denoiserInputs,
945
944
denoiserInputCount,
946
945
&denoiserOutput,
947
- fakeScratchLink ,
946
+ scratch ,
948
947
denoiser.scratchSize ,
949
948
overlap,
950
949
tileWidth,
0 commit comments