@@ -76,7 +76,7 @@ int main(int argc, char* argv[])
76
76
params.Vsync = true ;
77
77
params.Doublebuffer = true ;
78
78
params.Stencilbuffer = false ;
79
- params.StreamingDownloadBufferSize = 256 *1024 *1024 ; // change in Vulkan fo
79
+ params.StreamingDownloadBufferSize = 2560 *1024 *1024 ;
80
80
auto device = createDeviceEx (params);
81
81
82
82
if (check_error (!device," Could not create Irrlicht Device!" ))
@@ -843,38 +843,37 @@ void main()
843
843
OptixImage2D denoiserInputs[EII_COUNT];
844
844
OptixImage2D denoiserOutput;
845
845
846
- const video::VkMemoryRequirements& vulkanFetchedReqs = temporaryPixelBuffer.getObject ()->getMemoryReqs ().vulkanReqs ;
847
846
auto downloadStagingArea = driver->getDefaultDownStreamingBuffer ();
848
- uint32_t address = std::remove_pointer<decltype (downloadStagingArea)>::type::invalid_address; // remember without initializing the address to be allocated to invalid_address you won't get an allocation!
849
-
847
+
850
848
constexpr uint64_t timeoutInNanoSeconds = 300000000000u ;
851
849
const auto waitPoint = std::chrono::high_resolution_clock::now () + std::chrono::nanoseconds (timeoutInNanoSeconds);
852
850
853
- // download buffer
851
+ auto downloadAndGetBuffer = [&, downloadStagingArea]( auto & optixGpuBuffer) -> core::smart_refctd_ptr<ICPUBuffer>
854
852
{
853
+ const video::VkMemoryRequirements& vulkanFetchedReqs = optixGpuBuffer.getObject ()->getMemoryReqs ().vulkanReqs ;
854
+ uint32_t address = std::remove_pointer<decltype (downloadStagingArea)>::type::invalid_address; // remember without initializing the address to be allocated to invalid_address you won't get an allocation!
855
855
const uint32_t alignment = 4096u ; // common page size
856
856
const uint32_t size = vulkanFetchedReqs.size ;
857
857
auto unallocatedSize = downloadStagingArea->multi_alloc (waitPoint, 1u , &address, &size, &alignment);
858
858
if (unallocatedSize)
859
859
{
860
860
os::Printer::log (makeImageIDString (i) + " Could not download the buffer from the GPU!" , ELL_ERROR);
861
- continue ;
861
+ return nullptr ;
862
862
}
863
863
864
- driver->copyBuffer (temporaryPixelBuffer.getObject (), downloadStagingArea->getBuffer (), 0u , address, vulkanFetchedReqs.size );
865
- }
866
- auto downloadFence = driver->placeFence (true );
864
+ driver->copyBuffer (optixGpuBuffer.getObject (), downloadStagingArea->getBuffer (), 0u , address, vulkanFetchedReqs.size );
867
865
868
- auto * data = reinterpret_cast <uint8_t *>(downloadStagingArea->getBufferPointer ()) + address;
869
- auto denoiserInputsTexelBuffer = core::make_smart_refctd_ptr<asset::CCustomAllocatorCPUBuffer<core::null_allocator<uint8_t >>>(vulkanFetchedReqs.size , data, core::adopt_memory);
866
+ auto downloadFence = driver->placeFence (true );
870
867
871
- while (downloadFence->waitCPU (1000ull , downloadFence->canDeferredFlush ()) == video::EDFR_TIMEOUT_EXPIRED) {}
872
-
873
- // downloadStagingArea->multi_free()
874
- // inline void multi_free(uint32_t count, const size_type* addr, const size_type* bytes) noexcept
875
- // @devsh count?
876
- // it fails at second image while downloading buffer, guess it's due to lack of multi-free
868
+ auto * data = reinterpret_cast <uint8_t *>(downloadStagingArea->getBufferPointer ()) + address;
869
+ return core::make_smart_refctd_ptr<asset::CCustomAllocatorCPUBuffer<core::null_allocator<uint8_t >>>(vulkanFetchedReqs.size , data, core::adopt_memory);
870
+
871
+ while (downloadFence->waitCPU (1000ull , downloadFence->canDeferredFlush ()) == video::EDFR_TIMEOUT_EXPIRED) {}
872
+ };
877
873
874
+ core::smart_refctd_ptr<ICPUBuffer> denoiserInputsTexelBuffer = downloadAndGetBuffer (temporaryPixelBuffer);
875
+ core::smart_refctd_ptr<ICPUBuffer> denoiserOutputTexelBuffer = downloadAndGetBuffer (imagePixelBuffer);
876
+
878
877
for (size_t k = 0 ; k < denoiserInputCount; k++)
879
878
{
880
879
denoiserInputs[k].data = temporaryPixelBuffer.asBuffer .pointer + shaderConstants.outImageOffset [k] * sizeof (uint16_t );
@@ -886,7 +885,7 @@ void main()
886
885
887
886
#ifdef DDSInputOutputDenoiserSave
888
887
889
- auto createImage = [&](bool isInputFilterImage = true ) -> core::smart_refctd_ptr<ICPUImage>
888
+ auto createImage = [&](core::smart_refctd_ptr<ICPUBuffer> referecceTexelBuffer, uint64_t memoryOffset, bool isInputFilterImage = true ) -> core::smart_refctd_ptr<ICPUImage>
890
889
{
891
890
asset::ICPUImage::SCreationParams imgInfo;
892
891
imgInfo.format = isInputFilterImage ? EF_R16G16B16_SFLOAT : EF_R16G16B16A16_SFLOAT;
@@ -905,7 +904,7 @@ void main()
905
904
auto createTexelBuffer = [&]() -> core::smart_refctd_ptr<asset::ICPUBuffer>
906
905
{
907
906
if (isInputFilterImage)
908
- return core::make_smart_refctd_ptr<asset::CCustomAllocatorCPUBuffer<core::null_allocator<uint8_t >>>(image->getImageDataSizeInBytes (), reinterpret_cast <uint16_t *>(denoiserInputsTexelBuffer ->getPointer ()) + shaderConstants. outImageOffset [k] , core::adopt_memory);
907
+ return core::make_smart_refctd_ptr<asset::CCustomAllocatorCPUBuffer<core::null_allocator<uint8_t >>>(image->getImageDataSizeInBytes (), reinterpret_cast <uint8_t *>(referecceTexelBuffer ->getPointer ()) + memoryOffset , core::adopt_memory);
909
908
else
910
909
{
911
910
auto texelBuffer = core::make_smart_refctd_ptr<asset::ICPUBuffer>(image->getImageDataSizeInBytes ());
@@ -932,36 +931,42 @@ void main()
932
931
return image;
933
932
};
934
933
935
- core::smart_refctd_ptr<asset::ICPUImage> inputTileImage = createImage ();
936
- core::smart_refctd_ptr<asset::ICPUImage> outputTileImage = createImage (false );
937
-
938
- using ConvertFilter = asset::CConvertFormatImageFilter<EF_R16G16B16_SFLOAT, EF_R16G16B16A16_SFLOAT>;
939
- ConvertFilter convertFilter;
940
- ConvertFilter::state_type state;
941
-
942
- state.extent = { param.width , param.height , 1 };
943
- state.inBaseLayer = 0 ;
944
- state.inImage = inputTileImage.get ();
945
- state.inMipLevel = 0 ;
946
- state.inOffset = { 0 , 0 , 0 };
947
- state.layerCount = 1 ;
948
- state.outBaseLayer = 0 ;
949
- state.outImage = outputTileImage.get ();
950
- state.outMipLevel = 0 ;
951
- state.outOffset = { 0 , 0 , 0 };
952
-
953
- if (!convertFilter.execute (&state))
954
- os::Printer::log (" WARNING (" + std::to_string (__LINE__) + " line): Something went wrong while converting the image!" , ELL_WARNING);
934
+ auto getConvertedOptixData = [&](core::smart_refctd_ptr<ICPUBuffer> texelBuffer, uint64_t memoryOffset) -> core::smart_refctd_ptr<ICPUImageView>
935
+ {
936
+ core::smart_refctd_ptr<asset::ICPUImage> inputFilterImage = createImage (texelBuffer, memoryOffset);
937
+ core::smart_refctd_ptr<asset::ICPUImage> outputFilterImage = createImage (texelBuffer, memoryOffset, false );
938
+
939
+ using ConvertFilter = asset::CConvertFormatImageFilter<EF_R16G16B16_SFLOAT, EF_R16G16B16A16_SFLOAT>;
940
+ ConvertFilter convertFilter;
941
+ ConvertFilter::state_type state;
942
+
943
+ state.extent = { param.width , param.height , 1 };
944
+ state.inBaseLayer = 0 ;
945
+ state.inImage = inputFilterImage.get ();
946
+ state.inMipLevel = 0 ;
947
+ state.inOffset = { 0 , 0 , 0 };
948
+ state.layerCount = 1 ;
949
+ state.outBaseLayer = 0 ;
950
+ state.outImage = outputFilterImage.get ();
951
+ state.outMipLevel = 0 ;
952
+ state.outOffset = { 0 , 0 , 0 };
953
+
954
+ if (!convertFilter.execute (&state))
955
+ os::Printer::log (" WARNING (" + std::to_string (__LINE__) + " line): Something went wrong while converting the image!" , ELL_WARNING);
956
+
957
+ ICPUImageView::SCreationParams imageViewParams;
958
+ imageViewParams.flags = static_cast <ICPUImageView::E_CREATE_FLAGS>(0u );
959
+ imageViewParams.format = outputFilterImage->getCreationParameters ().format ;
960
+ imageViewParams.image = std::move (outputFilterImage);
961
+ imageViewParams.viewType = ICPUImageView::ET_2D;
962
+ imageViewParams.subresourceRange = { static_cast <IImage::E_ASPECT_FLAGS>(0u ),0u ,1u ,0u ,1u };
963
+
964
+ return ICPUImageView::create (std::move (imageViewParams));
965
+ };
955
966
956
- ICPUImageView::SCreationParams imageViewParams;
957
- imageViewParams.flags = static_cast <ICPUImageView::E_CREATE_FLAGS>(0u );
958
- imageViewParams.format = outputTileImage->getCreationParameters ().format ;
959
- imageViewParams.image = std::move (outputTileImage);
960
- imageViewParams.viewType = ICPUImageView::ET_2D;
961
- imageViewParams.subresourceRange = { static_cast <IImage::E_ASPECT_FLAGS>(0u ),0u ,1u ,0u ,1u };
962
- auto imageView = ICPUImageView::create (std::move (imageViewParams));
967
+ core::smart_refctd_ptr<ICPUImageView> inputDenoiserImageData = getConvertedOptixData (denoiserInputsTexelBuffer, shaderConstants.outImageOffset [k] * sizeof (uint16_t ));
963
968
{
964
- IAssetWriter::SAssetWriteParams wp (imageView .get ());
969
+ IAssetWriter::SAssetWriteParams wp (inputDenoiserImageData .get ());
965
970
auto getInputTerminateName = [&]() -> std::string
966
971
{
967
972
switch (k)
@@ -981,7 +986,17 @@ void main()
981
986
std::string fileName = removedExtensionFile + " _optix_input_" + getInputTerminateName () + " .dds" ;
982
987
983
988
if (!am->writeAsset (fileName , wp))
984
- os::Printer::log (" ERROR (" + std::to_string (__LINE__) + " line): Could not save .dds file!" , ELL_ERROR);
989
+ os::Printer::log (" ERROR (" + std::to_string (__LINE__) + " line): Could not save input denoiser .dds file!" , ELL_ERROR);
990
+
991
+ if (k >= denoiserInputCount - 1 )
992
+ {
993
+ core::smart_refctd_ptr<ICPUImageView> outputDenoiserImageData = getConvertedOptixData (denoiserOutputTexelBuffer, shaderConstants.inImageTexelOffset [EII_COLOR]);
994
+ fileName = removedExtensionFile + " _optix_output.dds" ;
995
+ IAssetWriter::SAssetWriteParams wp (outputDenoiserImageData.get ());
996
+
997
+ if (!am->writeAsset (fileName, wp))
998
+ os::Printer::log (" ERROR (" + std::to_string (__LINE__) + " line): Could not save output denoiser .dds file!" , ELL_ERROR);
999
+ }
985
1000
}
986
1001
987
1002
#endif // DDSInputOutputDenoiserSave
0 commit comments