Skip to content

Commit df3ff18

Browse files
committed
make it work, output dds input/output denoiser data
1 parent 0dc20c2 commit df3ff18

File tree

2 files changed

+63
-55
lines changed

2 files changed

+63
-55
lines changed

examples_tests/39.DenoiserTonemapper/main.cpp

Lines changed: 63 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ int main(int argc, char* argv[])
7676
params.Vsync = true;
7777
params.Doublebuffer = true;
7878
params.Stencilbuffer = false;
79-
params.StreamingDownloadBufferSize = 256*1024*1024; // change in Vulkan fo
79+
params.StreamingDownloadBufferSize = 2560*1024*1024;
8080
auto device = createDeviceEx(params);
8181

8282
if (check_error(!device,"Could not create Irrlicht Device!"))
@@ -843,38 +843,37 @@ void main()
843843
OptixImage2D denoiserInputs[EII_COUNT];
844844
OptixImage2D denoiserOutput;
845845

846-
const video::VkMemoryRequirements& vulkanFetchedReqs = temporaryPixelBuffer.getObject()->getMemoryReqs().vulkanReqs;
847846
auto downloadStagingArea = driver->getDefaultDownStreamingBuffer();
848-
uint32_t address = std::remove_pointer<decltype(downloadStagingArea)>::type::invalid_address; // remember without initializing the address to be allocated to invalid_address you won't get an allocation!
849-
847+
850848
constexpr uint64_t timeoutInNanoSeconds = 300000000000u;
851849
const auto waitPoint = std::chrono::high_resolution_clock::now() + std::chrono::nanoseconds(timeoutInNanoSeconds);
852850

853-
// download buffer
851+
auto downloadAndGetBuffer = [&, downloadStagingArea](auto& optixGpuBuffer) -> core::smart_refctd_ptr<ICPUBuffer>
854852
{
853+
const video::VkMemoryRequirements& vulkanFetchedReqs = optixGpuBuffer.getObject()->getMemoryReqs().vulkanReqs;
854+
uint32_t address = std::remove_pointer<decltype(downloadStagingArea)>::type::invalid_address; // remember without initializing the address to be allocated to invalid_address you won't get an allocation!
855855
const uint32_t alignment = 4096u; // common page size
856856
const uint32_t size = vulkanFetchedReqs.size;
857857
auto unallocatedSize = downloadStagingArea->multi_alloc(waitPoint, 1u, &address, &size, &alignment);
858858
if (unallocatedSize)
859859
{
860860
os::Printer::log(makeImageIDString(i) + "Could not download the buffer from the GPU!", ELL_ERROR);
861-
continue;
861+
return nullptr;
862862
}
863863

864-
driver->copyBuffer(temporaryPixelBuffer.getObject(), downloadStagingArea->getBuffer(), 0u, address, vulkanFetchedReqs.size);
865-
}
866-
auto downloadFence = driver->placeFence(true);
864+
driver->copyBuffer(optixGpuBuffer.getObject(), downloadStagingArea->getBuffer(), 0u, address, vulkanFetchedReqs.size);
867865

868-
auto* data = reinterpret_cast<uint8_t*>(downloadStagingArea->getBufferPointer()) + address;
869-
auto denoiserInputsTexelBuffer = core::make_smart_refctd_ptr<asset::CCustomAllocatorCPUBuffer<core::null_allocator<uint8_t>>>(vulkanFetchedReqs.size, data, core::adopt_memory);
866+
auto downloadFence = driver->placeFence(true);
870867

871-
while (downloadFence->waitCPU(1000ull, downloadFence->canDeferredFlush()) == video::EDFR_TIMEOUT_EXPIRED) {}
872-
873-
//downloadStagingArea->multi_free()
874-
//inline void multi_free(uint32_t count, const size_type* addr, const size_type* bytes) noexcept
875-
// @devsh count?
876-
// it fails at second image while downloading buffer, guess it's due to lack of multi-free
868+
auto* data = reinterpret_cast<uint8_t*>(downloadStagingArea->getBufferPointer()) + address;
869+
return core::make_smart_refctd_ptr<asset::CCustomAllocatorCPUBuffer<core::null_allocator<uint8_t>>>(vulkanFetchedReqs.size, data, core::adopt_memory);
870+
871+
while (downloadFence->waitCPU(1000ull, downloadFence->canDeferredFlush()) == video::EDFR_TIMEOUT_EXPIRED) {}
872+
};
877873

874+
core::smart_refctd_ptr<ICPUBuffer> denoiserInputsTexelBuffer = downloadAndGetBuffer(temporaryPixelBuffer);
875+
core::smart_refctd_ptr<ICPUBuffer> denoiserOutputTexelBuffer = downloadAndGetBuffer(imagePixelBuffer);
876+
878877
for (size_t k = 0; k < denoiserInputCount; k++)
879878
{
880879
denoiserInputs[k].data = temporaryPixelBuffer.asBuffer.pointer + shaderConstants.outImageOffset[k] * sizeof(uint16_t);
@@ -886,7 +885,7 @@ void main()
886885

887886
#ifdef DDSInputOutputDenoiserSave
888887

889-
auto createImage = [&](bool isInputFilterImage = true) -> core::smart_refctd_ptr<ICPUImage>
888+
auto createImage = [&](core::smart_refctd_ptr<ICPUBuffer> referecceTexelBuffer, uint64_t memoryOffset, bool isInputFilterImage = true) -> core::smart_refctd_ptr<ICPUImage>
890889
{
891890
asset::ICPUImage::SCreationParams imgInfo;
892891
imgInfo.format = isInputFilterImage ? EF_R16G16B16_SFLOAT : EF_R16G16B16A16_SFLOAT;
@@ -905,7 +904,7 @@ void main()
905904
auto createTexelBuffer = [&]() -> core::smart_refctd_ptr<asset::ICPUBuffer>
906905
{
907906
if (isInputFilterImage)
908-
return core::make_smart_refctd_ptr<asset::CCustomAllocatorCPUBuffer<core::null_allocator<uint8_t>>>(image->getImageDataSizeInBytes(), reinterpret_cast<uint16_t*>(denoiserInputsTexelBuffer->getPointer()) + shaderConstants.outImageOffset[k], core::adopt_memory);
907+
return core::make_smart_refctd_ptr<asset::CCustomAllocatorCPUBuffer<core::null_allocator<uint8_t>>>(image->getImageDataSizeInBytes(), reinterpret_cast<uint8_t*>(referecceTexelBuffer->getPointer()) + memoryOffset, core::adopt_memory);
909908
else
910909
{
911910
auto texelBuffer = core::make_smart_refctd_ptr<asset::ICPUBuffer>(image->getImageDataSizeInBytes());
@@ -932,36 +931,42 @@ void main()
932931
return image;
933932
};
934933

935-
core::smart_refctd_ptr<asset::ICPUImage> inputTileImage = createImage();
936-
core::smart_refctd_ptr<asset::ICPUImage> outputTileImage = createImage(false);
937-
938-
using ConvertFilter = asset::CConvertFormatImageFilter<EF_R16G16B16_SFLOAT, EF_R16G16B16A16_SFLOAT>;
939-
ConvertFilter convertFilter;
940-
ConvertFilter::state_type state;
941-
942-
state.extent = { param.width, param.height, 1 };
943-
state.inBaseLayer = 0;
944-
state.inImage = inputTileImage.get();
945-
state.inMipLevel = 0;
946-
state.inOffset = { 0, 0, 0 };
947-
state.layerCount = 1;
948-
state.outBaseLayer = 0;
949-
state.outImage = outputTileImage.get();
950-
state.outMipLevel = 0;
951-
state.outOffset = { 0, 0, 0 };
952-
953-
if (!convertFilter.execute(&state))
954-
os::Printer::log("WARNING (" + std::to_string(__LINE__) + " line): Something went wrong while converting the image!", ELL_WARNING);
934+
auto getConvertedOptixData = [&](core::smart_refctd_ptr<ICPUBuffer> texelBuffer, uint64_t memoryOffset) -> core::smart_refctd_ptr<ICPUImageView>
935+
{
936+
core::smart_refctd_ptr<asset::ICPUImage> inputFilterImage = createImage(texelBuffer, memoryOffset);
937+
core::smart_refctd_ptr<asset::ICPUImage> outputFilterImage = createImage(texelBuffer, memoryOffset, false);
938+
939+
using ConvertFilter = asset::CConvertFormatImageFilter<EF_R16G16B16_SFLOAT, EF_R16G16B16A16_SFLOAT>;
940+
ConvertFilter convertFilter;
941+
ConvertFilter::state_type state;
942+
943+
state.extent = { param.width, param.height, 1 };
944+
state.inBaseLayer = 0;
945+
state.inImage = inputFilterImage.get();
946+
state.inMipLevel = 0;
947+
state.inOffset = { 0, 0, 0 };
948+
state.layerCount = 1;
949+
state.outBaseLayer = 0;
950+
state.outImage = outputFilterImage.get();
951+
state.outMipLevel = 0;
952+
state.outOffset = { 0, 0, 0 };
953+
954+
if (!convertFilter.execute(&state))
955+
os::Printer::log("WARNING (" + std::to_string(__LINE__) + " line): Something went wrong while converting the image!", ELL_WARNING);
956+
957+
ICPUImageView::SCreationParams imageViewParams;
958+
imageViewParams.flags = static_cast<ICPUImageView::E_CREATE_FLAGS>(0u);
959+
imageViewParams.format = outputFilterImage->getCreationParameters().format;
960+
imageViewParams.image = std::move(outputFilterImage);
961+
imageViewParams.viewType = ICPUImageView::ET_2D;
962+
imageViewParams.subresourceRange = { static_cast<IImage::E_ASPECT_FLAGS>(0u),0u,1u,0u,1u };
963+
964+
return ICPUImageView::create(std::move(imageViewParams));
965+
};
955966

956-
ICPUImageView::SCreationParams imageViewParams;
957-
imageViewParams.flags = static_cast<ICPUImageView::E_CREATE_FLAGS>(0u);
958-
imageViewParams.format = outputTileImage->getCreationParameters().format;
959-
imageViewParams.image = std::move(outputTileImage);
960-
imageViewParams.viewType = ICPUImageView::ET_2D;
961-
imageViewParams.subresourceRange = { static_cast<IImage::E_ASPECT_FLAGS>(0u),0u,1u,0u,1u };
962-
auto imageView = ICPUImageView::create(std::move(imageViewParams));
967+
core::smart_refctd_ptr<ICPUImageView> inputDenoiserImageData = getConvertedOptixData(denoiserInputsTexelBuffer, shaderConstants.outImageOffset[k] * sizeof(uint16_t));
963968
{
964-
IAssetWriter::SAssetWriteParams wp(imageView.get());
969+
IAssetWriter::SAssetWriteParams wp(inputDenoiserImageData.get());
965970
auto getInputTerminateName = [&]() -> std::string
966971
{
967972
switch (k)
@@ -981,7 +986,17 @@ void main()
981986
std::string fileName = removedExtensionFile + "_optix_input_" + getInputTerminateName() + ".dds";
982987

983988
if(!am->writeAsset(fileName , wp))
984-
os::Printer::log("ERROR (" + std::to_string(__LINE__) + " line): Could not save .dds file!", ELL_ERROR);
989+
os::Printer::log("ERROR (" + std::to_string(__LINE__) + " line): Could not save input denoiser .dds file!", ELL_ERROR);
990+
991+
if (k >= denoiserInputCount - 1)
992+
{
993+
core::smart_refctd_ptr<ICPUImageView> outputDenoiserImageData = getConvertedOptixData(denoiserOutputTexelBuffer, shaderConstants.inImageTexelOffset[EII_COLOR]);
994+
fileName = removedExtensionFile + "_optix_output.dds";
995+
IAssetWriter::SAssetWriteParams wp(outputDenoiserImageData.get());
996+
997+
if (!am->writeAsset(fileName, wp))
998+
os::Printer::log("ERROR (" + std::to_string(__LINE__) + " line): Could not save output denoiser .dds file!", ELL_ERROR);
999+
}
9851000
}
9861001

9871002
#endif // DDSInputOutputDenoiserSave

include/nbl/builtin/glsl/ext/LumaMeter/common.glsl

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -52,13 +52,6 @@ struct nbl_glsl_ext_LumaMeter_Uniforms_t
5252
#define _NBL_GLSL_EXT_LUMA_METER_SHARED_SIZE_NEEDED_ _NBL_GLSL_WORKGROUP_ARITHMETIC_SHARED_SIZE_NEEDED_
5353
#endif
5454

55-
#if _NBL_GLSL_EXT_LUMA_METER_MAX_LUMA_DEFINED_<0xffffffff
56-
#error "1"
57-
#endif
58-
#if _NBL_GLSL_EXT_LUMA_METER_MIN_LUMA_DEFINED_<0xffffffff
59-
#error "1"
60-
#endif
61-
6255
#if NBL_GLSL_NOT_EQUAL(NBL_GLSL_AND(NBL_GLSL_SUB(_NBL_GLSL_EXT_LUMA_METER_MAX_LUMA_DEFINED_,_NBL_GLSL_EXT_LUMA_METER_MIN_LUMA_DEFINED_),_NBL_GLSL_EXT_LUMA_METER_BIN_COUNT-1),0)
6356
#error "The number of bins must evenly divide the histogram range!"
6457
#endif

0 commit comments

Comments
 (0)