Skip to content

Commit c79f13c

Browse files
committed
Move Hazard's work to Nabla and adjust to some NBL changes, make it compile
1 parent 558f1fb commit c79f13c

File tree

8 files changed

+139
-95
lines changed

8 files changed

+139
-95
lines changed

cmake/FindOptiX.cmake

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@ macro(_find_version_path targetVersion targetPath rootName searchList )
2929
string ( REGEX MATCH "${rootName}(.*)([0-9]+).([0-9]+).([0-9]+)(.*)$" result "${checkdir}" )
3030
if ( "${result}" STREQUAL "${checkdir}" )
3131
# found a path with versioning
32+
# Reject OptiX versions older than 7.2. For this regex CMAKE_MATCH_2 is the
# major and CMAKE_MATCH_3 the minor component (the same groups used to build
# `ver` right after this check). The "major == 7 AND minor < 2" clause is
# grouped explicitly: if() evaluates AND/OR left to right, so without the
# parentheses the test became "(major <= 7) AND minor < 2" and e.g. 6.5 passed.
if(${CMAKE_MATCH_2} LESS "7" OR (${CMAKE_MATCH_2} EQUAL "7" AND ${CMAKE_MATCH_3} LESS "2"))
33+
message(STATUS "Found OptiX ${CMAKE_MATCH_2}.${CMAKE_MATCH_3}.${CMAKE_MATCH_4}. Expecting 7.2.0 or higher")
34+
continue()
35+
endif()
3236
SET ( ver "${CMAKE_MATCH_2}.${CMAKE_MATCH_3}.${CMAKE_MATCH_4}" )
3337
if ( ver GREATER bestver )
3438
SET ( bestver ${ver} )
@@ -78,10 +82,15 @@ if (DEFINED OPTIX_LOCATION OR DEFINED ENV{OPTIX_LOCATION} )
7882
# Locate by version failed. Handle user override for OPTIX_LOCATION.
7983
string ( REGEX MATCH ".*([0-9]+).([0-9]+).([0-9]+)(.*)$" result "${OPTIX_LOCATION}" )
8084
if ( "${result}" STREQUAL "${OPTIX_LOCATION}" )
85+
# The regex matched against OPTIX_LOCATION has no leading capture group, so
# CMAKE_MATCH_1/2/3 are major/minor/patch (the SET calls below rely on the
# same numbering). The "major == 7 AND minor < 2" clause is parenthesized
# because if() evaluates AND/OR left to right.
if(${CMAKE_MATCH_1} LESS "7" OR (${CMAKE_MATCH_1} EQUAL "7" AND ${CMAKE_MATCH_2} LESS "2"))
86+
message(STATUS "Found OptiX ${CMAKE_MATCH_1}.${CMAKE_MATCH_2}.${CMAKE_MATCH_3}. Expecting 7.2.0 or higher")
87+
# NOTE(review): continue() is only valid inside foreach()/while(); no enclosing
# loop is visible around this block - confirm, otherwise drop it (the else()
# branch below already skips the version assignment).
continue()
88+
else()
8189
SET ( bestver "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}.${CMAKE_MATCH_3}" )
8290
SET ( bestmajorver ${CMAKE_MATCH_1})
8391
SET ( bestminorver ${CMAKE_MATCH_2})
8492
Message(STATUS "found version ${bestver}")
93+
endif()
8594
else()
8695
Message(WARNING "Could NOT extract the version from OptiX folder : ${result}")
8796
endif()

examples_tests/39.DenoiserTonemapper/CommandLineHandler.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ using namespace nbl;
1010
using namespace asset;
1111
using namespace core;
1212

13-
CommandLineHandler::CommandLineHandler(core::vector<std::string> argv, IAssetManager* am) : status(false), assetManager(am)
13+
CommandLineHandler::CommandLineHandler(core::vector<std::string> argv, IAssetManager* am, nbl::io::IFileSystem* fs) : status(false), assetManager(am)
1414
{
1515
auto startEntireTime = std::chrono::steady_clock::now();
1616

@@ -25,7 +25,7 @@ CommandLineHandler::CommandLineHandler(core::vector<std::string> argv, IAssetMan
2525
return;
2626
}
2727

28-
assetManager->addAssetLoader(core::make_smart_refctd_ptr<nbl::ext::MitsubaLoader::CMitsubaLoader>(assetManager));
28+
assetManager->addAssetLoader(core::make_smart_refctd_ptr<nbl::ext::MitsubaLoader::CMitsubaLoader>(am, fs));
2929
core::vector<std::array<std::string, PROPER_CMD_ARGUMENTS_AMOUNT>> argvMappedList;
3030

3131
auto pushArgvList = [&](auto argvStream, auto variableCount)

examples_tests/39.DenoiserTonemapper/CommandLineHandler.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ class CommandLineHandler
181181
{
182182
public:
183183

184-
CommandLineHandler(nbl::core::vector<std::string> argv, nbl::asset::IAssetManager* am);
184+
CommandLineHandler(nbl::core::vector<std::string> argv, nbl::asset::IAssetManager* am, nbl::io::IFileSystem* fs);
185185

186186
auto getInputFilesAmount()
187187
{

examples_tests/39.DenoiserTonemapper/main.cpp

Lines changed: 90 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,13 @@ bool check_error(bool cond, const char* message)
4949
os::Printer::log(message, ELL_ERROR);
5050
return cond;
5151
}
52-
/*
53-
if (check_error(,"!"))
54-
return error_code;
55-
*/
52+
53+
constexpr uint32_t overlap = 64;
54+
//constexpr uint32_t tileWidth = 1920/2, tileHeight = 1080/2;
55+
constexpr uint32_t tileWidth = 1024, tileHeight = 1024;
56+
constexpr uint32_t tileWidthWithOverlap = tileWidth + overlap * 2;
57+
constexpr uint32_t tileHeightWithOverlap = tileHeight + overlap * 2;
58+
constexpr uint32_t outputDimensions[] = { tileWidth ,tileHeight };
5659

5760
int main(int argc, char* argv[])
5861
{
@@ -94,8 +97,8 @@ int main(int argc, char* argv[])
9497

9598
return arguments;
9699
};
97-
98-
auto cmdHandler = CommandLineHandler(getArgvFetchedList(), am);
100+
101+
auto cmdHandler = CommandLineHandler(getArgvFetchedList(), am, device->getFileSystem());
99102

100103
if (check_error(!cmdHandler.getStatus(),"Could not parse input commands!"))
101104
return error_code;
@@ -136,7 +139,7 @@ int main(int argc, char* argv[])
136139
constexpr auto forcedOptiXFormatPixelStride = 6u;
137140
DenoiserToUse denoisers[EII_COUNT];
138141
{
139-
OptixDenoiserOptions opts = { OPTIX_DENOISER_INPUT_RGB,forcedOptiXFormat };
142+
OptixDenoiserOptions opts = { OPTIX_DENOISER_INPUT_RGB };
140143
denoisers[EII_COLOR].m_denoiser = m_optixContext->createDenoiser(&opts);
141144
if (check_error(!denoisers[EII_COLOR].m_denoiser, "Could not create Optix Color Denoiser!"))
142145
return error_code;
@@ -428,7 +431,7 @@ void main()
428431

429432
core::vector<ImageToDenoise> images(inputFilesAmount);
430433
// load images
431-
uint32_t maxResolution[EII_COUNT][2] = { 0 };
434+
uint32_t maxResolution[2] = { 0,0 };
432435
{
433436
asset::IAssetLoader::SAssetLoadParams lp(0ull,nullptr);
434437

@@ -458,24 +461,24 @@ void main()
458461
uint32_t pickedChannel = 0u;
459462
auto contents = assetBundle.getContents();
460463
if (channelName.has_value())
461-
for (auto& asset : contents)
462-
{
463-
assert(asset);
464-
465-
auto metadata = asset->getMetadata();
466-
auto exrmeta = static_cast<COpenEXRImageMetadata*>(metadata);
467-
if (strcmp(metadata->getLoaderName(),COpenEXRImageMetadata::LoaderName)!=0)
468-
continue;
469-
else
464+
for (auto& asset : contents)
470465
{
471-
const auto& assetMetaChannelName = exrmeta->getName();
472-
auto found = assetMetaChannelName.find(channelName.value());
473-
if (found>=firstChannelNameOccurence)
466+
assert(asset);
467+
468+
auto metadata = asset->getMetadata();
469+
const auto exrmeta = static_cast<const COpenEXRImageMetadata*>(metadata);
470+
if (strcmp(metadata->getLoaderName(), COpenEXRImageMetadata::LoaderName) != 0)
474471
continue;
475-
firstChannelNameOccurence = found;
476-
pickedChannel = std::distance(contents.begin(), &asset);
472+
else
473+
{
474+
const auto& assetMetaChannelName = exrmeta->getName();
475+
auto found = assetMetaChannelName.find(channelName.value());
476+
if (found >= firstChannelNameOccurence)
477+
continue;
478+
firstChannelNameOccurence = found;
479+
pickedChannel = std::distance(contents.begin(), &asset);
480+
}
477481
}
478-
}
479482

480483
return asset::IAsset::castDown<ICPUImage>(contents.begin()[pickedChannel]);
481484
};
@@ -545,19 +548,15 @@ void main()
545548
const auto& region = regions.begin()[0];
546549
assert(region.bufferRowLength);
547550
outParam.colorTexelSize = asset::getTexelOrBlockBytesize(colorCreationParams.format);
548-
uint32_t bytesize = extent.height*region.bufferRowLength*outParam.colorTexelSize;
549-
if (bytesize>params.StreamingDownloadBufferSize)
550-
{
551-
os::Printer::log(imageIDString + "Image too large to download from GPU in one piece!", ELL_ERROR);
552-
outParam = {};
553-
continue;
554-
}
555551
}
556552

557553
outParam.denoiserType = EII_COLOR;
558554

559555
outParam.width = extent.width;
560556
outParam.height = extent.height;
557+
558+
maxResolution[0] = core::max(maxResolution[0], outParam.width);
559+
maxResolution[1] = core::max(maxResolution[1], outParam.height);
561560
}
562561

563562
auto& albedoImage = outParam.image[EII_ALBEDO];
@@ -590,9 +589,6 @@ void main()
590589
else
591590
outParam.denoiserType = EII_NORMAL;
592591
}
593-
594-
maxResolution[outParam.denoiserType][0] = core::max(maxResolution[outParam.denoiserType][0],outParam.width);
595-
maxResolution[outParam.denoiserType][1] = core::max(maxResolution[outParam.denoiserType][1],outParam.height);
596592
}
597593
}
598594

@@ -606,22 +602,17 @@ void main()
606602
auto& intensityBuffer = bufferLinks[0];
607603
auto& denoiserState = bufferLinks[0];
608604
auto& temporaryPixelBuffer = bufferLinks[1];
609-
auto& imagePixelBuffer = bufferLinks[2];
605+
auto& imagePixelBuffer = bufferLinks[2]; // buffer to store result of denoising of a tile/image
610606
size_t denoiserStateBufferSize = 0ull;
611607
{
612608
size_t scratchBufferSize = 0ull;
613609
size_t pixelBufferSize = 0ull;
614610
for (uint32_t i=0u; i<EII_COUNT; i++)
615611
{
616612
auto& denoiser = denoisers[i].m_denoiser;
617-
if (maxResolution[i][0]==0u || maxResolution[i][1]==0u)
618-
{
619-
denoiser = nullptr;
620-
continue;
621-
}
622613

623614
OptixDenoiserSizes m_denoiserMemReqs;
624-
if (denoiser->computeMemoryResources(&m_denoiserMemReqs, maxResolution[i])!=OPTIX_SUCCESS)
615+
if (denoiser->computeMemoryResources(&m_denoiserMemReqs, outputDimensions)!=OPTIX_SUCCESS)
625616
{
626617
static const char* errorMsgs[EII_COUNT] = { "Failed to compute Color-Denoiser Memory Requirements!",
627618
"Failed to compute Color-Albedo-Denoiser Memory Requirements!",
@@ -633,8 +624,9 @@ void main()
633624

634625
denoisers[i].stateOffset = denoiserStateBufferSize;
635626
denoiserStateBufferSize += denoisers[i].stateSize = m_denoiserMemReqs.stateSizeInBytes;
636-
scratchBufferSize = core::max(scratchBufferSize,denoisers[i].scratchSize = m_denoiserMemReqs.recommendedScratchSizeInBytes);
637-
pixelBufferSize = core::max(pixelBufferSize,core::max(asset::getTexelOrBlockBytesize(EF_R32G32B32A32_SFLOAT),(i+1u)*forcedOptiXFormatPixelStride)*maxResolution[i][0]*maxResolution[i][1]);
627+
scratchBufferSize = core::max(scratchBufferSize, denoisers[i].scratchSize = m_denoiserMemReqs.withOverlapScratchSizeInBytes);
628+
pixelBufferSize = core::max(pixelBufferSize, core::max(asset::getTexelOrBlockBytesize(EF_R32G32B32A32_SFLOAT), (i + 1u) * forcedOptiXFormatPixelStride) * maxResolution[0] * maxResolution[1]);
629+
638630
}
639631
std::string message = "Total VRAM consumption for Denoiser algorithm: ";
640632
os::Printer::log(message+std::to_string(denoiserStateBufferSize+scratchBufferSize+pixelBufferSize), ELL_INFORMATION);
@@ -698,14 +690,14 @@ void main()
698690
{
699691
case ToneMapperClass::EO_REINHARD:
700692
{
701-
auto tp = ToneMapperClass::Params_t<ToneMapperClass::EO_REINHARD>(optiXIntensityKeyCompensation,key,extraParam);
693+
auto tp = ToneMapperClass::Params_t<ToneMapperClass::EO_REINHARD>(optiXIntensityKeyCompensation, key, extraParam);
702694
shaderConstants.tonemapperParams[0] = tp.keyAndLinearExposure;
703695
shaderConstants.tonemapperParams[1] = tp.rcpWhite2;
704696
break;
705697
}
706698
case ToneMapperClass::EO_ACES:
707699
{
708-
auto tp = ToneMapperClass::Params_t<ToneMapperClass::EO_ACES>(optiXIntensityKeyCompensation,key,extraParam);
700+
auto tp = ToneMapperClass::Params_t<ToneMapperClass::EO_ACES>(optiXIntensityKeyCompensation, key, extraParam);
709701
shaderConstants.tonemapperParams[0] = tp.gamma;
710702
shaderConstants.tonemapperParams[1] = (&tp.gamma)[1];
711703
break;
@@ -724,9 +716,9 @@ void main()
724716
break;
725717
}
726718
}
727-
auto totalSampleCount = param.width*param.height;
728-
shaderConstants.percentileRange[0] = lowerPercentile*float(totalSampleCount);
729-
shaderConstants.percentileRange[1] = upperPercentile*float(totalSampleCount);
719+
auto totalSampleCount = param.width * param.height;
720+
shaderConstants.percentileRange[0] = lowerPercentile * float(totalSampleCount);
721+
shaderConstants.percentileRange[1] = upperPercentile * float(totalSampleCount);
730722
shaderConstants.normalMatrix = cameraTransformBundle[i].value();
731723
}
732724

@@ -822,52 +814,62 @@ void main()
822814
#undef DENOISER_BUFFER_COUNT
823815
core::SRAIIBasedExiter<decltype(unmapBuffers)> exitRoutine(unmapBuffers);
824816

825-
// set up optix image
826-
OptixImage2D denoiserInputs[EII_COUNT];
827-
for (uint32_t j=0u; j<denoiserInputCount; j++)
828-
{
829-
denoiserInputs[j].data = temporaryPixelBuffer.asBuffer.pointer+shaderConstants.outImageOffset[j]*sizeof(uint16_t); // sizeof(float16_t)
830-
denoiserInputs[j].width = param.width;
831-
denoiserInputs[j].height = param.height;
832-
denoiserInputs[j].rowStrideInBytes = param.width*forcedOptiXFormatPixelStride;
833-
denoiserInputs[j].pixelStrideInBytes = 0u;
834-
denoiserInputs[j].format = forcedOptiXFormat;
835-
}
836-
//
817+
cuda::CCUDAHandler::GraphicsAPIObjLink<video::IGPUBuffer> fakeScratchLink;
818+
fakeScratchLink.asBuffer.pointer = denoiserScratch;
819+
820+
// set up denoiser
821+
auto& denoiser = denoisers[param.denoiserType];
822+
if (denoiser.m_denoiser->setup(m_cudaStream, outputDimensions, denoiserState, denoiser.stateSize, fakeScratchLink, denoiser.scratchSize, denoiser.stateOffset) != OPTIX_SUCCESS)
837823
{
838-
cuda::CCUDAHandler::GraphicsAPIObjLink<video::IGPUBuffer> fakeScratchLink;
839-
fakeScratchLink.asBuffer.pointer = denoiserScratch;
824+
os::Printer::log(makeImageIDString(i) + "Could not setup the denoiser for the image resolution and denoiser buffers, skipping image!", ELL_ERROR);
825+
continue;
826+
}
827+
828+
//invocation params
829+
OptixDenoiserParams denoiserParams = {};
830+
denoiserParams.blendFactor = denoiserBlendFactorBundle[i].value();
831+
denoiserParams.denoiseAlpha = 0u;
832+
denoiserParams.hdrIntensity = intensityBuffer.asBuffer.pointer + intensityBufferOffset;
833+
834+
//input with RGB, Albedo, Normals
835+
OptixImage2D denoiserInputs[EII_COUNT];
836+
OptixImage2D denoiserOutput;
840837

841-
// set up denoiser
842-
auto& denoiser = denoisers[param.denoiserType];
843-
if (denoiser.m_denoiser->setup(m_cudaStream,&param.width,denoiserState,denoiser.stateSize,fakeScratchLink,denoiser.scratchSize,denoiser.stateOffset)!=OPTIX_SUCCESS)
844-
{
845-
os::Printer::log(makeImageIDString(i) + "Could not setup the denoiser for the image resolution and denoiser buffers, skipping image!", ELL_ERROR);
846-
continue;
847-
}
838+
for (size_t k = 0; k < denoiserInputCount; k++)
839+
{
840+
denoiserInputs[k].data = temporaryPixelBuffer.asBuffer.pointer + shaderConstants.outImageOffset[k] * sizeof(uint16_t);
841+
denoiserInputs[k].width = param.width;
842+
denoiserInputs[k].height = param.height;
843+
denoiserInputs[k].rowStrideInBytes = param.width * forcedOptiXFormatPixelStride;
844+
denoiserInputs[k].format = forcedOptiXFormat;
845+
denoiserInputs[k].pixelStrideInBytes = forcedOptiXFormatPixelStride;
848846

849-
// invoke
850-
{
851-
OptixDenoiserParams denoiserParams = {};
852-
denoiserParams.blendFactor = denoiserBlendFactorBundle[i].value();
853-
denoiserParams.denoiseAlpha = 0u;
854-
denoiserParams.hdrIntensity = intensityBuffer.asBuffer.pointer+intensityBufferOffset;
855-
OptixImage2D denoiserOutput;
856-
denoiserOutput.data = imagePixelBuffer.asBuffer.pointer+shaderConstants.inImageTexelOffset[EII_COLOR];
857-
denoiserOutput.width = param.width;
858-
denoiserOutput.height = param.height;
859-
denoiserOutput.rowStrideInBytes = param.width*forcedOptiXFormatPixelStride;
860-
denoiserOutput.pixelStrideInBytes = 0u;
861-
denoiserOutput.format = forcedOptiXFormat;
862-
if (denoiser.m_denoiser->invoke(m_cudaStream,&denoiserParams,denoiserInputs,denoiserInputs+denoiserInputCount,&denoiserOutput,fakeScratchLink,denoiser.scratchSize)!=OPTIX_SUCCESS)
863-
{
864-
os::Printer::log(makeImageIDString(i) + "Could not invoke the denoiser sucessfully, skipping image!", ELL_ERROR);
865-
continue;
866-
}
867-
}
868847
}
869848

870-
// unmap buffer (implicit from the SRAIIExiter destructor)
849+
denoiserOutput.data = imagePixelBuffer.asBuffer.pointer + shaderConstants.inImageTexelOffset[EII_COLOR];
850+
denoiserOutput.width = param.width;
851+
denoiserOutput.height = param.height;
852+
denoiserOutput.rowStrideInBytes = param.width * forcedOptiXFormatPixelStride;
853+
denoiserOutput.format = forcedOptiXFormat;
854+
denoiserOutput.pixelStrideInBytes = forcedOptiXFormatPixelStride;
855+
856+
//invoke
857+
if (denoiser.m_denoiser->tileAndInvoke(
858+
m_cudaStream,
859+
&denoiserParams,
860+
denoiserInputs,
861+
denoiserInputCount,
862+
&denoiserOutput,
863+
fakeScratchLink,
864+
denoiser.scratchSize,
865+
overlap,
866+
tileWidth,
867+
tileHeight
868+
) != OPTIX_SUCCESS)
869+
{
870+
os::Printer::log(makeImageIDString(i) + "Could not invoke the denoiser sucessfully, skipping image!", ELL_ERROR);
871+
continue;
872+
}
871873
}
872874

873875
// compute post-processing

include/nbl/ext/OptiX/IDenoiser.h

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77

88
#include "../../../../src/nbl/video/CCUDAHandler.h"
99

10-
#include "optix.h"
10+
#include <optix.h>
11+
#include <optix_denoiser_tiling.h>
1112

1213
namespace nbl
1314
{
@@ -71,6 +72,38 @@ class IDenoiser final : public core::IReferenceCounted
7172
scratchBuffer.asBuffer.pointer+scratchBufferOffset,scratchSizeInBytes);
7273
}
7374

75+
//! Runs the denoiser by forwarding all arguments to `optixUtilDenoiserInvokeTiled`.
/**
	Returns early with the stored `alreadySetup` code when it is not `OPTIX_SUCCESS`,
	otherwise returns whatever `optixUtilDenoiserInvokeTiled` returns.

	@param stream CUDA stream passed through to the OptiX utility.
	@param params Denoiser invocation parameters, passed through unchanged.
	@param inputLayers Pointer to the first input layer.
	@param numInputLayers Number of entries readable through `inputLayers`.
	@param outputLayer Image that receives the result.
	@param scratch Buffer link whose `asBuffer.pointer` is used as scratch memory.
	@param scratchSizeInBytes Size of the scratch memory handed to OptiX.
	@param overlapWindowSizeInPixels Overlap value forwarded to the tiling utility.
	@param tileWidth Tile width forwarded to the tiling utility.
	@param tileHeight Tile height forwarded to the tiling utility.
	@param denoiserState Buffer link whose `asBuffer.pointer` is forwarded as the denoiser state.
	@param denoiserStateSizeInBytes Size forwarded together with `denoiserState`.

	NOTE(review): with the defaulted `denoiserState`/`denoiserStateSizeInBytes` an empty link
	and a size of 0 are forwarded as-is - confirm the OptiX utility accepts that,
	or pass the state that was given to `setup` explicitly.
*/
inline OptixResult tileAndInvoke(
76+
CUstream stream,
77+
const OptixDenoiserParams* params,
78+
const OptixImage2D* inputLayers,
79+
unsigned int numInputLayers,
80+
const OptixImage2D* outputLayer,
81+
const cuda::CCUDAHandler::GraphicsAPIObjLink<video::IGPUBuffer>& scratch,
82+
size_t scratchSizeInBytes,
83+
unsigned int overlapWindowSizeInPixels,
84+
unsigned int tileWidth,
85+
unsigned int tileHeight,
86+
const cuda::CCUDAHandler::GraphicsAPIObjLink<video::IGPUBuffer>& denoiserState = {},
87+
size_t denoiserStateSizeInBytes = 0ull)
88+
{
89+
// a non-success code stored in `alreadySetup` is propagated without invoking the denoiser
if (alreadySetup != OPTIX_SUCCESS)
90+
return alreadySetup;
91+
return optixUtilDenoiserInvokeTiled(
92+
denoiser,
93+
stream,
94+
params,
95+
denoiserState.asBuffer.pointer,
96+
denoiserStateSizeInBytes,
97+
inputLayers,
98+
numInputLayers,
99+
outputLayer,
100+
scratch.asBuffer.pointer,
101+
scratchSizeInBytes,
102+
overlapWindowSizeInPixels,
103+
tileWidth,
104+
tileHeight);
105+
}
106+
74107
protected:
75108
friend class OptiX::IContext;
76109

0 commit comments

Comments
 (0)