Skip to content

Commit 1004b44

Browse files
sample generation cache works
1 parent 3e035ef commit 1004b44

File tree

4 files changed

+135
-115
lines changed

4 files changed

+135
-115
lines changed

examples_tests/22.RaytracedAO/Renderer.cpp

Lines changed: 119 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,55 @@ Renderer::InitializationData Renderer::initSceneObjects(const SAssetBundle& mesh
190190
InitializationData retval;
191191
m_globalMeta = meshes.getMetadata()->selfCast<const ext::MitsubaLoader::CMitsubaMetadata>();
192192
assert(m_globalMeta );
193+
194+
//
195+
{
196+
// extract integrator parameters
197+
std::stack<const ext::MitsubaLoader::CElementIntegrator*> integratorStack;
198+
integratorStack.push(&m_globalMeta->m_global.m_integrator);
199+
while (!integratorStack.empty())
200+
{
201+
auto integrator = integratorStack.top();
202+
integratorStack.pop();
203+
using Enum = ext::MitsubaLoader::CElementIntegrator::Type;
204+
switch (integrator->type)
205+
{
206+
case Enum::DIRECT:
207+
pathDepth = 2u;
208+
break;
209+
case Enum::PATH:
210+
case Enum::VOL_PATH_SIMPLE:
211+
case Enum::VOL_PATH:
212+
case Enum::BDPT:
213+
pathDepth = integrator->bdpt.maxPathDepth;
214+
noRussianRouletteDepth = integrator->bdpt.russianRouletteDepth-1u;
215+
break;
216+
case Enum::ADAPTIVE:
217+
for (size_t i=0u; i<integrator->multichannel.childCount; i++)
218+
integratorStack.push(integrator->multichannel.children[i]);
219+
break;
220+
case Enum::IRR_CACHE:
221+
assert(false);
222+
break;
223+
case Enum::MULTI_CHANNEL:
224+
for (size_t i=0u; i<integrator->multichannel.childCount; i++)
225+
integratorStack.push(integrator->multichannel.children[i]);
226+
break;
227+
default:
228+
break;
229+
};
230+
}
231+
232+
//
233+
retval.maxSensorSamples = MaxFreeviewSamples;
234+
for (const auto& sensor : m_globalMeta->m_global.m_sensors)
235+
{
236+
if (retval.maxSensorSamples<sensor.sampler.sampleCount)
237+
retval.maxSensorSamples = sensor.sampler.sampleCount;
238+
}
239+
}
240+
241+
//
193242
auto* _globalBackendDataDS = m_globalMeta ->m_global.m_ds0.get();
194243

195244
auto* instanceDataDescPtr = _globalBackendDataDS->getDescriptors(5u).begin();
@@ -853,49 +902,13 @@ core::smart_refctd_ptr<ICPUBuffer> Renderer::SampleSequence::createBufferView(IV
853902
return buff;
854903
}
855904

905+
//
906+
856907
// TODO: be able to fail
857908
void Renderer::initSceneResources(SAssetBundle& meshes, nbl::io::path&& _sampleSequenceCachePath)
858909
{
859910
deinitSceneResources();
860911

861-
// load cache
862-
uint32_t quantizedDimensions = QUANTIZED_DIMENSIONS_PER_SAMPLE;
863-
uint32_t sampleCount = MaxSamples;
864-
{
865-
core::smart_refctd_ptr<ICPUBuffer> cachebuff;
866-
uint32_t cachedQuantizedDimensions=0u,cachedSampleCount=0u;
867-
{
868-
sampleSequenceCachePath = std::move(_sampleSequenceCachePath);
869-
io::IReadFile* cacheFile = m_assetManager->getFileSystem()->createAndOpenFile(sampleSequenceCachePath);
870-
if (cacheFile)
871-
{
872-
cacheFile->read(&cachedQuantizedDimensions,sizeof(cachedQuantizedDimensions));
873-
if (cachedQuantizedDimensions)
874-
{
875-
cachedSampleCount = (cacheFile->getSize()-cacheFile->getPos())/(cachedQuantizedDimensions*SampleSequence::QuantizedDimensionsBytesize);
876-
cachebuff = sampleSequence.createCPUBuffer(cachedQuantizedDimensions,cachedSampleCount);
877-
if (cachebuff)
878-
cacheFile->read(cachebuff->getPointer(),cachebuff->getSize());
879-
}
880-
cacheFile->drop();
881-
}
882-
}
883-
if (cachedQuantizedDimensions>=quantizedDimensions && cachedSampleCount>=sampleCount)
884-
sampleSequence.createBufferView(m_driver,std::move(cachebuff));
885-
else
886-
{
887-
cachebuff = sampleSequence.createBufferView(m_driver,quantizedDimensions,sampleCount);
888-
// save sequence
889-
io::IWriteFile* cacheFile = m_assetManager->getFileSystem()->createAndWriteFile(sampleSequenceCachePath);
890-
if (cacheFile)
891-
{
892-
cacheFile->write(&quantizedDimensions,sizeof(quantizedDimensions));
893-
cacheFile->write(cachebuff->getPointer(),cachebuff->getSize());
894-
cacheFile->drop();
895-
}
896-
}
897-
}
898-
899912

900913
// set up Descriptor Sets
901914
{
@@ -1008,9 +1021,67 @@ void Renderer::initSceneResources(SAssetBundle& meshes, nbl::io::path&& _sampleS
10081021
std::cout << "\tindexBuffer = " << m_indexBuffer->getSize() << " bytes" << std::endl;
10091022
for (auto i=0u; i<2u; i++)
10101023
std::cout << "\tIndirect Draw Buffers[" << i << "] = " << m_indirectDrawBuffers[i]->getSize() << " bytes" << std::endl;
1011-
std::cout << std::endl;
1024+
}
1025+
1026+
// load sample cache
1027+
{
1028+
core::smart_refctd_ptr<ICPUBuffer> cachebuff;
1029+
uint32_t cachedQuantizedDimensions=0u,cachedSampleCount=0u;
1030+
{
1031+
sampleSequenceCachePath = std::move(_sampleSequenceCachePath);
1032+
io::IReadFile* cacheFile = m_assetManager->getFileSystem()->createAndOpenFile(sampleSequenceCachePath);
1033+
if (cacheFile)
1034+
{
1035+
cacheFile->read(&cachedQuantizedDimensions,sizeof(cachedQuantizedDimensions));
1036+
if (cachedQuantizedDimensions)
1037+
{
1038+
cachedSampleCount = (cacheFile->getSize()-cacheFile->getPos())/(cachedQuantizedDimensions*SampleSequence::QuantizedDimensionsBytesize);
1039+
cachebuff = sampleSequence.createCPUBuffer(cachedQuantizedDimensions,cachedSampleCount);
1040+
if (cachebuff)
1041+
cacheFile->read(cachebuff->getPointer(),cachebuff->getSize());
1042+
}
1043+
cacheFile->drop();
1044+
}
1045+
}
1046+
// let's keep the path length within sane bounds
1047+
constexpr auto MaxPathDepth = 255u;
1048+
if (pathDepth==0)
1049+
{
1050+
printf("[ERROR] No suppoerted Integrator found in the Mitsuba XML, setting default.\n");
1051+
pathDepth = DefaultPathDepth;
1052+
}
1053+
else if (pathDepth>MaxPathDepth)
1054+
{
1055+
printf("[WARNING] Path Depth %d greater than maximum supported, clamping to %d\n",pathDepth,MaxPathDepth);
1056+
pathDepth = MaxPathDepth;
1057+
}
1058+
const uint32_t quantizedDimensions = SampleSequence::computeQuantizedDimensions(pathDepth);
1059+
// The primary limiting factor is the precision of turning a fixed point grid sample to IEEE754 32bit float in the [0,1] range.
1060+
// Mantissa is only 23 bits, and primary sample space low discrepancy sequence will start to produce duplicates
1061+
// near 1.0 with exponent -1 after the sample count passes 2^24 elements.
1062+
// Another limiting factor is our encoding of sample sequences, we only use 21bits per channel, so no duplicates till 2^21 samples.
1063+
initData.maxSensorSamples = core::min(0x1<<21,initData.maxSensorSamples);
1064+
if (cachedQuantizedDimensions>=quantizedDimensions && cachedSampleCount>=initData.maxSensorSamples)
1065+
sampleSequence.createBufferView(m_driver,std::move(cachebuff));
1066+
else
1067+
{
1068+
printf("[INFO] Generating Low Discrepancy Sample Sequence Cache, please wait...\n");
1069+
cachebuff = sampleSequence.createBufferView(m_driver,quantizedDimensions,initData.maxSensorSamples);
1070+
// save sequence
1071+
io::IWriteFile* cacheFile = m_assetManager->getFileSystem()->createAndWriteFile(sampleSequenceCachePath);
1072+
if (cacheFile)
1073+
{
1074+
cacheFile->write(&quantizedDimensions,sizeof(quantizedDimensions));
1075+
cacheFile->write(cachebuff->getPointer(),cachebuff->getSize());
1076+
cacheFile->drop();
1077+
}
1078+
}
1079+
std::cout << "\tpathDepth = " << pathDepth << std::endl;
1080+
std::cout << "\tnoRussianRouletteDepth = " << noRussianRouletteDepth << std::endl;
1081+
std::cout << "\tmaxSamples = " << initData.maxSensorSamples << std::endl;
10121082
}
10131083
}
1084+
std::cout << std::endl;
10141085
}
10151086

10161087
void Renderer::deinitSceneResources()
@@ -1057,10 +1128,11 @@ void Renderer::deinitSceneResources()
10571128
for (auto shape : rrShapes)
10581129
rr->DeleteShape(shape);
10591130
rrShapes.clear();
1131+
1132+
pathDepth = DefaultPathDepth;
1133+
noRussianRouletteDepth = 5u;
10601134
}
10611135

1062-
constexpr auto DefaultPathDepth = 8u;
1063-
constexpr auto MaxPathDepth = 255u;
10641136
void Renderer::initScreenSizedResources(uint32_t width, uint32_t height)
10651137
{
10661138
m_staticViewData.imageDimensions = {width, height};
@@ -1075,62 +1147,14 @@ void Renderer::initScreenSizedResources(uint32_t width, uint32_t height)
10751147
size_t scrambleBufferSize=0u;
10761148
size_t raygenBufferSize=0u,intersectionBufferSize=0u;
10771149
{
1078-
m_staticViewData.pathDepth = 0u;
1079-
m_staticViewData.noRussianRouletteDepth = 5u;
1080-
uint32_t bxdfSamples=1u,maxNEESamples=0u;
1081-
std::stack<const ext::MitsubaLoader::CElementIntegrator*> integratorStack;
1082-
integratorStack.push(&m_globalMeta->m_global.m_integrator);
1083-
while (!integratorStack.empty())
1084-
{
1085-
auto integrator = integratorStack.top();
1086-
integratorStack.pop();
1087-
using Enum = ext::MitsubaLoader::CElementIntegrator::Type;
1088-
switch (integrator->type)
1089-
{
1090-
case Enum::DIRECT:
1091-
m_staticViewData.pathDepth = 2u;
1092-
bxdfSamples = integrator->direct.bsdfSamples;
1093-
maxNEESamples = integrator->direct.emitterSamples;
1094-
break;
1095-
case Enum::PATH:
1096-
case Enum::VOL_PATH_SIMPLE:
1097-
case Enum::VOL_PATH:
1098-
case Enum::BDPT:
1099-
m_staticViewData.pathDepth = integrator->bdpt.maxPathDepth;
1100-
m_staticViewData.noRussianRouletteDepth = integrator->bdpt.russianRouletteDepth-1u;
1101-
break;
1102-
case Enum::ADAPTIVE:
1103-
for (size_t i=0u; i<integrator->multichannel.childCount; i++)
1104-
integratorStack.push(integrator->multichannel.children[i]);
1105-
break;
1106-
case Enum::IRR_CACHE:
1107-
assert(false);
1108-
break;
1109-
case Enum::MULTI_CHANNEL:
1110-
for (size_t i=0u; i<integrator->multichannel.childCount; i++)
1111-
integratorStack.push(integrator->multichannel.children[i]);
1112-
break;
1113-
default:
1114-
break;
1115-
};
1116-
}
1117-
if (m_staticViewData.pathDepth==0)
1118-
{
1119-
printf("[ERROR] No suppoerted Integrator found in the Mitsuba XML, setting default.\n");
1120-
m_staticViewData.pathDepth = DefaultPathDepth;
1121-
}
1122-
else if (m_staticViewData.pathDepth>MAX_PATH_DEPTH)
1123-
{
1124-
printf("[WARNING] Path Depth %d greater than maximum supported, clamping to %d\n",m_staticViewData.pathDepth,MAX_PATH_DEPTH);
1125-
m_staticViewData.pathDepth = MAX_PATH_DEPTH;
1126-
}
1127-
printf("Path Depth %d\n",m_staticViewData.pathDepth);
1128-
printf("No Russian Roulette Until %d\n",m_staticViewData.noRussianRouletteDepth);
1150+
// TODO
1151+
m_staticViewData.pathDepth = pathDepth;
1152+
m_staticViewData.noRussianRouletteDepth = noRussianRouletteDepth;
11291153

11301154
uint32_t _maxRaysPerDispatch = 0u;
1131-
auto setRayBufferSizes = [&bxdfSamples,&maxNEESamples,renderPixelCount,this,&_maxRaysPerDispatch,&raygenBufferSize,&intersectionBufferSize](uint32_t sampleMultiplier) -> void
1155+
auto setRayBufferSizes = [renderPixelCount,this,&_maxRaysPerDispatch,&raygenBufferSize,&intersectionBufferSize](uint32_t sampleMultiplier) -> void
11321156
{
1133-
m_staticViewData.samplesPerPixelPerDispatch = (bxdfSamples+maxNEESamples)*sampleMultiplier;
1157+
m_staticViewData.samplesPerPixelPerDispatch = SAMPLING_STRATEGY_COUNT*sampleMultiplier;
11341158

11351159
const size_t minimumSampleCountPerDispatch = static_cast<size_t>(renderPixelCount)*getSamplesPerPixelPerDispatch();
11361160
_maxRaysPerDispatch = static_cast<uint32_t>(minimumSampleCountPerDispatch);
@@ -1146,18 +1170,15 @@ void Renderer::initScreenSizedResources(uint32_t width, uint32_t height)
11461170
while (sampleMultiplier<0x10000u && raygenBufferSize<=maxSSBOSize && intersectionBufferSize<=maxSSBOSize)
11471171
setRayBufferSizes(++sampleMultiplier);
11481172
if (sampleMultiplier==1u)
1149-
{
1150-
bxdfSamples = 1u;
1151-
maxNEESamples = 0u;
11521173
setRayBufferSizes(sampleMultiplier);
1153-
}
11541174
printf("[INFO] Using %d samples (per pixel) per dispatch\n",getSamplesPerPixelPerDispatch());
11551175
}
11561176
}
11571177

11581178
(std::ofstream("runtime_defines.glsl")
11591179
<< "#define _NBL_EXT_MITSUBA_LOADER_VT_STORAGE_VIEW_COUNT " << m_globalMeta->m_global.getVTStorageViewCount() << "\n"
11601180
<< m_globalMeta->m_global.m_materialCompilerGLSL_declarations
1181+
<< "#define SAMPLE_SEQUENCE_STRIDE " << SampleSequence::computeQuantizedDimensions(pathDepth) << "\n"
11611182
<< "#ifndef MAX_RAYS_GENERATED\n"
11621183
<< "# define MAX_RAYS_GENERATED " << getSamplesPerPixelPerDispatch() << "\n"
11631184
<< "#endif\n"

examples_tests/22.RaytracedAO/Renderer.h

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -78,11 +78,15 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac
7878
return m_totalRaysCast;
7979
}
8080

81-
// The primary limiting factor is the precision of turning a fixed point grid sample to IEEE754 32bit float in the [0,1] range.
82-
// Mantissa is only 23 bits, and primary sample space low discrepancy sequence will start to produce duplicates
83-
// near 1.0 with exponent -1 after the sample count passes 2^24 elements.
84-
// Another limiting factor is our encoding of sample sequences, we only use 21bits per channel, so no duplicates till 2^21 samples.
85-
static inline constexpr uint32_t MaxSamples = 0x10000u;// 0x200000;
81+
//! Brief guideline to good path depth limits
82+
// Want to see stuff with indirect lighting on the other side of a pane of glass
83+
// 5 = glass frontface->glass backface->diffuse surface->diffuse surface->light
84+
// Want to see through a glass box, vase, or office
85+
// 7 = glass frontface->glass backface->glass frontface->glass backface->diffuse surface->diffuse surface->light
86+
// pick higher numbers for better GI and less bias
87+
static inline constexpr uint32_t DefaultPathDepth = 8u;
88+
// TODO: Upload only a subsection of the sample sequence to the GPU, so we can use more samples without trashing VRAM
89+
static inline constexpr uint32_t MaxFreeviewSamples = 0x10000u;
8690

8791
//
8892
static constexpr inline uint32_t AntiAliasingSequenceLength = 1024;
@@ -92,7 +96,7 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac
9296

9397
struct InitializationData
9498
{
95-
InitializationData() : lights(),lightCDF() {}
99+
InitializationData() : lights(),lightCDF(), maxSensorSamples(MaxFreeviewSamples) {}
96100
InitializationData(InitializationData&& other) : InitializationData()
97101
{
98102
operator=(std::move(other));
@@ -112,14 +116,14 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac
112116
nbl::core::vector<float> lightPDF;
113117
nbl::core::vector<uint32_t> lightCDF;
114118
};
119+
uint32_t maxSensorSamples;
115120
};
116121
InitializationData initSceneObjects(const nbl::asset::SAssetBundle& meshes);
117122
void initSceneNonAreaLights(InitializationData& initData);
118123
void finalizeScene(InitializationData& initData);
119124

120125
//
121126
nbl::core::smart_refctd_ptr<nbl::video::IGPUImageView> createScreenSizedTexture(nbl::asset::E_FORMAT format, uint32_t layers=0u);
122-
void genSampleSequenceBufferView(uint32_t quantizedDimensions, uint32_t sampleCount);
123127

124128
//
125129
void preDispatch(const nbl::video::IGPUPipelineLayout* layout, nbl::video::IGPUDescriptorSet*const *const lastDS);
@@ -166,6 +170,8 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac
166170
static inline constexpr auto QuantizedDimensionsBytesize = sizeof(uint64_t);
167171
SampleSequence() : bufferView() {}
168172

173+
// one less because first path vertex uses a different sequence
174+
// Computes how many quantized sample dimensions are stored per sample for a given maximum path depth.
// The result is (maxPathDepth-1)*SAMPLING_STRATEGY_COUNT: the first path vertex is excluded from this count.
// A maxPathDepth of 0 returns 0 rather than letting the unsigned subtraction wrap around to ~2^32,
// which would otherwise request an absurdly large sample sequence.
static inline uint32_t computeQuantizedDimensions(uint32_t maxPathDepth)
{
	return maxPathDepth ? (maxPathDepth-1u)*SAMPLING_STRATEGY_COUNT : 0u;
}
169175
nbl::core::smart_refctd_ptr<nbl::asset::ICPUBuffer> createCPUBuffer(uint32_t quantizedDimensions, uint32_t sampleCount);
170176

171177
// from cache
@@ -178,6 +184,8 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac
178184
private:
179185
nbl::core::smart_refctd_ptr<nbl::video::IGPUBufferView> bufferView;
180186
} sampleSequence;
187+
uint16_t pathDepth;
188+
uint16_t noRussianRouletteDepth;
181189

182190
// scene specific data
183191
nbl::core::vector<::RadeonRays::Shape*> rrShapes;

examples_tests/22.RaytracedAO/common.h

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,8 @@
77

88
#define MAX_TRIANGLES_IN_BATCH 16384
99

10-
//! Brief guideline to good path depth limits
11-
// Want to see stuff with indirect lighting on the other side of a pane of glass
12-
// 5 = glass frontface->glass backface->diffuse surface->diffuse surface->light
13-
// Want to see through a glass box, vase, or office
14-
// 7 = glass frontface->glass backface->glass frontface->glass backface->diffuse surface->diffuse surface->light
15-
// pick higher numbers for better GI and less bias
16-
#define MAX_PATH_DEPTH 35
1710
// need to bump to 2 in case of NEE + MIS, 3 in case of Path Guiding
1811
#define SAMPLING_STRATEGY_COUNT 1
19-
// One less because the first vertex is rasterized
20-
#define QUANTIZED_DIMENSIONS_PER_SAMPLE ((MAX_PATH_DEPTH-1)*SAMPLING_STRATEGY_COUNT)
2112

2213

2314
#define WORKGROUP_SIZE 256

examples_tests/22.RaytracedAO/raytraceCommon.glsl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,7 @@ vec3 rand3d(in uvec3 scramble_key, in int _sample, int depth)
255255
// decrement depth because first vertex is rasterized and picked with a different sample sequence
256256
--depth;
257257
//
258-
const nbl_glsl_sampling_quantized3D quant = texelFetch(quantizedSampleSequence,int(_sample)*QUANTIZED_DIMENSIONS_PER_SAMPLE+depth).xy;
258+
const nbl_glsl_sampling_quantized3D quant = texelFetch(quantizedSampleSequence,int(_sample)*SAMPLE_SEQUENCE_STRIDE+depth).xy;
259259
return nbl_glsl_sampling_decodeSample3Dimensions(quant,scramble_key);
260260
}
261261

0 commit comments

Comments
 (0)