Skip to content

Commit 3e035ef

Browse files
cache draft
1 parent 3c018ba commit 3e035ef

File tree

3 files changed

+124
-68
lines changed

3 files changed

+124
-68
lines changed

examples_tests/22.RaytracedAO/Renderer.cpp

Lines changed: 95 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -805,11 +805,98 @@ core::smart_refctd_ptr<IGPUImageView> Renderer::createScreenSizedTexture(E_FORMA
805805
return m_driver->createGPUImageView(std::move(viewparams));
806806
}
807807

808+
809+
// Allocates the CPU-side storage for a quantized sample sequence.
// One quantized dimension of one sample takes QuantizedDimensionsBytesize bytes, hence the
// requested size is QuantizedDimensionsBytesize*quantizedDimensions*sampleCount bytes.
// Returns nullptr instead of an empty buffer when the computed byte size is zero.
core::smart_refctd_ptr<asset::ICPUBuffer> Renderer::SampleSequence::createCPUBuffer(uint32_t quantizedDimensions, uint32_t sampleCount)
{
	const size_t bytesPerSample = SampleSequence::QuantizedDimensionsBytesize*quantizedDimensions;
	const size_t totalBytes = bytesPerSample*sampleCount;
	// nothing to allocate
	if (totalBytes==0u)
		return nullptr;

	return core::make_smart_refctd_ptr<asset::ICPUBuffer>(totalBytes);
}
817+
// Uploads an already filled CPU sample buffer (for example one read back from the cache file)
// to the GPU and stores a view over the uploaded data in `bufferView`.
// `buff` must not be null, it is dereferenced unconditionally.
void Renderer::SampleSequence::createBufferView(IVideoDriver* driver, core::smart_refctd_ptr<asset::ICPUBuffer>&& buff)
{
	const auto byteSize = buff->getSize();
	auto* const contents = buff->getPointer();
	// the GPU buffer gets filled with the CPU contents at creation
	auto gpuBuffer = driver->createFilledDeviceLocalGPUBufferOnDedMem(byteSize,contents);
	// viewed as EF_R32G32_UINT, the generator writes the sequence as pairs of uint32_t
	bufferView = driver->createGPUBufferView(gpuBuffer.get(),asset::EF_R32G32_UINT);
}
822+
// Generates a fresh sample sequence with the Owen sampler, uploads it to the GPU (storing the
// resulting view in `bufferView`) and returns the CPU copy so that the caller can cache it.
//
// Every output element is one "quantized dimension", i.e. 3 sampler dimensions packed into 2x uint32_t:
//  - word 0: upper 21 bits of dimension 0, low 11 bits hold bits 31..21 of dimension 2
//  - word 1: upper 21 bits of dimension 1, low 11 bits hold bits 20..10 of dimension 2
//
// Returns nullptr and resets `bufferView` when there is nothing to generate
// (`quantizedDimensions` or `sampleCount` is 0), previously this dereferenced a null buffer.
core::smart_refctd_ptr<ICPUBuffer> Renderer::SampleSequence::createBufferView(IVideoDriver* driver, uint32_t quantizedDimensions, uint32_t sampleCount)
{
	// allocate first, so an empty request bails out before the sampler is even constructed
	auto buff = createCPUBuffer(quantizedDimensions,sampleCount);
	if (!buff)
	{
		bufferView = nullptr;
		return nullptr;
	}

	constexpr auto DimensionsPerQuanta = 3u;
	const auto dimensions = quantizedDimensions*DimensionsPerQuanta;
	core::OwenSampler sampler(dimensions,0xdeadbeefu);

	// Memory Order: 3 Dimensions, then multiple of sampling strategies per vertex, then depth, then sample ID
	uint32_t(&pout)[][2] = *reinterpret_cast<uint32_t(*)[][2]>(buff->getPointer());
	// element index is computed in size_t so that large sequences cannot wrap a 32-bit product
	const size_t stride = quantizedDimensions;
	// the horrible order of iteration over output memory is caused by the fact that certain samplers like the
	// Owen Scramble sampler, have a large cache which needs to be generated separately for each dimension.
	for (auto metadim=0u; metadim<quantizedDimensions; metadim++)
	{
		const auto trudim = metadim*DimensionsPerQuanta;
		for (uint32_t i=0; i<sampleCount; i++)
			pout[i*stride+metadim][0] = sampler.sample(trudim+0u,i);
		for (uint32_t i=0; i<sampleCount; i++)
			pout[i*stride+metadim][1] = sampler.sample(trudim+1u,i);
		for (uint32_t i=0; i<sampleCount; i++)
		{
			const auto sample = sampler.sample(trudim+2u,i);
			uint32_t* const out = pout[i*stride+metadim];
			// make room for the third dimension in the low 11 bits of both words
			out[0] &= 0xFFFFF800u;
			out[0] |= sample>>21;
			out[1] &= 0xFFFFF800u;
			out[1] |= (sample>>10)&0x07FFu;
		}
	}
	// upload sequence to GPU
	createBufferView(driver,core::smart_refctd_ptr(buff));
	// return for caching
	return buff;
}
855+
808856
// TODO: be able to fail
809-
void Renderer::initSceneResources(SAssetBundle& meshes)
857+
void Renderer::initSceneResources(SAssetBundle& meshes, nbl::io::path&& _sampleSequenceCachePath)
810858
{
811859
deinitSceneResources();
812860

861+
// load cache
862+
uint32_t quantizedDimensions = QUANTIZED_DIMENSIONS_PER_SAMPLE;
863+
uint32_t sampleCount = MaxSamples;
864+
{
865+
core::smart_refctd_ptr<ICPUBuffer> cachebuff;
866+
uint32_t cachedQuantizedDimensions=0u,cachedSampleCount=0u;
867+
{
868+
sampleSequenceCachePath = std::move(_sampleSequenceCachePath);
869+
io::IReadFile* cacheFile = m_assetManager->getFileSystem()->createAndOpenFile(sampleSequenceCachePath);
870+
if (cacheFile)
871+
{
872+
cacheFile->read(&cachedQuantizedDimensions,sizeof(cachedQuantizedDimensions));
873+
if (cachedQuantizedDimensions)
874+
{
875+
cachedSampleCount = (cacheFile->getSize()-cacheFile->getPos())/(cachedQuantizedDimensions*SampleSequence::QuantizedDimensionsBytesize);
876+
cachebuff = sampleSequence.createCPUBuffer(cachedQuantizedDimensions,cachedSampleCount);
877+
if (cachebuff)
878+
cacheFile->read(cachebuff->getPointer(),cachebuff->getSize());
879+
}
880+
cacheFile->drop();
881+
}
882+
}
883+
if (cachedQuantizedDimensions>=quantizedDimensions && cachedSampleCount>=sampleCount)
884+
sampleSequence.createBufferView(m_driver,std::move(cachebuff));
885+
else
886+
{
887+
cachebuff = sampleSequence.createBufferView(m_driver,quantizedDimensions,sampleCount);
888+
// save sequence
889+
io::IWriteFile* cacheFile = m_assetManager->getFileSystem()->createAndWriteFile(sampleSequenceCachePath);
890+
if (cacheFile)
891+
{
892+
cacheFile->write(&quantizedDimensions,sizeof(quantizedDimensions));
893+
cacheFile->write(cachebuff->getPointer(),cachebuff->getSize());
894+
cacheFile->drop();
895+
}
896+
}
897+
}
898+
899+
813900
// set up Descriptor Sets
814901
{
815902
// captures m_globalBackendDataDS, creates m_indirectDrawBuffers, sets up m_mdiDrawCalls ranges
@@ -972,7 +1059,9 @@ void Renderer::deinitSceneResources()
9721059
rrShapes.clear();
9731060
}
9741061

975-
void Renderer::initScreenSizedResources(uint32_t width, uint32_t height, core::smart_refctd_ptr<ICPUBuffer>&& sampleSequence)
1062+
constexpr auto DefaultPathDepth = 8u;
1063+
constexpr auto MaxPathDepth = 255u;
1064+
void Renderer::initScreenSizedResources(uint32_t width, uint32_t height)
9761065
{
9771066
m_staticViewData.imageDimensions = {width, height};
9781067
m_rcpPixelSize = { 2.f/float(m_staticViewData.imageDimensions.x),-2.f/float(m_staticViewData.imageDimensions.y) };
@@ -1028,7 +1117,7 @@ void Renderer::initScreenSizedResources(uint32_t width, uint32_t height, core::s
10281117
if (m_staticViewData.pathDepth==0)
10291118
{
10301119
printf("[ERROR] No suppoerted Integrator found in the Mitsuba XML, setting default.\n");
1031-
m_staticViewData.pathDepth = 8u;
1120+
m_staticViewData.pathDepth = DefaultPathDepth;
10321121
}
10331122
else if (m_staticViewData.pathDepth>MAX_PATH_DEPTH)
10341123
{
@@ -1166,11 +1255,7 @@ void Renderer::initScreenSizedResources(uint32_t width, uint32_t height, core::s
11661255
{
11671256
_staticViewDataBuffer = createFilledBufferAndSetUpInfoFromStruct(infos+0,m_staticViewData);
11681257
staticViewDataBufferSize = _staticViewDataBuffer->getSize();
1169-
{
1170-
// upload sequence to GPU
1171-
auto gpubuf = m_driver->createFilledDeviceLocalGPUBufferOnDedMem(sampleSequence->getSize(),sampleSequence->getPointer());
1172-
infos[1].desc = m_driver->createGPUBufferView(gpubuf.get(),asset::EF_R32G32_UINT);
1173-
}
1258+
infos[1].desc = sampleSequence.getBufferView();
11741259
setImageInfo(infos+2,asset::EIL_GENERAL,core::smart_refctd_ptr(m_accumulation));
11751260
setImageInfo(infos+5,asset::EIL_GENERAL,core::smart_refctd_ptr(m_albedoAcc));
11761261
setImageInfo(infos+6,asset::EIL_GENERAL,core::smart_refctd_ptr(m_normalAcc));
@@ -1301,7 +1386,7 @@ void Renderer::initScreenSizedResources(uint32_t width, uint32_t height, core::s
13011386
std::cout << "\nScreen Sized Resources have been initialized (" << width << "x" << height << ")" << std::endl;
13021387
std::cout << "\tStaticViewData = " << staticViewDataBufferSize << " bytes" << std::endl;
13031388
std::cout << "\tScrambleBuffer = " << scrambleBufferSize << " bytes" << std::endl;
1304-
std::cout << "\tSampleSequence = " << sampleSequence->getSize() << " bytes" << std::endl;
1389+
std::cout << "\tSampleSequence = " << sampleSequence.getBufferView()->getByteSize() << " bytes" << std::endl;
13051390
std::cout << "\tRayCount Buffer = " << m_rayCountBuffer->getSize() << " bytes" << std::endl;
13061391
for (auto i=0u; i<2u; i++)
13071392
std::cout << "\tIntersection Buffer[" << i << "] = " << m_intersectionBuffer[i].buffer->getSize() << " bytes" << std::endl;
@@ -1362,7 +1447,7 @@ void Renderer::deinitScreenSizedResources()
13621447
m_resolvePipeline = nullptr;
13631448

13641449
m_staticViewData.imageDimensions = {0u, 0u};
1365-
m_staticViewData.pathDepth = 8u;
1450+
m_staticViewData.pathDepth = DefaultPathDepth;
13661451
m_staticViewData.noRussianRouletteDepth = 5u;
13671452
m_staticViewData.samplesPerPixelPerDispatch = 1u;
13681453
m_totalRaysCast = 0ull;

examples_tests/22.RaytracedAO/Renderer.h

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,11 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac
4040

4141
Renderer(nbl::video::IVideoDriver* _driver, nbl::asset::IAssetManager* _assetManager, nbl::scene::ISceneManager* _smgr, bool useDenoiser = true);
4242

43-
void initSceneResources(nbl::asset::SAssetBundle& meshes);
43+
void initSceneResources(nbl::asset::SAssetBundle& meshes, nbl::io::path&& _sampleSequenceCachePath="");
4444

4545
void deinitSceneResources();
4646

47-
void initScreenSizedResources(uint32_t width, uint32_t height, nbl::core::smart_refctd_ptr<nbl::asset::ICPUBuffer>&& sampleSequence);
47+
void initScreenSizedResources(uint32_t width, uint32_t height);
4848

4949
void deinitScreenSizedResources();
5050

@@ -69,7 +69,7 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac
6969
}
7070
uint64_t getTotalSamplesComputed() const
7171
{
72-
const auto samplesPerDispatch = static_cast<uint64_t>(getSamplesPerPixelPerDispatch()*m_staticViewData.imageDimensions.x*m_staticViewData.imageDimensions.y);
72+
const auto samplesPerDispatch = getSamplesPerPixelPerDispatch()*static_cast<uint64_t>(m_staticViewData.imageDimensions.x*m_staticViewData.imageDimensions.y);
7373
const auto framesDispatched = static_cast<uint64_t>(m_framesDispatched);
7474
return framesDispatched*samplesPerDispatch;
7575
}
@@ -118,7 +118,8 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac
118118
void finalizeScene(InitializationData& initData);
119119

120120
//
121-
nbl::core::smart_refctd_ptr<nbl::video::IGPUImageView> createScreenSizedTexture(nbl::asset::E_FORMAT format, uint32_t layers = 0u);
121+
nbl::core::smart_refctd_ptr<nbl::video::IGPUImageView> createScreenSizedTexture(nbl::asset::E_FORMAT format, uint32_t layers=0u);
122+
void genSampleSequenceBufferView(uint32_t quantizedDimensions, uint32_t sampleCount);
122123

123124
//
124125
void preDispatch(const nbl::video::IGPUPipelineLayout* layout, nbl::video::IGPUDescriptorSet*const *const lastDS);
@@ -157,6 +158,27 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac
157158
nbl::core::smart_refctd_ptr<IGPUSpecializedShader> m_closestHitGPUShader;
158159
nbl::core::smart_refctd_ptr<IGPUSpecializedShader> m_resolveGPUShader;
159160

161+
// semi persistent data
162+
nbl::io::path sampleSequenceCachePath;
163+
struct SampleSequence
164+
{
165+
public:
166+
static inline constexpr auto QuantizedDimensionsBytesize = sizeof(uint64_t);
167+
SampleSequence() : bufferView() {}
168+
169+
nbl::core::smart_refctd_ptr<nbl::asset::ICPUBuffer> createCPUBuffer(uint32_t quantizedDimensions, uint32_t sampleCount);
170+
171+
// from cache
172+
void createBufferView(nbl::video::IVideoDriver* driver, nbl::core::smart_refctd_ptr<nbl::asset::ICPUBuffer>&& buff);
173+
// regenerate
174+
nbl::core::smart_refctd_ptr<nbl::asset::ICPUBuffer> createBufferView(nbl::video::IVideoDriver* driver, uint32_t quantizedDimensions, uint32_t sampleCount);
175+
176+
auto getBufferView() const {return bufferView;}
177+
178+
private:
179+
nbl::core::smart_refctd_ptr<nbl::video::IGPUBufferView> bufferView;
180+
} sampleSequence;
181+
160182
// scene specific data
161183
nbl::core::vector<::RadeonRays::Shape*> rrShapes;
162184
nbl::core::vector<::RadeonRays::Shape*> rrInstances;

examples_tests/22.RaytracedAO/main.cpp

Lines changed: 3 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -558,58 +558,7 @@ int main(int argc, char** argv)
558558
auto driver = device->getVideoDriver();
559559

560560
core::smart_refctd_ptr<Renderer> renderer = core::make_smart_refctd_ptr<Renderer>(driver,device->getAssetManager(),smgr);
561-
auto sampleSequence = core::make_smart_refctd_ptr<asset::ICPUBuffer>(sizeof(uint64_t)*Renderer::MaxSamples*QUANTIZED_DIMENSIONS_PER_SAMPLE);
562-
{
563-
bool generateNewSamples = true;
564-
565-
io::IReadFile* cacheFile = device->getFileSystem()->createAndOpenFile("../../tmp/rtSamples.bin");
566-
if (cacheFile)
567-
{
568-
if (cacheFile->getSize()==sampleSequence->getSize()) // light validation
569-
{
570-
cacheFile->read(sampleSequence->getPointer(),sampleSequence->getSize());
571-
//generateNewSamples = false;
572-
}
573-
cacheFile->drop();
574-
}
575-
576-
if (generateNewSamples)
577-
{
578-
constexpr auto DimensionsPerQuanta = 3u;
579-
core::OwenSampler sampler(QUANTIZED_DIMENSIONS_PER_SAMPLE*DimensionsPerQuanta,0xdeadbeefu);
580-
581-
// Memory Order: 3 Dimensions, then multiple of sampling stragies per vertex, then depth, then sample ID
582-
uint32_t(&pout)[][2] = *reinterpret_cast<uint32_t(*)[][2]>(sampleSequence->getPointer());
583-
// the horrible order of iteration over output memory is caused by the fact that certain samplers like the
584-
// Owen Scramble sampler, have a large cache which needs to be generated separately for each dimension.
585-
for (auto metadim=0u; metadim<QUANTIZED_DIMENSIONS_PER_SAMPLE; metadim++)
586-
{
587-
const auto trudim = metadim*DimensionsPerQuanta;
588-
for (uint32_t i=0; i<Renderer::MaxSamples; i++)
589-
pout[i*QUANTIZED_DIMENSIONS_PER_SAMPLE+metadim][0] = sampler.sample(trudim+0u,i);
590-
for (uint32_t i=0; i<Renderer::MaxSamples; i++)
591-
pout[i*QUANTIZED_DIMENSIONS_PER_SAMPLE+metadim][1] = sampler.sample(trudim+1u,i);
592-
for (uint32_t i=0; i<Renderer::MaxSamples; i++)
593-
{
594-
const auto sample = sampler.sample(trudim+2u,i);
595-
const auto out = pout[i*QUANTIZED_DIMENSIONS_PER_SAMPLE+metadim];
596-
out[0] &= 0xFFFFF800u;
597-
out[0] |= sample>>21;
598-
out[1] &= 0xFFFFF800u;
599-
out[1] |= (sample>>10)&0x07FFu;
600-
}
601-
}
602-
603-
io::IWriteFile* cacheFile = device->getFileSystem()->createAndWriteFile("../../tmp/rtSamples.bin");
604-
if (cacheFile)
605-
{
606-
cacheFile->write(sampleSequence->getPointer(),sampleSequence->getSize());
607-
cacheFile->drop();
608-
}
609-
}
610-
}
611-
612-
renderer->initSceneResources(meshes);
561+
renderer->initSceneResources(meshes,"LowDiscrepancySequenceCache.bin");
613562
meshes = {}; // free memory
614563

615564
RaytracerExampleEventReceiver receiver;
@@ -671,7 +620,7 @@ int main(int argc, char** argv)
671620
if(needsReinit)
672621
{
673622
renderer->deinitScreenSizedResources();
674-
renderer->initScreenSizedResources(sensorData.width, sensorData.height, std::move(sampleSequence));
623+
renderer->initScreenSizedResources(sensorData.width,sensorData.height);
675624
}
676625

677626
smgr->setActiveCamera(sensorData.staticCamera);
@@ -756,7 +705,7 @@ int main(int argc, char** argv)
756705
if(needsReinit)
757706
{
758707
renderer->deinitScreenSizedResources();
759-
renderer->initScreenSizedResources(sensors[activeSensor].width, sensors[activeSensor].height, std::move(sampleSequence));
708+
renderer->initScreenSizedResources(sensors[activeSensor].width,sensors[activeSensor].height);
760709
}
761710

762711
smgr->setActiveCamera(sensors[activeSensor].interactiveCamera);

0 commit comments

Comments
 (0)