@@ -805,11 +805,98 @@ core::smart_refctd_ptr<IGPUImageView> Renderer::createScreenSizedTexture(E_FORMA
805
805
return m_driver->createGPUImageView (std::move (viewparams));
806
806
}
807
807
808
+
809
+ core::smart_refctd_ptr<asset::ICPUBuffer> Renderer::SampleSequence::createCPUBuffer (uint32_t quantizedDimensions, uint32_t sampleCount)
810
+ {
811
+ const size_t bytesize = SampleSequence::QuantizedDimensionsBytesize*quantizedDimensions*sampleCount;
812
+ if (bytesize)
813
+ return core::make_smart_refctd_ptr<asset::ICPUBuffer>(bytesize);
814
+ else
815
+ return nullptr ;
816
+ }
817
+ void Renderer::SampleSequence::createBufferView (IVideoDriver* driver, core::smart_refctd_ptr<asset::ICPUBuffer>&& buff)
818
+ {
819
+ auto gpubuf = driver->createFilledDeviceLocalGPUBufferOnDedMem (buff->getSize (),buff->getPointer ());
820
+ bufferView = driver->createGPUBufferView (gpubuf.get (),asset::EF_R32G32_UINT);
821
+ }
822
+ core::smart_refctd_ptr<ICPUBuffer> Renderer::SampleSequence::createBufferView (IVideoDriver* driver, uint32_t quantizedDimensions, uint32_t sampleCount)
823
+ {
824
+ constexpr auto DimensionsPerQuanta = 3u ;
825
+ const auto dimensions = quantizedDimensions*DimensionsPerQuanta;
826
+ core::OwenSampler sampler (dimensions,0xdeadbeefu );
827
+
828
+ // Memory Order: 3 Dimensions, then multiple of sampling stragies per vertex, then depth, then sample ID
829
+ auto buff = createCPUBuffer (quantizedDimensions,sampleCount);
830
+ uint32_t (&pout)[][2 ] = *reinterpret_cast <uint32_t (*)[][2 ]>(buff->getPointer ());
831
+ // the horrible order of iteration over output memory is caused by the fact that certain samplers like the
832
+ // Owen Scramble sampler, have a large cache which needs to be generated separately for each dimension.
833
+ for (auto metadim=0u ; metadim<quantizedDimensions; metadim++)
834
+ {
835
+ const auto trudim = metadim*DimensionsPerQuanta;
836
+ for (uint32_t i=0 ; i<sampleCount; i++)
837
+ pout[i*quantizedDimensions+metadim][0 ] = sampler.sample (trudim+0u ,i);
838
+ for (uint32_t i=0 ; i<sampleCount; i++)
839
+ pout[i*quantizedDimensions+metadim][1 ] = sampler.sample (trudim+1u ,i);
840
+ for (uint32_t i=0 ; i<sampleCount; i++)
841
+ {
842
+ const auto sample = sampler.sample (trudim+2u ,i);
843
+ const auto out = pout[i*quantizedDimensions+metadim];
844
+ out[0 ] &= 0xFFFFF800u ;
845
+ out[0 ] |= sample>>21 ;
846
+ out[1 ] &= 0xFFFFF800u ;
847
+ out[1 ] |= (sample>>10 )&0x07FFu ;
848
+ }
849
+ }
850
+ // upload sequence to GPU
851
+ createBufferView (driver,core::smart_refctd_ptr (buff));
852
+ // return for caching
853
+ return buff;
854
+ }
855
+
808
856
// TODO: be able to fail
809
- void Renderer::initSceneResources (SAssetBundle& meshes)
857
+ void Renderer::initSceneResources (SAssetBundle& meshes, nbl::io::path&& _sampleSequenceCachePath )
810
858
{
811
859
deinitSceneResources ();
812
860
861
+ // load cache
862
+ uint32_t quantizedDimensions = QUANTIZED_DIMENSIONS_PER_SAMPLE;
863
+ uint32_t sampleCount = MaxSamples;
864
+ {
865
+ core::smart_refctd_ptr<ICPUBuffer> cachebuff;
866
+ uint32_t cachedQuantizedDimensions=0u ,cachedSampleCount=0u ;
867
+ {
868
+ sampleSequenceCachePath = std::move (_sampleSequenceCachePath);
869
+ io::IReadFile* cacheFile = m_assetManager->getFileSystem ()->createAndOpenFile (sampleSequenceCachePath);
870
+ if (cacheFile)
871
+ {
872
+ cacheFile->read (&cachedQuantizedDimensions,sizeof (cachedQuantizedDimensions));
873
+ if (cachedQuantizedDimensions)
874
+ {
875
+ cachedSampleCount = (cacheFile->getSize ()-cacheFile->getPos ())/(cachedQuantizedDimensions*SampleSequence::QuantizedDimensionsBytesize);
876
+ cachebuff = sampleSequence.createCPUBuffer (cachedQuantizedDimensions,cachedSampleCount);
877
+ if (cachebuff)
878
+ cacheFile->read (cachebuff->getPointer (),cachebuff->getSize ());
879
+ }
880
+ cacheFile->drop ();
881
+ }
882
+ }
883
+ if (cachedQuantizedDimensions>=quantizedDimensions && cachedSampleCount>=sampleCount)
884
+ sampleSequence.createBufferView (m_driver,std::move (cachebuff));
885
+ else
886
+ {
887
+ cachebuff = sampleSequence.createBufferView (m_driver,quantizedDimensions,sampleCount);
888
+ // save sequence
889
+ io::IWriteFile* cacheFile = m_assetManager->getFileSystem ()->createAndWriteFile (sampleSequenceCachePath);
890
+ if (cacheFile)
891
+ {
892
+ cacheFile->write (&quantizedDimensions,sizeof (quantizedDimensions));
893
+ cacheFile->write (cachebuff->getPointer (),cachebuff->getSize ());
894
+ cacheFile->drop ();
895
+ }
896
+ }
897
+ }
898
+
899
+
813
900
// set up Descriptor Sets
814
901
{
815
902
// captures m_globalBackendDataDS, creates m_indirectDrawBuffers, sets up m_mdiDrawCalls ranges
@@ -972,7 +1059,9 @@ void Renderer::deinitSceneResources()
972
1059
rrShapes.clear ();
973
1060
}
974
1061
975
- void Renderer::initScreenSizedResources (uint32_t width, uint32_t height, core::smart_refctd_ptr<ICPUBuffer>&& sampleSequence)
1062
+ constexpr auto DefaultPathDepth = 8u ;
1063
+ constexpr auto MaxPathDepth = 255u ;
1064
+ void Renderer::initScreenSizedResources (uint32_t width, uint32_t height)
976
1065
{
977
1066
m_staticViewData.imageDimensions = {width, height};
978
1067
m_rcpPixelSize = { 2 .f /float (m_staticViewData.imageDimensions .x ),-2 .f /float (m_staticViewData.imageDimensions .y ) };
@@ -1028,7 +1117,7 @@ void Renderer::initScreenSizedResources(uint32_t width, uint32_t height, core::s
1028
1117
if (m_staticViewData.pathDepth ==0 )
1029
1118
{
1030
1119
printf (" [ERROR] No suppoerted Integrator found in the Mitsuba XML, setting default.\n " );
1031
- m_staticViewData.pathDepth = 8u ;
1120
+ m_staticViewData.pathDepth = DefaultPathDepth ;
1032
1121
}
1033
1122
else if (m_staticViewData.pathDepth >MAX_PATH_DEPTH)
1034
1123
{
@@ -1166,11 +1255,7 @@ void Renderer::initScreenSizedResources(uint32_t width, uint32_t height, core::s
1166
1255
{
1167
1256
_staticViewDataBuffer = createFilledBufferAndSetUpInfoFromStruct (infos+0 ,m_staticViewData);
1168
1257
staticViewDataBufferSize = _staticViewDataBuffer->getSize ();
1169
- {
1170
- // upload sequence to GPU
1171
- auto gpubuf = m_driver->createFilledDeviceLocalGPUBufferOnDedMem (sampleSequence->getSize (),sampleSequence->getPointer ());
1172
- infos[1 ].desc = m_driver->createGPUBufferView (gpubuf.get (),asset::EF_R32G32_UINT);
1173
- }
1258
+ infos[1 ].desc = sampleSequence.getBufferView ();
1174
1259
setImageInfo (infos+2 ,asset::EIL_GENERAL,core::smart_refctd_ptr (m_accumulation));
1175
1260
setImageInfo (infos+5 ,asset::EIL_GENERAL,core::smart_refctd_ptr (m_albedoAcc));
1176
1261
setImageInfo (infos+6 ,asset::EIL_GENERAL,core::smart_refctd_ptr (m_normalAcc));
@@ -1301,7 +1386,7 @@ void Renderer::initScreenSizedResources(uint32_t width, uint32_t height, core::s
1301
1386
std::cout << " \n Screen Sized Resources have been initialized (" << width << " x" << height << " )" << std::endl;
1302
1387
std::cout << " \t StaticViewData = " << staticViewDataBufferSize << " bytes" << std::endl;
1303
1388
std::cout << " \t ScrambleBuffer = " << scrambleBufferSize << " bytes" << std::endl;
1304
- std::cout << " \t SampleSequence = " << sampleSequence-> getSize () << " bytes" << std::endl;
1389
+ std::cout << " \t SampleSequence = " << sampleSequence. getBufferView ()-> getByteSize () << " bytes" << std::endl;
1305
1390
std::cout << " \t RayCount Buffer = " << m_rayCountBuffer->getSize () << " bytes" << std::endl;
1306
1391
for (auto i=0u ; i<2u ; i++)
1307
1392
std::cout << " \t Intersection Buffer[" << i << " ] = " << m_intersectionBuffer[i].buffer ->getSize () << " bytes" << std::endl;
@@ -1362,7 +1447,7 @@ void Renderer::deinitScreenSizedResources()
1362
1447
m_resolvePipeline = nullptr ;
1363
1448
1364
1449
m_staticViewData.imageDimensions = {0u , 0u };
1365
- m_staticViewData.pathDepth = 8u ;
1450
+ m_staticViewData.pathDepth = DefaultPathDepth ;
1366
1451
m_staticViewData.noRussianRouletteDepth = 5u ;
1367
1452
m_staticViewData.samplesPerPixelPerDispatch = 1u ;
1368
1453
m_totalRaysCast = 0ull ;
0 commit comments