@@ -23,8 +23,8 @@ Renderer::Renderer(IVideoDriver* _driver, IAssetManager* _assetManager, scene::I
23
23
#ifdef _IRR_BUILD_OPTIX_
24
24
m_optixManager (), m_cudaStream(nullptr ), m_optixContext(),
25
25
#endif
26
- rrShapeCache ()/* , rrInstances()*/ , m_sceneBound(FLT_MAX,FLT_MAX,FLT_MAX,-FLT_MAX,-FLT_MAX,-FLT_MAX),
27
- m_staticViewData{{0 .f ,0 .f ,0 .f },0u ,{0 .f ,0 .f },{0 .f ,0 .f },{0u ,0u },0u ,0u },m_raytraceCommonData{{},{},0 .f ,0u ,0u ,0 .f },
26
+ rrShapeCache (), rrInstances(), m_sceneBound(FLT_MAX,FLT_MAX,FLT_MAX,-FLT_MAX,-FLT_MAX,-FLT_MAX),
27
+ m_maxRaysPerDispatch( 0 ), m_staticViewData{{0 .f ,0 .f ,0 .f },0u ,{0 .f ,0 .f },{0 .f ,0 .f },{0u ,0u },0u ,0u },m_raytraceCommonData{{},{},0 .f ,0u ,0u ,0 .f },
28
28
m_indirectDrawBuffers{nullptr },m_cullPushConstants{core::matrix4SIMD (),0u ,0u ,1 .f ,0xdeadbeefu },m_cullWorkGroups(0u ),
29
29
m_raygenWorkGroups{0u ,0u },m_resolveWorkGroups{0u ,0u },
30
30
m_visibilityBuffer (nullptr ),tmpTonemapBuffer(nullptr ),m_colorBuffer(nullptr )
@@ -353,6 +353,9 @@ Renderer::InitializationData Renderer::initSceneObjects(const SAssetBundle& mesh
353
353
{aabb.MinEdge .X ,aabb.MinEdge .Y ,aabb.MinEdge .Z },drawID,
354
354
{aabb.MaxEdge .X ,aabb.MaxEdge .Y ,aabb.MaxEdge .Z },baseInstance
355
355
});
356
+
357
+ // TODO: set up smgr data
358
+ m_mock_smgr.m_objectData .push_back ({worldMatrix,nullptr ,{drawID}});
356
359
}
357
360
mdiData.emplace_back (DrawElementsIndirectCommand_t{
358
361
static_cast <uint32_t >(gpumb->getIndexCount ()), // pretty sure index count should be a uint32_t
@@ -382,6 +385,14 @@ Renderer::InitializationData Renderer::initSceneObjects(const SAssetBundle& mesh
382
385
queueUpMDI (call);
383
386
}
384
387
388
+ // set up Radeon Rays instances
389
+ {
390
+ core::vector<ext::RadeonRays::MockSceneManager::ObjectGUID> ids (m_mock_smgr.m_objectData .size ());
391
+ std::iota (ids.begin (),ids.end (),0u );
392
+ m_rrManager->makeRRInstances (rrInstances, &m_mock_smgr, rrShapeCache, m_assetManager, ids.begin (), ids.end ());
393
+ m_rrManager->attachInstances (rrInstances.begin (), rrInstances.end ());
394
+ }
395
+
385
396
386
397
m_cullPushConstants.maxObjectCount = objectStaticData.size ();
387
398
m_cullPushConstants.currentCommandBufferIx = 0x0u ;
@@ -591,25 +602,43 @@ void Renderer::init(const SAssetBundle& meshes,
591
602
m_staticViewData.rcpPixelSize = { 1 .f /float (m_staticViewData.imageDimensions .x ),1 .f /float (m_staticViewData.imageDimensions .y ) };
592
603
m_staticViewData.rcpHalfPixelSize = { 0 .5f /float (m_staticViewData.imageDimensions .x ),0 .5f /float (m_staticViewData.imageDimensions .y ) };
593
604
}
605
+
606
+ // figure out dispatch sizes
607
+ m_raygenWorkGroups[0 ] = (m_staticViewData.imageDimensions .x -1u )/WORKGROUP_DIM+1u ;
608
+ m_raygenWorkGroups[1 ] = (m_staticViewData.imageDimensions .y -1u )/WORKGROUP_DIM+1u ;
609
+ m_resolveWorkGroups[0 ] = (m_staticViewData.imageDimensions .x -1u )/WORKGROUP_DIM+1u ;
610
+ m_resolveWorkGroups[1 ] = (m_staticViewData.imageDimensions .y -1u )/WORKGROUP_DIM+1u ;
611
+
594
612
const auto renderPixelCount = m_staticViewData.imageDimensions .x *m_staticViewData.imageDimensions .y ;
595
613
// figure out how much Samples Per Pixel Per Dispatch we can afford
596
614
size_t raygenBufferSize, intersectionBufferSize;
597
615
{
616
+ const auto misSamples = 2u ;
617
+ const auto minimumSampleCountPerDispatch = renderPixelCount*misSamples;
598
618
599
- const auto raygenBufferSizePerSample = static_cast <size_t >(renderPixelCount )*sizeof (::RadeonRays::ray);
619
+ const auto raygenBufferSizePerSample = static_cast <size_t >(minimumSampleCountPerDispatch )*sizeof (::RadeonRays::ray);
600
620
assert (raygenBufferSizePerSample<=rayBufferSize);
601
- const auto intersectionBufferSizePerSample = static_cast <size_t >(renderPixelCount )*sizeof (::RadeonRays::Intersection);
621
+ const auto intersectionBufferSizePerSample = static_cast <size_t >(minimumSampleCountPerDispatch )*sizeof (::RadeonRays::Intersection);
602
622
assert (intersectionBufferSizePerSample<=rayBufferSize);
603
623
m_staticViewData.samplesPerPixelPerDispatch = rayBufferSize/(raygenBufferSizePerSample+intersectionBufferSizePerSample);
604
624
assert (m_staticViewData.samplesPerPixelPerDispatch >= 1u );
605
625
printf (" Using %d samples\n " , m_staticViewData.samplesPerPixelPerDispatch );
606
626
607
627
m_staticViewData.samplesPerRowPerDispatch = m_staticViewData.imageDimensions .x *m_staticViewData.samplesPerPixelPerDispatch ;
608
628
629
+ m_maxRaysPerDispatch = minimumSampleCountPerDispatch*m_staticViewData.samplesPerPixelPerDispatch ;
609
630
raygenBufferSize = raygenBufferSizePerSample*m_staticViewData.samplesPerPixelPerDispatch ;
610
631
intersectionBufferSize = intersectionBufferSizePerSample*m_staticViewData.samplesPerPixelPerDispatch ;
611
632
}
612
633
634
+ // set up raycount buffer for RR
635
+ {
636
+ m_rayCountBuffer.buffer = m_driver->createFilledDeviceLocalGPUBufferOnDedMem (sizeof (uint32_t ),&raygenBufferSize);
637
+ m_rayCountBuffer.asRRBuffer = m_rrManager->linkBuffer (m_rayCountBuffer.buffer .get (), CL_MEM_READ_ONLY);
638
+
639
+ clEnqueueAcquireGLObjects (m_rrManager->getCLCommandQueue (), 1u , &m_rayCountBuffer.asRRBuffer .second , 0u , nullptr , nullptr );
640
+ }
641
+
613
642
// create out screen-sized textures
614
643
m_accumulation = createScreenSizedTexture (EF_R32G32_UINT);
615
644
m_tonemapOutput = createScreenSizedTexture (EF_A2B10G10R10_UNORM_PACK32);
@@ -626,11 +655,14 @@ void Renderer::init(const SAssetBundle& meshes,
626
655
constexpr uint32_t descriptorExclScanSum[4 ] = { 0u ,descriptorCountInSet[0 ],descriptorCountInSet[0 ]+descriptorCountInSet[1 ],descriptorCountInSet[0 ]+descriptorCountInSet[1 ]+descriptorCountInSet[2 ] };
627
656
628
657
629
- auto createEmptyBufferAndSetUpInfo = [&](IGPUDescriptorSet::SDescriptorInfo* info, size_t size) -> void
658
+ auto createEmptyInteropBufferAndSetUpInfo = [&](IGPUDescriptorSet::SDescriptorInfo* info, InteropBuffer& interopBuffer , size_t size) -> void
630
659
{
660
+ interopBuffer.buffer = m_driver->createDeviceLocalGPUBufferOnDedMem (size);
661
+ interopBuffer.asRRBuffer = m_rrManager->linkBuffer (interopBuffer.buffer .get (), CL_MEM_READ_ONLY);
662
+
631
663
info->buffer .size = size;
632
664
info->buffer .offset = 0u ;
633
- info->desc = m_driver-> createDeviceLocalGPUBufferOnDedMem (size );
665
+ info->desc = core::smart_refctd_ptr (interopBuffer. buffer );
634
666
};
635
667
auto createFilledBufferAndSetUpInfo = [&](IGPUDescriptorSet::SDescriptorInfo* info, size_t size, const void * data) -> void
636
668
{
@@ -679,7 +711,7 @@ void Renderer::init(const SAssetBundle& meshes,
679
711
auto commonWrites = writes+descriptorExclScanSum[0 ];
680
712
createFilledBufferAndSetUpInfoFromStruct (commonInfos+0 ,m_staticViewData);
681
713
setImageInfo (commonInfos+1 ,asset::EIL_GENERAL,core::smart_refctd_ptr (m_accumulation));
682
- createEmptyBufferAndSetUpInfo (commonInfos+2 ,raygenBufferSize);
714
+ createEmptyInteropBufferAndSetUpInfo (commonInfos+2 ,m_rayBuffer ,raygenBufferSize);
683
715
createFilledBufferAndSetUpInfoFromVector (commonInfos+3 ,initData.lightCDF );
684
716
createFilledBufferAndSetUpInfoFromVector (commonInfos+4 ,initData.lights );
685
717
createFilledBufferAndSetUpInfoFromVector (commonInfos+5 ,initData.lightRadiances );
@@ -733,7 +765,7 @@ void Renderer::init(const SAssetBundle& meshes,
733
765
{
734
766
auto resolveInfos = infos+descriptorExclScanSum[2 ];
735
767
auto resolveWrites = writes+descriptorExclScanSum[2 ];
736
- createEmptyBufferAndSetUpInfo (resolveInfos+0 ,intersectionBufferSize);
768
+ createEmptyInteropBufferAndSetUpInfo (resolveInfos+0 ,m_intersectionBuffer ,intersectionBufferSize);
737
769
setImageInfo (resolveInfos+1 ,asset::EIL_GENERAL,core::smart_refctd_ptr (m_tonemapOutput));
738
770
739
771
@@ -742,17 +774,6 @@ void Renderer::init(const SAssetBundle& meshes,
742
774
743
775
m_driver->updateDescriptorSets (descriptorExclScanSum[3 ], writes, 0u , nullptr );
744
776
}
745
-
746
- // set up radeon rays instances
747
- {
748
- #if TODO
749
- core::vector<int32_t > ids (nodes.size ());
750
- std::iota (ids.begin (), ids.end (), 0 );
751
- auto nodesBegin = &nodes.data ()->get ();
752
- m_rrManager->makeRRInstances (rrInstances, rrShapeCache, m_assetManager, nodesBegin, nodesBegin+nodes.size (), ids.data ());
753
- m_rrManager->attachInstances (rrInstances.begin (), rrInstances.end ());
754
- #endif
755
- }
756
777
}
757
778
758
779
@@ -781,31 +802,6 @@ void Renderer::init(const SAssetBundle& meshes,
781
802
auto spec = m_driver->createGPUSpecializedShader (shader.get (), info);
782
803
m_compostPipeline = m_driver->createGPUComputePipeline (nullptr , core::smart_refctd_ptr (m_compostLayout), std::move (spec));
783
804
}
784
-
785
- //
786
- constexpr auto RAYGEN_WORK_GROUP_DIM = 16u ;
787
- m_raygenWorkGroups[0 ] = (renderSize[0 ]+RAYGEN_WORK_GROUP_DIM-1 )/RAYGEN_WORK_GROUP_DIM;
788
- m_raygenWorkGroups[1 ] = (renderSize[1 ]+RAYGEN_WORK_GROUP_DIM-1 )/RAYGEN_WORK_GROUP_DIM;
789
- constexpr auto RESOLVE_WORK_GROUP_DIM = 32u ;
790
- m_resolveWorkGroups[0 ] = (renderSize[0 ]+RESOLVE_WORK_GROUP_DIM-1 )/RESOLVE_WORK_GROUP_DIM;
791
- m_resolveWorkGroups[1 ] = (renderSize[1 ]+RESOLVE_WORK_GROUP_DIM-1 )/RESOLVE_WORK_GROUP_DIM;
792
-
793
- raygenBufferSize *= m_samplesPerPixelPerDispatch;
794
- m_rayBuffer = m_driver->createDeviceLocalGPUBufferOnDedMem (raygenBufferSize);
795
-
796
- shadowBufferSize *= m_samplesPerPixelPerDispatch;
797
- m_intersectionBuffer = m_driver->createDeviceLocalGPUBufferOnDedMem (shadowBufferSize);
798
-
799
- m_rayCountBuffer = m_driver->createFilledDeviceLocalGPUBufferOnDedMem (sizeof (uint32_t ),&m_rayCountPerDispatch);
800
-
801
- m_rayBufferAsRR = m_rrManager->linkBuffer (m_rayBuffer.get (), CL_MEM_READ_WRITE);
802
- // TODO: clear hit buffer to -1 before usage
803
- m_intersectionBufferAsRR = m_rrManager->linkBuffer (m_intersectionBuffer.get (), CL_MEM_READ_WRITE);
804
- m_rayCountBufferAsRR = m_rrManager->linkBuffer (m_rayCountBuffer.get (), CL_MEM_READ_ONLY);
805
-
806
- const cl_mem clObjects[] = { m_rayCountBufferAsRR.second };
807
- auto objCount = sizeof (clObjects)/sizeof (cl_mem);
808
- clEnqueueAcquireGLObjects (m_rrManager->getCLCommandQueue (), objCount, clObjects, 0u , nullptr , nullptr );
809
805
#endif
810
806
811
807
m_visibilityBuffer = m_driver->addFrameBuffer ();
@@ -909,39 +905,23 @@ void Renderer::deinit()
909
905
m_colorBuffer = nullptr ;
910
906
}
911
907
m_accumulation = m_tonemapOutput = nullptr ;
912
-
913
- m_resolveWorkGroups[0 ] = m_resolveWorkGroups[1 ] = 0u ;
914
- m_resolveDS = nullptr ;
915
-
916
- #if TODO
917
- // release OpenCL objects and wait for OpenCL to finish
918
- const cl_mem clObjects[] = { m_rayCountBufferAsRR.second };
919
- auto objCount = sizeof (clObjects) / sizeof (cl_mem);
920
- clEnqueueReleaseGLObjects (commandQueue, objCount, clObjects, 1u , nullptr , nullptr );
908
+
909
+ auto deleteInteropBuffer = [&](InteropBuffer& buffer) -> void
910
+ {
911
+ if (buffer.asRRBuffer .first )
912
+ m_rrManager->deleteRRBuffer (buffer.asRRBuffer .first );
913
+ buffer = {};
914
+ };
915
+ deleteInteropBuffer (m_intersectionBuffer);
916
+ deleteInteropBuffer (m_rayBuffer);
917
+ // release the last OpenCL object and wait for OpenCL to finish
918
+ clEnqueueReleaseGLObjects (commandQueue, 1u , &m_rayCountBuffer.asRRBuffer .second , 1u , nullptr , nullptr );
921
919
clFlush (commandQueue);
922
920
clFinish (commandQueue);
921
+ deleteInteropBuffer (m_rayCountBuffer);
923
922
924
- if (m_rayBufferAsRR.first )
925
- {
926
- m_rrManager->deleteRRBuffer (m_rayBufferAsRR.first );
927
- m_rayBufferAsRR = {nullptr ,nullptr };
928
- }
929
- if (m_intersectionBufferAsRR.first )
930
- {
931
- m_rrManager->deleteRRBuffer (m_intersectionBufferAsRR.first );
932
- m_intersectionBufferAsRR = {nullptr ,nullptr };
933
- }
934
- if (m_rayCountBufferAsRR.first )
935
- {
936
- m_rrManager->deleteRRBuffer (m_rayCountBufferAsRR.first );
937
- m_rayCountBufferAsRR = {nullptr ,nullptr };
938
- }
939
- m_rayBuffer = m_intersectionBuffer = m_rayCountBuffer = nullptr ;
940
-
941
- m_rrManager->detachInstances (rrInstances.begin (),rrInstances.end ());
942
- m_rrManager->deleteInstances (rrInstances.begin (),rrInstances.end ());
943
- rrInstances.clear (); // TODO MOVE
944
- #endif
923
+ m_resolveWorkGroups[0 ] = m_resolveWorkGroups[1 ] = 0u ;
924
+ m_resolveDS = nullptr ;
945
925
946
926
m_raygenWorkGroups[0 ] = m_raygenWorkGroups[1 ] = 0u ;
947
927
m_raygenDS = nullptr ;
@@ -958,10 +938,16 @@ void Renderer::deinit()
958
938
959
939
m_raytraceCommonData = {{},{},0 .f ,0u ,0u ,0 .f };
960
940
m_staticViewData = {{0 .f ,0 .f ,0 .f },0u ,{0 .f ,0 .f },{0 .f ,0 .f },{0u ,0u },0u ,0u };
941
+ m_maxRaysPerDispatch = 0u ;
961
942
m_sceneBound = core::aabbox3df (FLT_MAX, FLT_MAX, FLT_MAX, -FLT_MAX, -FLT_MAX, -FLT_MAX);
962
943
963
- m_rrManager->deleteShapes (rrShapeCache.begin (), rrShapeCache.end ());
964
- rrShapeCache.clear ();
944
+ m_rrManager->detachInstances (rrInstances.begin (),rrInstances.end ());
945
+ m_rrManager->deleteInstances (rrInstances.begin (),rrInstances.end ());
946
+ rrInstances.clear ();
947
+ m_mock_smgr = {};
948
+
949
+ m_rrManager->deleteShapes (rrShapeCache.m_gpuAssociative .begin (), rrShapeCache.m_gpuAssociative .end ());
950
+ rrShapeCache = {};
965
951
}
966
952
967
953
@@ -978,6 +964,7 @@ void Renderer::render(irr::ITimer* timer)
978
964
camera->render ();
979
965
980
966
const auto currentViewProj = camera->getConcatenatedMatrix ();
967
+ // TODO: instead of rasterizing vis-buffer only once, subpixel jitter it to obtain AA
981
968
if (!core::equals (prevViewProj,currentViewProj,core::ROUNDING_ERROR<core::matrix4SIMD>()*1000.0 ))
982
969
{
983
970
m_raytraceCommonData.framesDispatched = 0u ;
@@ -1047,25 +1034,24 @@ void Renderer::render(irr::ITimer* timer)
1047
1034
}
1048
1035
1049
1036
// do radeon rays
1050
- #if TODO
1051
- m_rrManager->update (rrInstances);
1052
- #endif
1037
+ m_rrManager->update (&m_mock_smgr,rrInstances.begin (),rrInstances.end ());
1053
1038
if (m_rrManager->hasImplicitCL2GLSync ())
1054
1039
glFlush ();
1055
1040
else
1056
1041
glFinish ();
1057
1042
1058
- auto commandQueue = m_rrManager-> getCLCommandQueue ();
1043
+ if ( false ) // TODO
1059
1044
{
1060
- #if TODO
1061
- const cl_mem clObjects[] = {m_rayBufferAsRR.second ,m_intersectionBufferAsRR.second };
1062
- auto objCount = sizeof (clObjects)/sizeof (cl_mem);
1045
+ auto commandQueue = m_rrManager->getCLCommandQueue ();
1046
+
1047
+ const cl_mem clObjects[] = {m_rayBuffer.asRRBuffer .second ,m_intersectionBuffer.asRRBuffer .second };
1048
+ const auto objCount = sizeof (clObjects)/sizeof (cl_mem);
1063
1049
1064
1050
cl_event acquired = nullptr ;
1065
1051
clEnqueueAcquireGLObjects (commandQueue,objCount,clObjects,0u ,nullptr ,&acquired);
1066
1052
1067
1053
clEnqueueWaitForEvents (commandQueue,1u ,&acquired);
1068
- m_rrManager->getRadeonRaysAPI ()->QueryOcclusion (m_rayBufferAsRR. first ,m_rayCountBufferAsRR. first ,m_rayCountPerDispatch,m_intersectionBufferAsRR .first ,nullptr ,nullptr );
1054
+ m_rrManager->getRadeonRaysAPI ()->QueryIntersection (m_rayBuffer. asRRBuffer . first ,m_rayCountBuffer. asRRBuffer . first ,m_maxRaysPerDispatch,m_intersectionBuffer. asRRBuffer .first ,nullptr ,nullptr );
1069
1055
cl_event raycastDone = nullptr ;
1070
1056
clEnqueueMarker (commandQueue,&raycastDone);
1071
1057
@@ -1081,7 +1067,6 @@ void Renderer::render(irr::ITimer* timer)
1081
1067
clFlush (commandQueue);
1082
1068
clWaitForEvents (1u , &released);
1083
1069
}
1084
- #endif
1085
1070
}
1086
1071
1087
1072
// use raycast results
0 commit comments