Skip to content

Commit 7a35e7f

Browse files
last stretch before trying to compile the raygen and resolve compute shaders
1 parent c543216 commit 7a35e7f

File tree

2 files changed

+74
-99
lines changed

2 files changed

+74
-99
lines changed

examples_tests/22.RaytracedAO/dirty_source/ExtraCrap.cpp

Lines changed: 70 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ Renderer::Renderer(IVideoDriver* _driver, IAssetManager* _assetManager, scene::I
2323
#ifdef _IRR_BUILD_OPTIX_
2424
m_optixManager(), m_cudaStream(nullptr), m_optixContext(),
2525
#endif
26-
rrShapeCache()/*, rrInstances()*/, m_sceneBound(FLT_MAX,FLT_MAX,FLT_MAX,-FLT_MAX,-FLT_MAX,-FLT_MAX),
27-
m_staticViewData{{0.f,0.f,0.f},0u,{0.f,0.f},{0.f,0.f},{0u,0u},0u,0u},m_raytraceCommonData{{},{},0.f,0u,0u,0.f},
26+
rrShapeCache(), rrInstances(), m_sceneBound(FLT_MAX,FLT_MAX,FLT_MAX,-FLT_MAX,-FLT_MAX,-FLT_MAX),
27+
m_maxRaysPerDispatch(0), m_staticViewData{{0.f,0.f,0.f},0u,{0.f,0.f},{0.f,0.f},{0u,0u},0u,0u},m_raytraceCommonData{{},{},0.f,0u,0u,0.f},
2828
m_indirectDrawBuffers{nullptr},m_cullPushConstants{core::matrix4SIMD(),0u,0u,1.f,0xdeadbeefu},m_cullWorkGroups(0u),
2929
m_raygenWorkGroups{0u,0u},m_resolveWorkGroups{0u,0u},
3030
m_visibilityBuffer(nullptr),tmpTonemapBuffer(nullptr),m_colorBuffer(nullptr)
@@ -353,6 +353,9 @@ Renderer::InitializationData Renderer::initSceneObjects(const SAssetBundle& mesh
353353
{aabb.MinEdge.X,aabb.MinEdge.Y,aabb.MinEdge.Z},drawID,
354354
{aabb.MaxEdge.X,aabb.MaxEdge.Y,aabb.MaxEdge.Z},baseInstance
355355
});
356+
357+
// TODO: set up smgr data
358+
m_mock_smgr.m_objectData.push_back({worldMatrix,nullptr,{drawID}});
356359
}
357360
mdiData.emplace_back(DrawElementsIndirectCommand_t{
358361
static_cast<uint32_t>(gpumb->getIndexCount()), // pretty sure index count should be a uint32_t
@@ -382,6 +385,14 @@ Renderer::InitializationData Renderer::initSceneObjects(const SAssetBundle& mesh
382385
queueUpMDI(call);
383386
}
384387

388+
// set up Radeon Rays instances
389+
{
390+
core::vector<ext::RadeonRays::MockSceneManager::ObjectGUID> ids(m_mock_smgr.m_objectData.size());
391+
std::iota(ids.begin(),ids.end(),0u);
392+
m_rrManager->makeRRInstances(rrInstances, &m_mock_smgr, rrShapeCache, m_assetManager, ids.begin(), ids.end());
393+
m_rrManager->attachInstances(rrInstances.begin(), rrInstances.end());
394+
}
395+
385396

386397
m_cullPushConstants.maxObjectCount = objectStaticData.size();
387398
m_cullPushConstants.currentCommandBufferIx = 0x0u;
@@ -591,25 +602,43 @@ void Renderer::init(const SAssetBundle& meshes,
591602
m_staticViewData.rcpPixelSize = { 1.f/float(m_staticViewData.imageDimensions.x),1.f/float(m_staticViewData.imageDimensions.y) };
592603
m_staticViewData.rcpHalfPixelSize = { 0.5f/float(m_staticViewData.imageDimensions.x),0.5f/float(m_staticViewData.imageDimensions.y) };
593604
}
605+
606+
// figure out dispatch sizes
607+
m_raygenWorkGroups[0] = (m_staticViewData.imageDimensions.x-1u)/WORKGROUP_DIM+1u;
608+
m_raygenWorkGroups[1] = (m_staticViewData.imageDimensions.y-1u)/WORKGROUP_DIM+1u;
609+
m_resolveWorkGroups[0] = (m_staticViewData.imageDimensions.x-1u)/WORKGROUP_DIM+1u;
610+
m_resolveWorkGroups[1] = (m_staticViewData.imageDimensions.y-1u)/WORKGROUP_DIM+1u;
611+
594612
const auto renderPixelCount = m_staticViewData.imageDimensions.x*m_staticViewData.imageDimensions.y;
595613
// figure out how much Samples Per Pixel Per Dispatch we can afford
596614
size_t raygenBufferSize, intersectionBufferSize;
597615
{
616+
const auto misSamples = 2u;
617+
const auto minimumSampleCountPerDispatch = renderPixelCount*misSamples;
598618

599-
const auto raygenBufferSizePerSample = static_cast<size_t>(renderPixelCount)*sizeof(::RadeonRays::ray);
619+
const auto raygenBufferSizePerSample = static_cast<size_t>(minimumSampleCountPerDispatch)*sizeof(::RadeonRays::ray);
600620
assert(raygenBufferSizePerSample<=rayBufferSize);
601-
const auto intersectionBufferSizePerSample = static_cast<size_t>(renderPixelCount)*sizeof(::RadeonRays::Intersection);
621+
const auto intersectionBufferSizePerSample = static_cast<size_t>(minimumSampleCountPerDispatch)*sizeof(::RadeonRays::Intersection);
602622
assert(intersectionBufferSizePerSample<=rayBufferSize);
603623
m_staticViewData.samplesPerPixelPerDispatch = rayBufferSize/(raygenBufferSizePerSample+intersectionBufferSizePerSample);
604624
assert(m_staticViewData.samplesPerPixelPerDispatch >= 1u);
605625
printf("Using %d samples\n", m_staticViewData.samplesPerPixelPerDispatch);
606626

607627
m_staticViewData.samplesPerRowPerDispatch = m_staticViewData.imageDimensions.x*m_staticViewData.samplesPerPixelPerDispatch;
608628

629+
m_maxRaysPerDispatch = minimumSampleCountPerDispatch*m_staticViewData.samplesPerPixelPerDispatch;
609630
raygenBufferSize = raygenBufferSizePerSample*m_staticViewData.samplesPerPixelPerDispatch;
610631
intersectionBufferSize = intersectionBufferSizePerSample*m_staticViewData.samplesPerPixelPerDispatch;
611632
}
612633

634+
// set up raycount buffer for RR
635+
{
636+
m_rayCountBuffer.buffer = m_driver->createFilledDeviceLocalGPUBufferOnDedMem(sizeof(uint32_t),&raygenBufferSize);
637+
m_rayCountBuffer.asRRBuffer = m_rrManager->linkBuffer(m_rayCountBuffer.buffer.get(), CL_MEM_READ_ONLY);
638+
639+
clEnqueueAcquireGLObjects(m_rrManager->getCLCommandQueue(), 1u, &m_rayCountBuffer.asRRBuffer.second, 0u, nullptr, nullptr);
640+
}
641+
613642
// create out screen-sized textures
614643
m_accumulation = createScreenSizedTexture(EF_R32G32_UINT);
615644
m_tonemapOutput = createScreenSizedTexture(EF_A2B10G10R10_UNORM_PACK32);
@@ -626,11 +655,14 @@ void Renderer::init(const SAssetBundle& meshes,
626655
constexpr uint32_t descriptorExclScanSum[4] = { 0u,descriptorCountInSet[0],descriptorCountInSet[0]+descriptorCountInSet[1],descriptorCountInSet[0]+descriptorCountInSet[1]+descriptorCountInSet[2] };
627656

628657

629-
auto createEmptyBufferAndSetUpInfo = [&](IGPUDescriptorSet::SDescriptorInfo* info, size_t size) -> void
658+
auto createEmptyInteropBufferAndSetUpInfo = [&](IGPUDescriptorSet::SDescriptorInfo* info, InteropBuffer& interopBuffer, size_t size) -> void
630659
{
660+
interopBuffer.buffer = m_driver->createDeviceLocalGPUBufferOnDedMem(size);
661+
interopBuffer.asRRBuffer = m_rrManager->linkBuffer(interopBuffer.buffer.get(), CL_MEM_READ_ONLY);
662+
631663
info->buffer.size = size;
632664
info->buffer.offset = 0u;
633-
info->desc = m_driver->createDeviceLocalGPUBufferOnDedMem(size);
665+
info->desc = core::smart_refctd_ptr(interopBuffer.buffer);
634666
};
635667
auto createFilledBufferAndSetUpInfo = [&](IGPUDescriptorSet::SDescriptorInfo* info, size_t size, const void* data) -> void
636668
{
@@ -679,7 +711,7 @@ void Renderer::init(const SAssetBundle& meshes,
679711
auto commonWrites = writes+descriptorExclScanSum[0];
680712
createFilledBufferAndSetUpInfoFromStruct(commonInfos+0,m_staticViewData);
681713
setImageInfo(commonInfos+1,asset::EIL_GENERAL,core::smart_refctd_ptr(m_accumulation));
682-
createEmptyBufferAndSetUpInfo(commonInfos+2,raygenBufferSize);
714+
createEmptyInteropBufferAndSetUpInfo(commonInfos+2,m_rayBuffer,raygenBufferSize);
683715
createFilledBufferAndSetUpInfoFromVector(commonInfos+3,initData.lightCDF);
684716
createFilledBufferAndSetUpInfoFromVector(commonInfos+4,initData.lights);
685717
createFilledBufferAndSetUpInfoFromVector(commonInfos+5,initData.lightRadiances);
@@ -733,7 +765,7 @@ void Renderer::init(const SAssetBundle& meshes,
733765
{
734766
auto resolveInfos = infos+descriptorExclScanSum[2];
735767
auto resolveWrites = writes+descriptorExclScanSum[2];
736-
createEmptyBufferAndSetUpInfo(resolveInfos+0,intersectionBufferSize);
768+
createEmptyInteropBufferAndSetUpInfo(resolveInfos+0,m_intersectionBuffer,intersectionBufferSize);
737769
setImageInfo(resolveInfos+1,asset::EIL_GENERAL,core::smart_refctd_ptr(m_tonemapOutput));
738770

739771

@@ -742,17 +774,6 @@ void Renderer::init(const SAssetBundle& meshes,
742774

743775
m_driver->updateDescriptorSets(descriptorExclScanSum[3], writes, 0u, nullptr);
744776
}
745-
746-
// set up radeon rays instances
747-
{
748-
#if TODO
749-
core::vector<int32_t> ids(nodes.size());
750-
std::iota(ids.begin(), ids.end(), 0);
751-
auto nodesBegin = &nodes.data()->get();
752-
m_rrManager->makeRRInstances(rrInstances, rrShapeCache, m_assetManager, nodesBegin, nodesBegin+nodes.size(), ids.data());
753-
m_rrManager->attachInstances(rrInstances.begin(), rrInstances.end());
754-
#endif
755-
}
756777
}
757778

758779

@@ -781,31 +802,6 @@ void Renderer::init(const SAssetBundle& meshes,
781802
auto spec = m_driver->createGPUSpecializedShader(shader.get(), info);
782803
m_compostPipeline = m_driver->createGPUComputePipeline(nullptr, core::smart_refctd_ptr(m_compostLayout), std::move(spec));
783804
}
784-
785-
//
786-
constexpr auto RAYGEN_WORK_GROUP_DIM = 16u;
787-
m_raygenWorkGroups[0] = (renderSize[0]+RAYGEN_WORK_GROUP_DIM-1)/RAYGEN_WORK_GROUP_DIM;
788-
m_raygenWorkGroups[1] = (renderSize[1]+RAYGEN_WORK_GROUP_DIM-1)/RAYGEN_WORK_GROUP_DIM;
789-
constexpr auto RESOLVE_WORK_GROUP_DIM = 32u;
790-
m_resolveWorkGroups[0] = (renderSize[0]+RESOLVE_WORK_GROUP_DIM-1)/RESOLVE_WORK_GROUP_DIM;
791-
m_resolveWorkGroups[1] = (renderSize[1]+RESOLVE_WORK_GROUP_DIM-1)/RESOLVE_WORK_GROUP_DIM;
792-
793-
raygenBufferSize *= m_samplesPerPixelPerDispatch;
794-
m_rayBuffer = m_driver->createDeviceLocalGPUBufferOnDedMem(raygenBufferSize);
795-
796-
shadowBufferSize *= m_samplesPerPixelPerDispatch;
797-
m_intersectionBuffer = m_driver->createDeviceLocalGPUBufferOnDedMem(shadowBufferSize);
798-
799-
m_rayCountBuffer = m_driver->createFilledDeviceLocalGPUBufferOnDedMem(sizeof(uint32_t),&m_rayCountPerDispatch);
800-
801-
m_rayBufferAsRR = m_rrManager->linkBuffer(m_rayBuffer.get(), CL_MEM_READ_WRITE);
802-
// TODO: clear hit buffer to -1 before usage
803-
m_intersectionBufferAsRR = m_rrManager->linkBuffer(m_intersectionBuffer.get(), CL_MEM_READ_WRITE);
804-
m_rayCountBufferAsRR = m_rrManager->linkBuffer(m_rayCountBuffer.get(), CL_MEM_READ_ONLY);
805-
806-
const cl_mem clObjects[] = { m_rayCountBufferAsRR.second };
807-
auto objCount = sizeof(clObjects)/sizeof(cl_mem);
808-
clEnqueueAcquireGLObjects(m_rrManager->getCLCommandQueue(), objCount, clObjects, 0u, nullptr, nullptr);
809805
#endif
810806

811807
m_visibilityBuffer = m_driver->addFrameBuffer();
@@ -909,39 +905,23 @@ void Renderer::deinit()
909905
m_colorBuffer = nullptr;
910906
}
911907
m_accumulation = m_tonemapOutput = nullptr;
912-
913-
m_resolveWorkGroups[0] = m_resolveWorkGroups[1] = 0u;
914-
m_resolveDS = nullptr;
915-
916-
#if TODO
917-
// release OpenCL objects and wait for OpenCL to finish
918-
const cl_mem clObjects[] = { m_rayCountBufferAsRR.second };
919-
auto objCount = sizeof(clObjects) / sizeof(cl_mem);
920-
clEnqueueReleaseGLObjects(commandQueue, objCount, clObjects, 1u, nullptr, nullptr);
908+
909+
auto deleteInteropBuffer = [&](InteropBuffer& buffer) -> void
910+
{
911+
if (buffer.asRRBuffer.first)
912+
m_rrManager->deleteRRBuffer(buffer.asRRBuffer.first);
913+
buffer = {};
914+
};
915+
deleteInteropBuffer(m_intersectionBuffer);
916+
deleteInteropBuffer(m_rayBuffer);
917+
// release the last OpenCL object and wait for OpenCL to finish
918+
clEnqueueReleaseGLObjects(commandQueue, 1u, &m_rayCountBuffer.asRRBuffer.second, 1u, nullptr, nullptr);
921919
clFlush(commandQueue);
922920
clFinish(commandQueue);
921+
deleteInteropBuffer(m_rayCountBuffer);
923922

924-
if (m_rayBufferAsRR.first)
925-
{
926-
m_rrManager->deleteRRBuffer(m_rayBufferAsRR.first);
927-
m_rayBufferAsRR = {nullptr,nullptr};
928-
}
929-
if (m_intersectionBufferAsRR.first)
930-
{
931-
m_rrManager->deleteRRBuffer(m_intersectionBufferAsRR.first);
932-
m_intersectionBufferAsRR = {nullptr,nullptr};
933-
}
934-
if (m_rayCountBufferAsRR.first)
935-
{
936-
m_rrManager->deleteRRBuffer(m_rayCountBufferAsRR.first);
937-
m_rayCountBufferAsRR = {nullptr,nullptr};
938-
}
939-
m_rayBuffer = m_intersectionBuffer = m_rayCountBuffer = nullptr;
940-
941-
m_rrManager->detachInstances(rrInstances.begin(),rrInstances.end());
942-
m_rrManager->deleteInstances(rrInstances.begin(),rrInstances.end());
943-
rrInstances.clear(); // TODO MOVE
944-
#endif
923+
m_resolveWorkGroups[0] = m_resolveWorkGroups[1] = 0u;
924+
m_resolveDS = nullptr;
945925

946926
m_raygenWorkGroups[0] = m_raygenWorkGroups[1] = 0u;
947927
m_raygenDS = nullptr;
@@ -958,10 +938,16 @@ void Renderer::deinit()
958938

959939
m_raytraceCommonData = {{},{},0.f,0u,0u,0.f};
960940
m_staticViewData = {{0.f,0.f,0.f},0u,{0.f,0.f},{0.f,0.f},{0u,0u},0u,0u};
941+
m_maxRaysPerDispatch = 0u;
961942
m_sceneBound = core::aabbox3df(FLT_MAX, FLT_MAX, FLT_MAX, -FLT_MAX, -FLT_MAX, -FLT_MAX);
962943

963-
m_rrManager->deleteShapes(rrShapeCache.begin(), rrShapeCache.end());
964-
rrShapeCache.clear();
944+
m_rrManager->detachInstances(rrInstances.begin(),rrInstances.end());
945+
m_rrManager->deleteInstances(rrInstances.begin(),rrInstances.end());
946+
rrInstances.clear();
947+
m_mock_smgr = {};
948+
949+
m_rrManager->deleteShapes(rrShapeCache.m_gpuAssociative.begin(), rrShapeCache.m_gpuAssociative.end());
950+
rrShapeCache = {};
965951
}
966952

967953

@@ -978,6 +964,7 @@ void Renderer::render(irr::ITimer* timer)
978964
camera->render();
979965

980966
const auto currentViewProj = camera->getConcatenatedMatrix();
967+
// TODO: instead of rasterizing vis-buffer only once, subpixel jitter it to obtain AA
981968
if (!core::equals(prevViewProj,currentViewProj,core::ROUNDING_ERROR<core::matrix4SIMD>()*1000.0))
982969
{
983970
m_raytraceCommonData.framesDispatched = 0u;
@@ -1047,25 +1034,24 @@ void Renderer::render(irr::ITimer* timer)
10471034
}
10481035

10491036
// do radeon rays
1050-
#if TODO
1051-
m_rrManager->update(rrInstances);
1052-
#endif
1037+
m_rrManager->update(&m_mock_smgr,rrInstances.begin(),rrInstances.end());
10531038
if (m_rrManager->hasImplicitCL2GLSync())
10541039
glFlush();
10551040
else
10561041
glFinish();
10571042

1058-
auto commandQueue = m_rrManager->getCLCommandQueue();
1043+
if (false) // TODO
10591044
{
1060-
#if TODO
1061-
const cl_mem clObjects[] = {m_rayBufferAsRR.second,m_intersectionBufferAsRR.second};
1062-
auto objCount = sizeof(clObjects)/sizeof(cl_mem);
1045+
auto commandQueue = m_rrManager->getCLCommandQueue();
1046+
1047+
const cl_mem clObjects[] = {m_rayBuffer.asRRBuffer.second,m_intersectionBuffer.asRRBuffer.second};
1048+
const auto objCount = sizeof(clObjects)/sizeof(cl_mem);
10631049

10641050
cl_event acquired = nullptr;
10651051
clEnqueueAcquireGLObjects(commandQueue,objCount,clObjects,0u,nullptr,&acquired);
10661052

10671053
clEnqueueWaitForEvents(commandQueue,1u,&acquired);
1068-
m_rrManager->getRadeonRaysAPI()->QueryOcclusion(m_rayBufferAsRR.first,m_rayCountBufferAsRR.first,m_rayCountPerDispatch,m_intersectionBufferAsRR.first,nullptr,nullptr);
1054+
m_rrManager->getRadeonRaysAPI()->QueryIntersection(m_rayBuffer.asRRBuffer.first,m_rayCountBuffer.asRRBuffer.first,m_maxRaysPerDispatch,m_intersectionBuffer.asRRBuffer.first,nullptr,nullptr);
10691055
cl_event raycastDone = nullptr;
10701056
clEnqueueMarker(commandQueue,&raycastDone);
10711057

@@ -1081,7 +1067,6 @@ void Renderer::render(irr::ITimer* timer)
10811067
clFlush(commandQueue);
10821068
clWaitForEvents(1u, &released);
10831069
}
1084-
#endif
10851070
}
10861071

10871072
// use raycast results

examples_tests/22.RaytracedAO/dirty_source/ExtraCrap.h

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -90,17 +90,7 @@ class Renderer : public irr::core::IReferenceCounted, public irr::core::Interfac
9090

9191
irr::core::smart_refctd_ptr<irr::video::IGPUImageView> createScreenSizedTexture(irr::asset::E_FORMAT format);
9292

93-
#if TODO
94-
core::smart_refctd_ptr<video::IGPUDescriptorSetLayout> createDS2layoutCompost(bool useDenoiser, core::smart_refctd_ptr<video::IGPUSampler>& nearstSampler);
95-
core::smart_refctd_ptr<video::IGPUDescriptorSet> createDS2Compost(bool useDenoiser,
96-
core::smart_refctd_ptr<video::IGPUSampler>& nearestSampler
97-
);
98-
core::smart_refctd_ptr<video::IGPUPipelineLayout> createLayoutCompost();
99-
100-
core::smart_refctd_ptr<video::IGPUDescriptorSetLayout> createDS2layoutRaygen(core::smart_refctd_ptr<video::IGPUSampler>& nearstSampler);
101-
core::smart_refctd_ptr<video::IGPUDescriptorSet> createDS2Raygen(core::smart_refctd_ptr<video::IGPUSampler>& nearstSampler);
102-
core::smart_refctd_ptr<video::IGPUPipelineLayout> createLayoutRaygen();
103-
#endif
93+
10494
// constants
10595
const bool m_useDenoiser;
10696

@@ -134,12 +124,12 @@ class Renderer : public irr::core::IReferenceCounted, public irr::core::Interfac
134124

135125

136126
// scene specific data
127+
irr::ext::RadeonRays::MockSceneManager m_mock_smgr;
137128
irr::ext::RadeonRays::Manager::MeshBufferRRShapeCache rrShapeCache;
138-
#if TODO
139-
irr::ext::RadeonRays::Manager::MeshNodeRRInstanceCache rrInstances;
140-
#endif
129+
irr::ext::RadeonRays::Manager::NblInstanceRRInstanceCache rrInstances;
141130

142131
irr::core::aabbox3df m_sceneBound;
132+
uint32_t m_maxRaysPerDispatch;
143133
StaticViewData_t m_staticViewData;
144134
RaytraceShaderCommonData_t m_raytraceCommonData;
145135

0 commit comments

Comments
 (0)