Skip to content

Commit 769d908

Browse files
Merge pull request #124 from Przemog1/culling
Culling
2 parents 799b343 + b2b2c90 commit 769d908

File tree

6 files changed

+287
-7
lines changed

6 files changed

+287
-7
lines changed

examples_tests/41.VisibilityBuffer/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@ if(NOT RES)
44
message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory")
55
endif()
66

7-
nbl_create_executable_project("" "" "" "")
7+
nbl_create_executable_project(../../src/nbl/ext/DebugDraw/CDraw3DLine.cpp "" "" "")
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
#version 430 core

// Shared host/shader header; WORKGROUP_SIZE and the CullData_t / CullShaderData_t
// types used below are presumably declared there (not visible from this file).
#include "rasterizationCommon.h"
layout(local_size_x = WORKGROUP_SIZE) in;

#include <nbl/builtin/glsl/utils/indirect_commands.glsl>

// binding 0: per-batch culling input (bounding box + index of the draw command to patch)
layout(set=0, binding=0, std430, row_major) restrict readonly buffer PerInstanceCull
{
    CullData_t cullData[];
};
// binding 1: indirect draw commands whose instanceCount is rewritten by this shader
layout(set=0, binding=1, std430) restrict coherent buffer IndirectDraws
{
    nbl_glsl_DrawElementsIndirectCommand_t draws[];
} commandBuff;
// binding 2: one matrix slot per batch, filled with the current view-projection matrix
layout(set=0, binding=2, std430) restrict coherent buffer MVPs
{
    mat4 mvps[];
} mvpBuff;

layout(push_constant, row_major) uniform PushConstants
{
    CullShaderData_t data;
} pc;

#include <nbl/builtin/glsl/utils/culling.glsl>
#include <nbl/builtin/glsl/utils/transform.glsl>

// One invocation per batch:
//  1. store the view-projection matrix into the batch's MVP slot,
//  2. unless culling is frozen, ask nbl_glsl_couldBeVisible() about the batch's
//     bounding box and set the matching draw command's instanceCount to 1 or 0.
void main()
{
    const uint batchIdx = gl_GlobalInvocationID.x;

    // The dispatch is rounded up to whole workgroups; surplus invocations do nothing.
    if (batchIdx >= pc.data.maxBatchCount)
        return;

    mvpBuff.mvps[batchIdx] = pc.data.viewProjMatrix;

    // Frozen culling: leave instanceCount exactly as the last unfrozen pass wrote it.
    if (bool(pc.data.freezeCulling))
        return;

    const CullData_t batchCull = cullData[batchIdx];
    const mat2x3 aabb = mat2x3(batchCull.aabbMinEdge, batchCull.aabbMaxEdge);

    uint visibleInstances = 0u;
    if (nbl_glsl_couldBeVisible(pc.data.viewProjMatrix, aabb))
        visibleInstances = 1u;

    commandBuff.draws[batchCull.drawCommandGUID].instanceCount = visibleInstances;
}
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
#ifndef _COMMON_INCLUDED_
#define _COMMON_INCLUDED_

// Constants usable from both GLSL and C++ (plain preprocessor defines only).
#define MAX_TRIANGLES_IN_BATCH 512
#define MAX_ACCUMULATED_SAMPLES 0x10000

#define WORKGROUP_SIZE 256

#ifdef __cplusplus
// When compiled as C++, map the GLSL type names onto C++ equivalents so that
// declarations written with GLSL spellings can be parsed by the host compiler.
#define uint uint32_t
#define vec4 nbl::core::vectorSIMDf
#define mat4 nbl::core::matrix4SIMD
#define mat4x3 nbl::core::matrix3x4SIMD

struct uvec2
{
    uint x;
    uint y;
};

struct vec2
{
    float x;
    float y;
};

struct vec3
{
    float x;
    float y;
    float z;
};
#endif // __cplusplus

#endif // _COMMON_INCLUDED_

examples_tests/41.VisibilityBuffer/main.cpp

Lines changed: 181 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,45 @@
1010
//! I advise checking out this file; it's a basic input handler
1111
#include "../common/QToQuitEventReceiver.h"
1212

13+
#include "nbl/ext/DebugDraw/CDraw3DLine.h"
14+
1315
using namespace nbl;
1416
using namespace nbl::core;
1517
using namespace nbl::asset;
1618
using namespace nbl::video;
1719

20+
bool freezeCulling = false;
21+
22+
class MyEventReceiver : public QToQuitEventReceiver
23+
{
24+
public:
25+
26+
MyEventReceiver()
27+
{
28+
}
29+
30+
bool OnEvent(const SEvent& event)
31+
{
32+
if (event.EventType == nbl::EET_KEY_INPUT_EVENT && !event.KeyInput.PressedDown)
33+
{
34+
switch (event.KeyInput.Key)
35+
{
36+
case nbl::KEY_KEY_Q: // so we can quit
37+
return QToQuitEventReceiver::OnEvent(event);
38+
case nbl::KEY_KEY_C: // freeze culling
39+
freezeCulling = !freezeCulling; // Not enabled/necessary yet
40+
return true;
41+
default:
42+
break;
43+
}
44+
}
45+
46+
return false;
47+
}
48+
};
49+
1850
#include "common.h"
51+
#include "rasterizationCommon.h"
1952

2053
//vt stuff
2154
using STextureData = asset::ICPUVirtualTexture::SMasterTextureData;
@@ -31,6 +64,8 @@ struct commit_t
3164
asset::ICPUSampler::E_TEXTURE_CLAMP uwrap;
3265
asset::ICPUSampler::E_TEXTURE_CLAMP vwrap;
3366
asset::ICPUSampler::E_TEXTURE_BORDER_COLOR border;
67+
68+
core::vector<CullData_t> cullData;
3469
};
3570

3671
constexpr uint32_t TEX_OF_INTEREST_CNT = 6u;
@@ -155,6 +190,19 @@ struct SceneData
155190
smart_refctd_ptr<IGPUBuffer> ubo;
156191
};
157192

193+
struct CullShaderData
194+
{
195+
core::smart_refctd_ptr<IGPUBuffer> perBatchCull;
196+
core::smart_refctd_ptr<IGPUBuffer> commandBuffer;
197+
core::smart_refctd_ptr<IGPUBuffer> mvpBuffer;
198+
199+
core::smart_refctd_ptr<IGPUComputePipeline> cullPipeline;
200+
core::smart_refctd_ptr<IGPUDescriptorSetLayout> cullDSLayout;
201+
core::smart_refctd_ptr<IGPUDescriptorSet> cullDS;
202+
203+
uint32_t maxBatchCount;
204+
};
205+
158206
using MeshPacker = CCPUMeshPackerV2<DrawElementsIndirectCommand_t>;
159207
using GPUMeshPacker = CGPUMeshPackerV2<DrawElementsIndirectCommand_t>;
160208

@@ -204,14 +252,16 @@ int main()
204252

205253
//! Since our cursor will be enslaved, there will be no way to close the window
206254
//! So we listen for the "Q" key being pressed and exit the application
207-
QToQuitEventReceiver receiver;
255+
MyEventReceiver receiver;
208256
device->setEventReceiver(&receiver);
209257

210258
auto* driver = device->getVideoDriver();
211259
auto* smgr = device->getSceneManager();
212260
auto* am = device->getAssetManager();
213261
auto* fs = am->getFileSystem();
214262

263+
auto draw3DLine = ext::DebugDraw::CDraw3DLine::create(driver);
264+
215265
//
216266
auto createScreenSizedImage = [driver,&params](const E_FORMAT format) -> auto
217267
{
@@ -250,6 +300,8 @@ int main()
250300

251301
//
252302
SceneData sceneData;
303+
CullShaderData cullShaderData;
304+
core::vector<std::pair<ext::DebugDraw::S3DLineVertex, ext::DebugDraw::S3DLineVertex>> dbgLines;
253305
{
254306
//
255307
smart_refctd_ptr<IGPUDescriptorSetLayout> perFrameDSLayout,shadingDSLayout;
@@ -507,6 +559,9 @@ int main()
507559
core::vector<BatchInstanceData> batchData;
508560
batchData.reserve(mdiCntBound);
509561

562+
core::vector<CullData_t> batchCullData(mdiCntBound);
563+
auto batchCullDataEnd = batchCullData.begin();
564+
510565
allocDataIt = allocData->begin();
511566
uint32_t mdiListOffset = 0u;
512567
for (auto it=pipelineMeshBufferRanges.begin(); it!=pipelineMeshBufferRanges.end()-1u; )
@@ -517,7 +572,8 @@ int main()
517572
const uint32_t meshMdiBound = mp->calcMDIStructMaxCount(mbRangeBegin,mbRangeEnd);
518573
core::vector<IMeshPackerBase::PackedMeshBufferData> pmbd(std::distance(mbRangeBegin,mbRangeEnd));
519574
core::vector<MeshPacker::CombinedDataOffsetTable> cdot(meshMdiBound);
520-
uint32_t actualMdiCnt = mp->commit(pmbd.data(),cdot.data(),nullptr,&*allocDataIt,mbRangeBegin,mbRangeEnd);
575+
core::vector<aabbox3df> aabbs(meshMdiBound);
576+
uint32_t actualMdiCnt = mp->commit(pmbd.data(),cdot.data(),aabbs.data(),&*allocDataIt,mbRangeBegin,mbRangeEnd);
521577
allocDataIt += meshMdiBound;
522578

523579
if (actualMdiCnt==0u)
@@ -526,6 +582,28 @@ int main()
526582
_NBL_DEBUG_BREAK_IF(true);
527583
}
528584

585+
uint32_t aabbIdx = 0u;
586+
for (auto packedMeshBufferData : pmbd)
587+
{
588+
for (uint32_t i = 0u; i < packedMeshBufferData.mdiParameterCount; i++)
589+
{
590+
batchCullDataEnd->aabbMinEdge.x = aabbs[aabbIdx].MinEdge.X;
591+
batchCullDataEnd->aabbMinEdge.y = aabbs[aabbIdx].MinEdge.Y;
592+
batchCullDataEnd->aabbMinEdge.z = aabbs[aabbIdx].MinEdge.Z;
593+
594+
batchCullDataEnd->aabbMaxEdge.x = aabbs[aabbIdx].MaxEdge.X;
595+
batchCullDataEnd->aabbMaxEdge.y = aabbs[aabbIdx].MaxEdge.Y;
596+
batchCullDataEnd->aabbMaxEdge.z = aabbs[aabbIdx].MaxEdge.Z;
597+
598+
batchCullDataEnd->drawCommandGUID = packedMeshBufferData.mdiParameterOffset + i;
599+
600+
draw3DLine->enqueueBox(dbgLines, aabbs[aabbIdx], 0.0f, 0.0f, 0.0f, 1.0f, core::matrix3x4SIMD());
601+
602+
batchCullDataEnd++;
603+
aabbIdx++;
604+
}
605+
}
606+
529607
sceneData.pushConstantsData.push_back(mdiListOffset);
530608
mdiListOffset += actualMdiCnt;
531609

@@ -558,6 +636,11 @@ int main()
558636
gpump = core::make_smart_refctd_ptr<CGPUMeshPackerV2<>>(driver,mp.get());
559637
sceneData.mdiBuffer = gpump->getPackerDataStore().MDIDataBuffer;
560638
sceneData.idxBuffer = gpump->getPackerDataStore().indexBuffer;
639+
640+
cullShaderData.commandBuffer = gpump->getPackerDataStore().MDIDataBuffer;
641+
cullShaderData.maxBatchCount = std::distance(batchCullData.begin(), batchCullDataEnd);
642+
cullShaderData.perBatchCull = driver->createFilledDeviceLocalGPUBufferOnDedMem(cullShaderData.maxBatchCount * sizeof(CullData_t), batchCullData.data());
643+
cullShaderData.mvpBuffer = driver->createDeviceLocalGPUBufferOnDedMem(cullShaderData.maxBatchCount * sizeof(core::matrix4SIMD));
561644
}
562645
mesh_raw->convertToDummyObject(~0u);
563646

@@ -738,6 +821,94 @@ int main()
738821
}
739822
}
740823

824+
// cull shader ds
825+
{
826+
SPushConstantRange range{ ISpecializedShader::ESS_COMPUTE,0u,sizeof(CullShaderData_t) };
827+
828+
{
829+
IGPUDescriptorSetLayout::SBinding bindings[3];
830+
bindings[0].binding = 0u;
831+
bindings[0].count = 1u;
832+
bindings[0].samplers = nullptr;
833+
bindings[0].stageFlags = ISpecializedShader::ESS_COMPUTE;
834+
bindings[0].type = EDT_STORAGE_BUFFER;
835+
836+
bindings[1].binding = 1u;
837+
bindings[1].count = 1u;
838+
bindings[1].samplers = nullptr;
839+
bindings[1].stageFlags = ISpecializedShader::ESS_COMPUTE;
840+
bindings[1].type = EDT_STORAGE_BUFFER;
841+
842+
bindings[2].binding = 2u;
843+
bindings[2].count = 1u;
844+
bindings[2].samplers = nullptr;
845+
bindings[2].stageFlags = ISpecializedShader::ESS_COMPUTE;
846+
bindings[2].type = EDT_STORAGE_BUFFER;
847+
848+
cullShaderData.cullDSLayout = driver->createGPUDescriptorSetLayout(bindings, bindings + sizeof(bindings) / sizeof(IGPUDescriptorSetLayout::SBinding));
849+
}
850+
851+
{
852+
IGPUDescriptorSet::SDescriptorInfo infos[3];
853+
854+
infos[0].desc = core::smart_refctd_ptr(cullShaderData.perBatchCull);
855+
infos[0].buffer.offset = 0u;
856+
infos[0].buffer.size = cullShaderData.perBatchCull->getSize();
857+
858+
infos[1].desc = core::smart_refctd_ptr(cullShaderData.commandBuffer);
859+
infos[1].buffer.offset = 0u;
860+
infos[1].buffer.size = cullShaderData.commandBuffer->getSize();
861+
862+
infos[2].desc = core::smart_refctd_ptr(cullShaderData.mvpBuffer);
863+
infos[2].buffer.offset = 0u;
864+
infos[2].buffer.size = cullShaderData.mvpBuffer->getSize();
865+
866+
cullShaderData.cullDS = driver->createGPUDescriptorSet(smart_refctd_ptr(cullShaderData.cullDSLayout));
867+
868+
IGPUDescriptorSet::SWriteDescriptorSet writes[3];
869+
870+
for (uint32_t i = 0u; i < 3; i++)
871+
{
872+
writes[i].dstSet = cullShaderData.cullDS.get();
873+
writes[i].binding = i;
874+
writes[i].arrayElement = 0u;
875+
writes[i].count = 1u;
876+
writes[i].descriptorType = EDT_STORAGE_BUFFER;
877+
writes[i].info = infos + i;
878+
}
879+
880+
driver->updateDescriptorSets(sizeof(writes) / sizeof(IGPUDescriptorSet::SWriteDescriptorSet), writes, 0u, nullptr);
881+
}
882+
883+
asset::IAssetLoader::SAssetLoadParams lp;
884+
auto cullShader = IAsset::castDown<ICPUSpecializedShader>(*am->getAsset("../cull.comp", lp).getContents().begin());
885+
assert(cullShader);
886+
const asset::ICPUShader* unspec = cullShader->getUnspecialized();
887+
assert(unspec->containsGLSL());
888+
889+
auto gpuCullShader = driver->getGPUObjectsFromAssets(&cullShader, &cullShader + 1u)->begin()[0];
890+
891+
auto cullPipelineLayout = driver->createGPUPipelineLayout(&range, &range + 1u, core::smart_refctd_ptr(cullShaderData.cullDSLayout));
892+
cullShaderData.cullPipeline = driver->createGPUComputePipeline(nullptr, std::move(cullPipelineLayout), std::move(gpuCullShader));
893+
}
894+
895+
// Runs the culling compute pass for the current frame.
//   vp            - view-projection matrix handed to the shader as a push constant
//   freezeCulling - forwarded to the shader as a 0/1 flag
// The caller is responsible for the memory barrier between this dispatch and
// the indirect draws that consume the patched command buffer.
auto cullBatches = [&driver, &cullShaderData](const core::matrix4SIMD& vp, bool freezeCulling)
{
    // Nothing to cull. Without this guard the unsigned `maxBatchCount - 1u`
    // below wraps around and requests 2^24 workgroups.
    if (cullShaderData.maxBatchCount == 0u)
        return;

    driver->bindDescriptorSets(EPBP_COMPUTE, cullShaderData.cullPipeline->getLayout(), 0u, 1u, &cullShaderData.cullDS.get(), nullptr);
    driver->bindComputePipeline(cullShaderData.cullPipeline.get());

    CullShaderData_t cullPushConstants;
    cullPushConstants.viewProjMatrix = vp;
    cullPushConstants.maxBatchCount = cullShaderData.maxBatchCount;
    cullPushConstants.freezeCulling = static_cast<uint32_t>(freezeCulling);

    driver->pushConstants(cullShaderData.cullPipeline->getLayout(), ISpecializedShader::ESS_COMPUTE, 0u, sizeof(CullShaderData_t), &cullPushConstants);

    // One invocation per batch, rounded up to whole workgroups (count is non-zero here).
    const uint32_t cullWorkGroups = (cullPushConstants.maxBatchCount - 1u) / WORKGROUP_SIZE + 1u;

    driver->dispatch(cullWorkGroups, 1u, 1u);
};
911+
741912
//! we want to move around the scene and view it from different angles
742913
scene::ICameraSceneNode* camera = smgr->addCameraSceneNodeFPS(0, 100.0f, 0.5f);
743914

@@ -747,8 +918,8 @@ int main()
747918
camera->setFarValue(5000.0f);
748919

749920
smgr->setActiveCamera(camera);
750-
751921

922+
bool asdf = true;
752923
uint64_t lastFPSTime = 0;
753924
while (device->run() && receiver.keepOpen())
754925
{
@@ -764,7 +935,9 @@ int main()
764935
memcpy(uboData.NormalMat, camera->getViewMatrix().pointer(), sizeof(core::matrix3x4SIMD));
765936
driver->updateBufferRangeViaStagingBuffer(sceneData.ubo.get(), 0u, sizeof(SBasicViewParameters), &uboData);
766937

767-
// TODO: Cull MDIs
938+
// cull MDIs
939+
cullBatches(camera->getConcatenatedMatrix(), freezeCulling);
940+
COpenGLExtensionHandler::pGlMemoryBarrier(GL_COMMAND_BARRIER_BIT | GL_SHADER_STORAGE_BARRIER_BIT);
768941

769942
driver->setRenderTarget(visBuffer);
770943
driver->clearZBuffer();
@@ -788,6 +961,9 @@ int main()
788961
);
789962
}
790963

964+
//draw aabbs
965+
draw3DLine->draw(camera->getConcatenatedMatrix(), dbgLines);
966+
791967
// shade
792968
driver->bindDescriptorSets(video::EPBP_COMPUTE,sceneData.shadeVBufferPpln->getLayout(),0u,4u,ds,nullptr);
793969
driver->bindComputePipeline(sceneData.shadeVBufferPpln.get());
@@ -800,6 +976,7 @@ int main()
800976

801977
// blit
802978
driver->blitRenderTargets(fb,0);
979+
803980
driver->endScene();
804981

805982
// display frames per second in window title

0 commit comments

Comments
 (0)