Skip to content

Commit 5a03c28

Browse files
committed
Created cull shader
1 parent 14f7c4a commit 5a03c28

File tree

5 files changed

+238
-2
lines changed

5 files changed

+238
-2
lines changed

examples_tests/41.VisibilityBuffer/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@ if(NOT RES)
44
message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory")
55
endif()
66

7-
nbl_create_executable_project("" "" "" "")
7+
nbl_create_executable_project(../../src/nbl/ext/DebugDraw/CDraw3DLine.cpp "" "" "")
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
#version 430 core
2+
3+
#include "rasterizationCommon.h"
4+
layout(local_size_x = WORKGROUP_SIZE) in;
5+
6+
#include <nbl/builtin/glsl/utils/indirect_commands.glsl>
7+
8+
// Read-only culling input: one CullData_t (bounding box + index of the draw command it controls) per batch.
// NOTE(review): under std430 a vec3 member is aligned to 16 bytes - confirm that the host-side
// CullData_t used to fill this buffer has the same offsets and array stride.
layout(set=0, binding=0, std430, row_major) restrict readonly buffer PerInstanceCull
9+
{
10+
CullData_t cullData[];
11+
};
12+
// Indirect draw commands; main() overwrites the `instanceCount` of the command selected by
// each batch's `drawCommandGUID`. Nothing in this shader reads from the buffer.
layout(set=0, binding=1, std430) restrict coherent buffer IndirectDraws
13+
{
14+
nbl_glsl_DrawElementsIndirectCommand_t draws[];
15+
} commandBuff;
16+
17+
18+
19+
// Per-dispatch parameters: main() reads `data.viewProjMatrix` for the visibility test and
// `data.maxBatchCount` as the upper bound on the invocation index.
layout(push_constant, row_major) uniform PushConstants
20+
{
21+
CullShaderData_t data;
22+
} pc;
23+
24+
25+
26+
#include <nbl/builtin/glsl/utils/culling.glsl>
27+
#include <nbl/builtin/glsl/utils/transform.glsl>
28+
29+
30+
// One invocation per batch: tests the batch's bounding box against the view-projection matrix
// and stores the outcome as the instance count (1 or 0) of the batch's indirect draw command.
void main()
31+
{
32+
    const uint batchIx = gl_GlobalInvocationID.x;
33+
    // Invocations past the last batch (tail of the final workgroup) have nothing to do.
    if (batchIx < pc.data.maxBatchCount)
34+
    {
35+
        const CullData_t perBatch = cullData[batchIx];
36+
        const mat2x3 aabb = mat2x3(perBatch.aabbMinEdge, perBatch.aabbMaxEdge);
37+
        const bool visible = nbl_glsl_couldBeVisible(pc.data.viewProjMatrix, aabb);
38+
        // A culled batch keeps its draw command but draws zero instances.
39+
        commandBuff.draws[perBatch.drawCommandGUID].instanceCount = visible ? 1u : 0u;
40+
    }
41+
}
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
// Definitions shared between GLSL shaders and the C++ host code.
// NOTE(review): this guard name is very generic and main.cpp also includes "common.h";
// confirm the two headers do not share the same guard, otherwise one of them is silently skipped.
#ifndef _COMMON_INCLUDED_
2+
#define _COMMON_INCLUDED_
3+
4+
5+
// Not referenced by the cull shader or the host code shown alongside it; presumably used by other passes.
#define MAX_TRIANGLES_IN_BATCH 512
6+
#define MAX_ACCUMULATED_SAMPLES 0x10000
7+
8+
9+
// Compute workgroup width; used both as the shader's local_size_x and by the host
// when converting a batch count into a workgroup count.
#define WORKGROUP_SIZE 256
10+
11+
12+
// Host-only stand-ins so that structs written with GLSL type names also compile as C++.
#ifdef __cplusplus
13+
#define uint uint32_t
14+
struct uvec2
15+
{
16+
uint x,y;
17+
};
18+
struct vec2
19+
{
20+
float x,y;
21+
};
22+
// Three tightly packed floats (no trailing padding).
// NOTE(review): GLSL std430 aligns vec3 to 16 bytes, so structs containing vec3 need
// explicit care to have the same layout on both sides.
struct vec3
23+
{
24+
float x,y,z;
25+
};
26+
#define vec4 nbl::core::vectorSIMDf
27+
#define mat4 nbl::core::matrix4SIMD
28+
#define mat4x3 nbl::core::matrix3x4SIMD
29+
#endif
30+
31+
32+
#endif

examples_tests/41.VisibilityBuffer/main.cpp

Lines changed: 135 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,15 @@
1010
//! I advise to check out this file, its a basic input handler
1111
#include "../common/QToQuitEventReceiver.h"
1212

13+
#include "nbl/ext/DebugDraw/CDraw3DLine.h"
14+
1315
using namespace nbl;
1416
using namespace nbl::core;
1517
using namespace nbl::asset;
1618
using namespace nbl::video;
1719

1820
#include "common.h"
21+
#include "rasterizationCommon.h"
1922

2023
//vt stuff
2124
using STextureData = asset::ICPUVirtualTexture::SMasterTextureData;
@@ -31,6 +34,8 @@ struct commit_t
3134
asset::ICPUSampler::E_TEXTURE_CLAMP uwrap;
3235
asset::ICPUSampler::E_TEXTURE_CLAMP vwrap;
3336
asset::ICPUSampler::E_TEXTURE_BORDER_COLOR border;
37+
38+
core::vector<CullData_t> cullData;
3439
};
3540

3641
constexpr uint32_t TEX_OF_INTEREST_CNT = 6u;
@@ -155,6 +160,18 @@ struct SceneData
155160
smart_refctd_ptr<IGPUBuffer> ubo;
156161
};
157162

163+
// Host-side state of the GPU culling pass: its input/output buffers, the compute pipeline
// with its descriptor set, and the number of batches to process per dispatch.
struct CullShaderData
164+
{
165+
    // One CullData_t per batch, read by the cull shader.
    core::smart_refctd_ptr<IGPUBuffer> perBatchCull;
166+
    // Indirect draw commands whose instance counts the cull shader rewrites.
    core::smart_refctd_ptr<IGPUBuffer> commandBuffer;
167+
168+
    core::smart_refctd_ptr<IGPUComputePipeline> cullPipeline;
169+
    core::smart_refctd_ptr<IGPUDescriptorSetLayout> cullDSLayout;
170+
    core::smart_refctd_ptr<IGPUDescriptorSet> cullDS;
171+
172+
    // Number of valid entries in `perBatchCull`; zero until the scene has been packed,
    // so a default-constructed instance never carries an indeterminate count.
    uint32_t maxBatchCount = 0u;
173+
};
174+
158175
using MeshPacker = CCPUMeshPackerV2<DrawElementsIndirectCommand_t>;
159176
using GPUMeshPacker = CGPUMeshPackerV2<DrawElementsIndirectCommand_t>;
160177

@@ -212,6 +229,8 @@ int main()
212229
auto* am = device->getAssetManager();
213230
auto* fs = am->getFileSystem();
214231

232+
auto draw3DLine = ext::DebugDraw::CDraw3DLine::create(driver);
233+
215234
//
216235
auto createScreenSizedImage = [driver,&params](const E_FORMAT format) -> auto
217236
{
@@ -250,6 +269,8 @@ int main()
250269

251270
//
252271
SceneData sceneData;
272+
CullShaderData cullShaderData;
273+
core::vector<std::pair<ext::DebugDraw::S3DLineVertex, ext::DebugDraw::S3DLineVertex>> dbgLines;
253274
{
254275
//
255276
smart_refctd_ptr<IGPUDescriptorSetLayout> perFrameDSLayout,shadingDSLayout;
@@ -507,6 +528,9 @@ int main()
507528
core::vector<BatchInstanceData> batchData;
508529
batchData.reserve(mdiCntBound);
509530

531+
core::vector<CullData_t> batchCullData(mdiCntBound);
532+
auto batchCullDataEnd = batchCullData.begin();
533+
510534
allocDataIt = allocData->begin();
511535
uint32_t mdiListOffset = 0u;
512536
for (auto it=pipelineMeshBufferRanges.begin(); it!=pipelineMeshBufferRanges.end()-1u; )
@@ -517,7 +541,8 @@ int main()
517541
const uint32_t meshMdiBound = mp->calcMDIStructMaxCount(mbRangeBegin,mbRangeEnd);
518542
core::vector<IMeshPackerBase::PackedMeshBufferData> pmbd(std::distance(mbRangeBegin,mbRangeEnd));
519543
core::vector<MeshPacker::CombinedDataOffsetTable> cdot(meshMdiBound);
520-
uint32_t actualMdiCnt = mp->commit(pmbd.data(),cdot.data(),nullptr,&*allocDataIt,mbRangeBegin,mbRangeEnd);
544+
core::vector<aabbox3df> aabbs(meshMdiBound);
545+
uint32_t actualMdiCnt = mp->commit(pmbd.data(),cdot.data(),aabbs.data(),&*allocDataIt,mbRangeBegin,mbRangeEnd);
521546
allocDataIt += meshMdiBound;
522547

523548
if (actualMdiCnt==0u)
@@ -526,6 +551,24 @@ int main()
526551
_NBL_DEBUG_BREAK_IF(true);
527552
}
528553

554+
// Emits one CullData_t per MDI produced by the preceding commit(): copies the batch's bounding
// box, records which draw command the batch controls, and queues the box for debug line drawing.
for (uint32_t i = 0u; i < actualMdiCnt; i++)
555+
{
556+
batchCullDataEnd->aabbMinEdge.x = aabbs[i].MinEdge.X;
557+
batchCullDataEnd->aabbMinEdge.y = aabbs[i].MinEdge.Y;
558+
batchCullDataEnd->aabbMinEdge.z = aabbs[i].MinEdge.Z;
559+
560+
batchCullDataEnd->aabbMaxEdge.x = aabbs[i].MaxEdge.X;
561+
batchCullDataEnd->aabbMaxEdge.y = aabbs[i].MaxEdge.Y;
562+
batchCullDataEnd->aabbMaxEdge.z = aabbs[i].MaxEdge.Z;
563+
564+
// NOTE(review): `i` counts MDIs, but `pmbd` is sized by the number of mesh buffers in the range;
// if a mesh buffer can be split into several MDIs, `pmbd[i]` may read past the end - confirm.
batchCullDataEnd->drawCommandGUID = pmbd[i].mdiParameterOffset + i;
565+
// NOTE(review): this compares an offset-based index against a count; verify the intended invariant.
assert(pmbd[i].mdiParameterOffset + i <= pmbd[i].mdiParameterCount);
566+
567+
// Debug visualisation of the batch bounding box (colour components 1,0,0,1, default-constructed transform).
draw3DLine->enqueueBox(dbgLines, aabbs[i], 1.0f, 0.0f, 0.0f, 1.0f, core::matrix3x4SIMD());
568+
569+
batchCullDataEnd++;
570+
}
571+
529572
sceneData.pushConstantsData.push_back(mdiListOffset);
530573
mdiListOffset += actualMdiCnt;
531574

@@ -558,6 +601,10 @@ int main()
558601
gpump = core::make_smart_refctd_ptr<CGPUMeshPackerV2<>>(driver,mp.get());
559602
sceneData.mdiBuffer = gpump->getPackerDataStore().MDIDataBuffer;
560603
sceneData.idxBuffer = gpump->getPackerDataStore().indexBuffer;
604+
605+
// The cull pass writes instance counts straight into the packer's MDI buffer.
cullShaderData.commandBuffer = gpump->getPackerDataStore().MDIDataBuffer;
606+
// Only the entries actually written during packing are uploaded and processed.
cullShaderData.maxBatchCount = static_cast<uint32_t>(std::distance(batchCullData.begin(), batchCullDataEnd));
607+
// The buffer size is expressed in bytes, so the element count has to be scaled by the element size;
// passing the bare count would create a buffer holding only a fraction of the cull data.
cullShaderData.perBatchCull = driver->createFilledDeviceLocalGPUBufferOnDedMem(cullShaderData.maxBatchCount*sizeof(CullData_t), batchCullData.data());
561608
}
562609
mesh_raw->convertToDummyObject(~0u);
563610

@@ -738,6 +785,83 @@ int main()
738785
}
739786
}
740787

788+
// Builds everything the culling compute pass needs: descriptor set layout, descriptor set
// (binding 0 = per-batch cull data, binding 1 = indirect draw commands), and the compute pipeline.
{
789+
// A single push constant range covering the whole CullShaderData_t, visible to the compute stage.
SPushConstantRange range{ ISpecializedShader::ESS_COMPUTE,0u,sizeof(CullShaderData_t) };
790+
791+
{
792+
// Two storage buffers, matching bindings 0 and 1 of set 0 in the cull shader.
IGPUDescriptorSetLayout::SBinding bindings[2];
793+
bindings[0].binding = 0u;
794+
bindings[0].count = 1u;
795+
bindings[0].samplers = nullptr;
796+
bindings[0].stageFlags = ISpecializedShader::ESS_COMPUTE;
797+
bindings[0].type = EDT_STORAGE_BUFFER;
798+
799+
bindings[1].binding = 1u;
800+
bindings[1].count = 1u;
801+
bindings[1].samplers = nullptr;
802+
bindings[1].stageFlags = ISpecializedShader::ESS_COMPUTE;
803+
bindings[1].type = EDT_STORAGE_BUFFER;
804+
805+
cullShaderData.cullDSLayout = driver->createGPUDescriptorSetLayout(bindings, bindings + sizeof(bindings) / sizeof(IGPUDescriptorSetLayout::SBinding));
806+
}
807+
808+
{
809+
// Both buffers are bound in full (offset 0, whole size).
IGPUDescriptorSet::SDescriptorInfo infos[2];
810+
811+
infos[0].desc = core::smart_refctd_ptr(cullShaderData.perBatchCull);
812+
infos[0].buffer.offset = 0u;
813+
infos[0].buffer.size = cullShaderData.perBatchCull->getSize();
814+
815+
infos[1].desc = core::smart_refctd_ptr(cullShaderData.commandBuffer);
816+
infos[1].buffer.offset = 0u;
817+
infos[1].buffer.size = cullShaderData.commandBuffer->getSize();
818+
819+
cullShaderData.cullDS = driver->createGPUDescriptorSet(smart_refctd_ptr(cullShaderData.cullDSLayout));
820+
821+
IGPUDescriptorSet::SWriteDescriptorSet writes[2];
822+
823+
// Write i targets binding i and takes its buffer from infos[i].
for (uint32_t i = 0u; i < 2; i++)
824+
{
825+
writes[i].dstSet = cullShaderData.cullDS.get();
826+
writes[i].binding = i;
827+
writes[i].arrayElement = 0u;
828+
writes[i].count = 1u;
829+
writes[i].descriptorType = EDT_STORAGE_BUFFER;
830+
writes[i].info = infos + i;
831+
}
832+
833+
driver->updateDescriptorSets(sizeof(writes) / sizeof(IGPUDescriptorSet::SWriteDescriptorSet), writes, 0u, nullptr);
834+
}
835+
836+
asset::IAssetLoader::SAssetLoadParams lp;
837+
// NOTE(review): the first element of the loaded bundle is dereferenced before any check; if
// "../cull.comp" cannot be loaded this happens before the assert below - confirm the bundle is never empty.
auto cullShader = IAsset::castDown<ICPUSpecializedShader>(*am->getAsset("../cull.comp", lp).getContents().begin());
838+
assert(cullShader);
839+
const asset::ICPUShader* unspec = cullShader->getUnspecialized();
840+
assert(unspec->containsGLSL());
841+
842+
// Convert the CPU-side shader asset into its GPU object.
auto gpuCullShader = driver->getGPUObjectsFromAssets(&cullShader, &cullShader + 1u)->begin()[0];
843+
844+
// Pipeline layout = the push constant range above + the cull descriptor set layout.
auto cullPipelineLayout = driver->createGPUPipelineLayout(&range, &range + 1u, core::smart_refctd_ptr(cullShaderData.cullDSLayout));
845+
cullShaderData.cullPipeline = driver->createGPUComputePipeline(nullptr, std::move(cullPipelineLayout), std::move(gpuCullShader));
846+
}
847+
848+
// Runs the culling compute pass for the given view-projection matrix: binds the cull pipeline
// and descriptor set, uploads the push constants and dispatches one invocation per batch
// (rounded up to whole workgroups of WORKGROUP_SIZE).
auto cullBatches = [&driver, &cullShaderData](const core::matrix4SIMD& vp)
849+
{
    // With no batches there is nothing to cull; returning early also keeps the workgroup
    // computation below from wrapping around (0u - 1u) into an enormous dispatch.
    if (cullShaderData.maxBatchCount == 0u)
        return;

850+
    driver->bindDescriptorSets(EPBP_COMPUTE, cullShaderData.cullPipeline->getLayout(), 0u, 1u, &cullShaderData.cullDS.get(), nullptr);
851+
    driver->bindComputePipeline(cullShaderData.cullPipeline.get());
852+
853+
    CullShaderData_t cullPushConstants;
854+
    cullPushConstants.viewProjMatrix = vp;
855+
    cullPushConstants.viewProjDeterminant = core::determinant(vp);
856+
    cullPushConstants.maxBatchCount = cullShaderData.maxBatchCount; //TODO: this should be uniform, and set only once
857+
858+
    driver->pushConstants(cullShaderData.cullPipeline->getLayout(), ISpecializedShader::ESS_COMPUTE, 0u, sizeof(CullShaderData_t), &cullPushConstants);
859+
860+
    // Ceiling division; safe because the batch count is known to be non-zero here.
    const uint32_t cullWorkGroups = (cullPushConstants.maxBatchCount - 1u) / WORKGROUP_SIZE + 1u;
861+
862+
    driver->dispatch(cullWorkGroups, 1u, 1u);
863+
};
864+
741865
//! we want to move around the scene and view it from different angles
742866
scene::ICameraSceneNode* camera = smgr->addCameraSceneNodeFPS(0, 100.0f, 0.5f);
743867

@@ -748,6 +872,8 @@ int main()
748872

749873
smgr->setActiveCamera(camera);
750874

875+
// Temporary debugging aid: view-projection matrix latched in the render loop while `lastFPSTime`
// is still 0 and then passed to the cull pass every frame, so culling stays tied to that early
// camera pose (presumably to inspect the culling result from other viewpoints).
876+
core::matrix4SIMD vpFromFirstFrame;
751877

752878
uint64_t lastFPSTime = 0;
753879
while (device->run() && receiver.keepOpen())
@@ -758,13 +884,17 @@ int main()
758884
camera->OnAnimate(std::chrono::duration_cast<std::chrono::milliseconds>(device->getTimer()->getTime()).count());
759885
camera->render();
760886

887+
if (lastFPSTime == 0)
888+
vpFromFirstFrame = camera->getConcatenatedMatrix();
889+
761890
SBasicViewParameters uboData;
762891
memcpy(uboData.MVP, camera->getConcatenatedMatrix().pointer(), sizeof(core::matrix4SIMD));
763892
memcpy(uboData.MV, camera->getViewMatrix().pointer(), sizeof(core::matrix3x4SIMD));
764893
memcpy(uboData.NormalMat, camera->getViewMatrix().pointer(), sizeof(core::matrix3x4SIMD));
765894
driver->updateBufferRangeViaStagingBuffer(sceneData.ubo.get(), 0u, sizeof(SBasicViewParameters), &uboData);
766895

767896
// TODO: Cull MDIs
897+
cullBatches(vpFromFirstFrame);
768898

769899
driver->setRenderTarget(visBuffer);
770900
driver->clearZBuffer();
@@ -788,6 +918,9 @@ int main()
788918
);
789919
}
790920

921+
//draw aabbs
922+
draw3DLine->draw(camera->getConcatenatedMatrix(), dbgLines);
923+
791924
// shade
792925
driver->bindDescriptorSets(video::EPBP_COMPUTE,sceneData.shadeVBufferPpln->getLayout(),0u,4u,ds,nullptr);
793926
driver->bindComputePipeline(sceneData.shadeVBufferPpln.get());
@@ -800,6 +933,7 @@ int main()
800933

801934
// blit
802935
driver->blitRenderTargets(fb,0);
936+
803937
driver->endScene();
804938

805939
// display frames per second in window title
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
#ifndef _RASTERIZATION_COMMON_H_INCLUDED_
2+
#define _RASTERIZATION_COMMON_H_INCLUDED_
3+
4+
#include "cullShaderCommon.h"
5+
6+
// Push constant payload of the culling compute shader; declared with GLSL type names so the
// same definition is shared by the shader and the host code that fills it.
struct CullShaderData_t
7+
{
8+
// Matrix the shader passes to its visibility test together with each batch's bounding box.
mat4 viewProjMatrix;
9+
// Filled by the host with the determinant of the view-projection matrix; the cull shader's main() does not read it.
float viewProjDeterminant;
10+
// Number of batches to process; shader invocations with an index at or beyond it exit immediately.
uint maxBatchCount;
11+
};
12+
13+
// Per-batch culling record stored in a std430 storage buffer and filled by the host.
// std430 aligns every vec3 to 16 bytes, whereas the host-side vec3 is three tightly packed
// floats. The members are therefore interleaved so that each vec3 is followed by one uint,
// which gives offsets 0 / 12 / 16 / 28 and a 32 byte array stride on both sides.
struct CullData_t
14+
{
15+
    // Minimum corner of the batch's axis-aligned bounding box.
    vec3 aabbMinEdge;
    // Index of the indirect draw command whose instance count this batch controls.
    uint drawCommandGUID;
16+
    // Maximum corner of the batch's axis-aligned bounding box.
    vec3 aabbMaxEdge;
17+
    // Unused; keeps the struct size a multiple of 16 bytes so host and shader strides match.
    uint padding;
18+
};
19+
20+
// Per-draw record shared between shader and host code; not referenced by the cull shader or by
// the host code shown alongside it.
struct DrawData_t
21+
{
22+
mat4 MVP;
23+
// NOTE(review): the name suggests a backfacing flag packed together with a batch instance index - confirm the bit layout at the use site.
uint backfacingBit_batchInstanceGUID;
24+
uint firstIndex;
25+
// Explicit padding; presumably rounds the struct up to a multiple of 16 bytes.
uint padding1;
26+
uint padding2;
27+
};
28+
29+
#endif

0 commit comments

Comments
 (0)