Skip to content

Commit 6d667cf

Browse files
committed
Cleanup
1 parent a5b672a commit 6d667cf

15 files changed

+97
-91
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,5 @@
66
*/.vscode/
77
/CMakeSettings.json
88
/*.cap
9+
/*.rdc
10+
/Nsight/

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ Implementation of [Clustered Shading](https://efficientshading.com/wp-content/up
66

77
![Cluster light count visualization](images/sponza_cluster_vis.png)
88

9-
The code is programmed to support bgfx's OpenGL and DirectX 11/12 backends. I've only tested it on one Windows 10 machine with an Nvidia GTX 1070, however. Other hardware or operating systems might have subtle bugs I'm not aware of.
9+
Currently, bgfx's OpenGL, DirectX 11/12, and Vulkan backends are supported. I've only tested it on Windows 10 with an Nvidia GTX 1070, so other hardware or operating systems might have subtle bugs I'm not aware of.
1010

1111
## Functionality
1212

src/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,8 @@ foreach(SHADER ${SHADERS})
116116
get_filename_component(SHADER_NAME "${SHADER}" NAME)
117117
get_filename_component(SHADER_FILE "${SHADER}" ABSOLUTE)
118118
# add_shader defaults to GLSL 120, 430 for compute
119-
# GLSL 1.30 is needed for switch statements but shaderc produces
120-
# shader binaries without any #version directive unless 4.30 is used
119+
# GLSL 1.30 is needed for switch statements but shaderc produces shader
120+
# binaries without any #version directive unless we request 4.00 or higher
121121
# the missing #version directive makes shader compilation fail with no error message
122122
# just use 4.30 for all shaders
123123
# default DX11 model is 5_0

src/Cluster.cpp

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,7 @@ Cluster::~Cluster()
4646
int Cluster::run(int argc, char* argv[])
4747
{
4848
config->readArgv(argc, argv);
49-
return Application::run(argc, argv, config->renderer, BGFX_PCI_ID_NONE, 0, &callbacks, nullptr);
50-
}
5149

52-
void Cluster::initialize(int _argc, char* _argv[])
53-
{
5450
if(config->writeLog)
5551
{
5652
// _mt (thread safe) necessary because of flush_every
@@ -64,6 +60,11 @@ void Cluster::initialize(int _argc, char* _argv[])
6460
Log->set_level(spdlog::level::trace);
6561
spdlog::flush_every(std::chrono::seconds(2));
6662

63+
return Application::run(argc, argv, config->renderer, BGFX_PCI_ID_NONE, 0, &callbacks, nullptr);
64+
}
65+
66+
void Cluster::initialize(int _argc, char* _argv[])
67+
{
6768
if(!ForwardRenderer::supported())
6869
{
6970
Log->error("Forward renderer not supported on this hardware");
@@ -102,24 +103,25 @@ void Cluster::initialize(int _argc, char* _argv[])
102103
ui->initialize();
103104

104105
Scene::init();
105-
// TODO multithreaded
106-
// textures still have to be loaded from main thread
107-
// keep list and load one texture every X frames
106+
108107
if(!scene->load(config->sceneFile))
109108
{
110109
Log->error("Loading scene model failed");
111110
close();
112111
return;
113112
}
114113

115-
// Sponza
116-
// debug camera + lights
117-
scene->camera.lookAt({ -7.0f, 2.0f, 0.0f }, scene->center, glm::vec3(0.0f, 1.0f, 0.0f));
118-
scene->pointLights.lights = { // pos, power
119-
{ { -5.0f, 0.3f, 0.0f }, { 100.0f, 100.0f, 100.0f } },
120-
{ { 0.0f, 0.3f, 0.0f }, { 100.0f, 100.0f, 100.0f } },
121-
{ { 5.0f, 0.3f, 0.0f }, { 100.0f, 100.0f, 100.0f } }
122-
};
114+
// Sponza debug camera + lights
115+
if(!config->customScene)
116+
{
117+
scene->camera.lookAt({ -7.0f, 2.0f, 0.0f }, scene->center, glm::vec3(0.0f, 1.0f, 0.0f));
118+
scene->pointLights.lights = { // pos, power
119+
{ { -5.0f, 0.3f, 0.0f }, { 100.0f, 100.0f, 100.0f } },
120+
{ { 0.0f, 0.3f, 0.0f }, { 100.0f, 100.0f, 100.0f } },
121+
{ { 5.0f, 0.3f, 0.0f }, { 100.0f, 100.0f, 100.0f } }
122+
};
123+
}
124+
123125
scene->pointLights.update();
124126
config->lights = (int)scene->pointLights.lights.size();
125127

@@ -190,7 +192,7 @@ void Cluster::onScroll(double xoffset, double yoffset)
190192
void Cluster::update(float dt)
191193
{
192194
float velocity = scene->diagonal / 5.0f; // m/s
193-
// TODO faster with Shift
195+
// TODO move faster with Shift
194196
// need to cache mods & GLFW_MOD_SHIFT in onKey
195197
if(isKeyDown(GLFW_KEY_W))
196198
scene->camera.move(scene->camera.forward() * velocity * dt);
@@ -226,11 +228,6 @@ void Cluster::update(float dt)
226228

227229
int Cluster::shutdown()
228230
{
229-
// TODO
230-
// not all resources are freed
231-
// e.g. Renderer::blitSampler has count 3 on shutdown
232-
// might be because of threaded renderer or command buffer taking a while
233-
234231
ui->shutdown();
235232
renderer->shutdown();
236233
scene->clear();
@@ -404,9 +401,12 @@ void Cluster::generateLights(unsigned int count)
404401

405402
for(size_t i = keep; i < count; i++)
406403
{
407-
glm::vec3 position = glm::vec3(dist(mt), dist(mt), dist(mt)) * scale - (scale * 0.5f);
408-
//position += scene->center; // not Sponza
409-
position.y = glm::abs(position.y); // Sponza
404+
glm::vec3 position = scene->center;
405+
position += glm::vec3(dist(mt), dist(mt), dist(mt)) * scale - (scale * 0.5f);
406+
407+
if(!config->customScene) // Sponza, no lights under the floor
408+
position.y = glm::abs(position.y);
409+
410410
glm::vec3 color = glm::vec3(dist(mt), dist(mt), dist(mt));
411411
glm::vec3 power = color * (dist(mt) * (POWER_MAX - POWER_MIN) + POWER_MIN);
412412
lights[i] = { position, power };

src/Config.cpp

Lines changed: 29 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -7,48 +7,53 @@ Config::Config() :
77
writeLog(true),
88
logFile("Cluster.log"),
99
renderer(bgfx::RendererType::Count), // default renderer, chosen by platform
10-
renderPath(Cluster::RenderPath::Forward),
10+
renderPath(Cluster::RenderPath::Clustered),
1111
tonemappingMode(Renderer::TonemappingMode::ACES),
1212
profile(false),
1313
vsync(false),
14-
msaa(false),
15-
//sceneFile("assets/models/BoomBox/glTF/BoomBox.gltf"),
16-
//sceneFile("assets/models/Duck/glTF/Duck.gltf"),
17-
//sceneFile("assets/models/MetalRoughSpheres/glTF/MetalRoughSpheres.gltf"),
1814
sceneFile("assets/models/Sponza/glTF/Sponza.gltf"),
15+
customScene(false),
1916
lights(1),
2017
maxLights(1000),
2118
movingLights(false),
2219
fullscreen(false),
2320
showUI(true),
2421
showConfigWindow(true),
25-
showLog(true),
26-
showStatsOverlay(true),
22+
showLog(false),
23+
showStatsOverlay(false),
2724
overlays({ true, true, true, true }),
28-
showBuffers(true),
25+
showBuffers(false),
2926
debugVisualization(false)
3027
{
3128
}
3229

3330
void Config::readArgv(int argc, char* argv[])
3431
{
32+
// argv must outlive Config
33+
// sceneFile stores a pointer into argv rather than a copy of the string
3534
bx::CommandLine cmdLine(argc, argv);
3635

37-
// D3D 9.0c (shader model 3.0) doesn't allow indexing into the light buffer
38-
// D3D11, D3D12, OpenGL work
39-
// Vulkan has issues:
40-
// - no sRGB backbuffer support
41-
// - clustered shading doesn't work, some descriptors are not getting bound correctly
42-
// it works in RenderDoc for a few seconds (with similar errors but different bindings), then crashes
36+
if(cmdLine.hasArg("noop"))
37+
renderer = bgfx::RendererType::Noop;
38+
else if(cmdLine.hasArg("gl"))
39+
renderer = bgfx::RendererType::OpenGL;
40+
else if(cmdLine.hasArg("vk"))
41+
renderer = bgfx::RendererType::Vulkan;
42+
// missing required features
43+
//else if(cmdLine.hasArg("d3d9"))
44+
// renderer = bgfx::RendererType::Direct3D9;
45+
else if(cmdLine.hasArg("d3d11"))
46+
renderer = bgfx::RendererType::Direct3D11;
47+
else if(cmdLine.hasArg("d3d12"))
48+
renderer = bgfx::RendererType::Direct3D12;
49+
// not tested
50+
//else if(cmdLine.hasArg("mtl"))
51+
// renderer = bgfx::RendererType::Metal;
4352

44-
renderer = bgfx::RendererType::OpenGL;
45-
//renderer = bgfx::RendererType::Direct3D11;
46-
//renderer = bgfx::RendererType::Direct3D12;
47-
renderer = bgfx::RendererType::Vulkan;
48-
49-
profile = true;
50-
51-
showStatsOverlay = false;
52-
showLog = false;
53-
showBuffers = false;
53+
const char* scene = cmdLine.findOption("scene");
54+
if(scene)
55+
{
56+
sceneFile = scene;
57+
customScene = true;
58+
}
5459
}

src/Config.h

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,26 +11,28 @@ class Config
1111

1212
void readArgv(int argc, char* argv[]);
1313

14+
// * = not exposed to UI
15+
1416
// Log
1517

16-
bool writeLog; // not exposed to UI
17-
const char* logFile; // not exposed to UI
18+
bool writeLog; // *
19+
const char* logFile; // *
1820

1921
// Renderer
2022

21-
bgfx::RendererType::Enum renderer; // not exposed to UI
23+
bgfx::RendererType::Enum renderer; // *
2224
Cluster::RenderPath renderPath;
2325
Renderer::TonemappingMode tonemappingMode;
2426

25-
bool profile; // not exposed to UI
26-
bool vsync; // not exposed to UI
27-
bool msaa; // not exposed to UI
27+
bool profile; // enable bgfx view profiling *
28+
bool vsync; // *
2829

2930
// Scene
3031

31-
const char* sceneFile; // not exposed to UI
32+
const char* sceneFile; // gltf file to load *
33+
bool customScene; // not the standard Sponza scene, don't place debug lights/camera *
3234
int lights;
33-
int maxLights; // not exposed to UI
35+
int maxLights; // *
3436
bool movingLights;
3537

3638
// UI

src/Renderer/ClusterShader.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,11 @@ class ClusterShader
1919
static constexpr uint32_t CLUSTERS_Y = 8;
2020
static constexpr uint32_t CLUSTERS_Z = 24;
2121

22-
// D3D compute shaders only allow up to 1024 threads
23-
// shader will be invoked 3 times
24-
static constexpr uint32_t CLUSTERS_Z_THREADS = 8;
22+
// limit the number of threads per work group (D3D only allows up to 1024; there might also be shared memory limitations)
23+
// shader will be run by 6 work groups
24+
static constexpr uint32_t CLUSTERS_X_THREADS = 16;
25+
static constexpr uint32_t CLUSTERS_Y_THREADS = 8;
26+
static constexpr uint32_t CLUSTERS_Z_THREADS = 4;
2527

2628
static constexpr uint32_t CLUSTER_COUNT = CLUSTERS_X * CLUSTERS_Y * CLUSTERS_Z;
2729

src/Renderer/ClusteredRenderer.cpp

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,7 @@ bool ClusteredRenderer::supported()
2222
// compute shader
2323
(caps->supported & BGFX_CAPS_COMPUTE) != 0 &&
2424
// 32-bit index buffers, used for light grid structure
25-
((caps->supported & BGFX_CAPS_INDEX32) != 0) &&
26-
// fragment depth available in fragment shader
27-
(caps->supported & BGFX_CAPS_FRAGMENT_DEPTH) != 0;
25+
(caps->supported & BGFX_CAPS_INDEX32) != 0;
2826
}
2927

3028
void ClusteredRenderer::onInitialize()
@@ -58,15 +56,15 @@ void ClusteredRenderer::onRender(float dt)
5856
};
5957

6058
bgfx::setViewName(vClusterBuilding, "Cluster building pass (compute)");
61-
bgfx::setViewClear(vClusterBuilding, BGFX_CLEAR_NONE);
59+
//bgfx::setViewClear(vClusterBuilding, BGFX_CLEAR_NONE);
6260
// set u_viewRect for screen2Eye to work correctly
6361
bgfx::setViewRect(vClusterBuilding, 0, 0, width, height);
6462
// this could be set by a different renderer, reset it (D3D12 cares and crashes)
6563
bgfx::setViewFrameBuffer(vClusterBuilding, BGFX_INVALID_HANDLE);
6664
//bgfx::touch(vClusterBuilding);
6765

6866
bgfx::setViewName(vLightCulling, "Clustered light culling pass (compute)");
69-
bgfx::setViewClear(vLightCulling, BGFX_CLEAR_NONE);
67+
//bgfx::setViewClear(vLightCulling, BGFX_CLEAR_NONE);
7068
bgfx::setViewRect(vLightCulling, 0, 0, width, height);
7169
bgfx::setViewFrameBuffer(vLightCulling, BGFX_INVALID_HANDLE);
7270
//bgfx::touch(vLightCulling);
@@ -102,20 +100,18 @@ void ClusteredRenderer::onRender(float dt)
102100

103101
lights.bindLights(scene);
104102
clusters.bindBuffers(false); // write access, all buffers
105-
106103
bgfx::dispatch(vLightCulling,
107104
lightCullingComputeProgram,
108-
1,
109-
1,
105+
ClusterShader::CLUSTERS_X / ClusterShader::CLUSTERS_X_THREADS,
106+
ClusterShader::CLUSTERS_Y / ClusterShader::CLUSTERS_Y_THREADS,
110107
ClusterShader::CLUSTERS_Z / ClusterShader::CLUSTERS_Z_THREADS);
111-
112-
// lighting
113-
114-
uint64_t state = BGFX_STATE_DEFAULT & ~BGFX_STATE_CULL_MASK;
108+
// lighting
115109

116110
bool debugVis = variables["DEBUG_VIS"] == "true";
117111
bgfx::ProgramHandle program = debugVis ? debugVisProgram : lightingProgram;
118112

113+
uint64_t state = BGFX_STATE_DEFAULT & ~BGFX_STATE_CULL_MASK;
114+
119115
for(const Mesh& mesh : scene->meshes)
120116
{
121117
glm::mat4 model = glm::identity<glm::mat4>();
@@ -125,10 +121,11 @@ void ClusteredRenderer::onRender(float dt)
125121
bgfx::setIndexBuffer(mesh.indexBuffer);
126122
const Material& mat = scene->materials[mesh.material];
127123
uint64_t materialState = pbr.bindMaterial(mat);
124+
bgfx::setState(state | materialState);
128125
lights.bindLights(scene);
129126
clusters.bindBuffers();
130-
bgfx::setState(state | materialState);
131-
bgfx::submit(vLighting, program);
127+
// TODO compute bindings don't seem to be preserved despite excluding BGFX_DISCARD_COMPUTE
128+
bgfx::submit(vLighting, program, 0, BGFX_DISCARD_ALL & ~BGFX_DISCARD_COMPUTE);
132129
}
133130
}
134131

src/Renderer/DeferredRenderer.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,9 @@ bool DeferredRenderer::supported()
4646
bool supported = Renderer::supported() &&
4747
// blitting depth texture after geometry pass
4848
(caps->supported & BGFX_CAPS_TEXTURE_BLIT) != 0 &&
49-
// fragment depth available in fragment shader
50-
(caps->supported & BGFX_CAPS_FRAGMENT_DEPTH) != 0 &&
5149
// multiple render targets
52-
caps->limits.maxFBAttachments >= GBufferAttachment::Count; // does depth count as an attachment?
50+
// depth doesn't count as an attachment
51+
caps->limits.maxFBAttachments >= GBufferAttachment::Count - 1;
5352
if(!supported)
5453
return false;
5554

0 commit comments

Comments
 (0)