Skip to content

Commit 0090a6a

Browse files
committed
Moved away from structs to deal with minimal alignment confusion
1 parent 635e4be commit 0090a6a

File tree

6 files changed

+95
-48
lines changed

6 files changed

+95
-48
lines changed

Source/StratusRendererBackend.cpp

Lines changed: 48 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,20 @@ void OpenGLDebugCallback(GLenum source, GLenum type, GLuint id,
2626

2727
// Mirrors one GLSL vec4 element of the std430 VPL buffers (light positions, light colors) declared in the VPL shaders
2828
// See https://fvcaputo.github.io/2019/02/06/memory-alignment.html for alignment info
29-
struct GpuVirtualPointLightData {
30-
alignas(16) glm::vec4 lightPosition = glm::vec4(0.0f);
31-
alignas(16) glm::vec4 lightColor = glm::vec4(0.0f);
32-
alignas(16) glm::vec4 shadowFarPlaneRadius = glm::vec4(0.0f); // last element padding
33-
alignas(16) glm::vec4 numShadowSamples = glm::vec4(0.0f); // last 3 elements padding
29+
// Four floats with 16-byte alignment. Used as the CPU-side element type for the
// light position and light color arrays that are copied into GPU buffers and read
// as vec4 arrays by the shaders.
struct alignas(16) GpuVec {
30+
// Components stored in x, y, z, w order.
float v[4];
31+
32+
// Builds the vector from four individual components.
GpuVec(float x, float y, float z, float w) {
33+
v[0] = x;
34+
v[1] = y;
35+
v[2] = z;
36+
v[3] = w;
37+
}
38+
39+
// Broadcasts a single value to all four components.
GpuVec(float xyzw) : GpuVec(xyzw, xyzw, xyzw, xyzw) {}
40+
// Copies the four components of a glm::vec4.
// Note: the parameter name `v` shadows the member array `v`; inside the
// delegating call `v[i]` refers to the parameter.
GpuVec(const glm::vec4& v) : GpuVec(v[0], v[1], v[2], v[3]) {}
41+
// Copies x, y, z from a glm::vec3 and sets w to 0.
GpuVec(const glm::vec3& v) : GpuVec(glm::vec4(v, 0.0f)) {}
42+
// Default: all four components are 0.
GpuVec() : GpuVec(0.0f) {}
3443
};
3544

3645
static void printGLInfo(const GFXConfig & config) {
@@ -93,6 +102,8 @@ static void printGLInfo(const GFXConfig & config) {
93102
}
94103

95104
RendererBackend::RendererBackend(const uint32_t width, const uint32_t height, const std::string& appName) {
105+
static_assert(sizeof(GpuVec) == 16, "Memory alignment must match up with GLSL");
106+
96107
STRATUS_LOG << "Initializing SDL video" << std::endl;
97108
if (SDL_Init(SDL_INIT_VIDEO) != 0) {
98109
STRATUS_ERROR << "Unable to initialize sdl2" << std::endl;
@@ -325,8 +336,12 @@ void RendererBackend::_InitializeVplData() {
325336
const Bitfield flags = GPU_DYNAMIC_DATA | GPU_MAP_READ | GPU_MAP_WRITE;
326337
std::vector<int> visibleIndicesData(_state.vpls.maxTotalVirtualPointLightsPerFrame, 0);
327338
_state.vpls.vplVisibleIndices = GpuBuffer((const void *)visibleIndicesData.data(), sizeof(int) * visibleIndicesData.size(), flags);
328-
std::vector<GpuVirtualPointLightData> vplData(_state.vpls.maxTotalVirtualPointLightsPerFrame, GpuVirtualPointLightData());
329-
_state.vpls.vplLightData = GpuBuffer((const void *)vplData.data(), sizeof(GpuVirtualPointLightData) * vplData.size(), flags);
339+
_state.vpls.vplPositions = GpuBuffer(nullptr, sizeof(GpuVec) * _state.vpls.maxTotalVirtualPointLightsPerFrame, flags);
340+
_state.vpls.vplColors = GpuBuffer(nullptr, sizeof(GpuVec) * _state.vpls.maxTotalVirtualPointLightsPerFrame, flags);
341+
_state.vpls.vplShadowFactors = GpuBuffer(nullptr, sizeof(float) * _state.vpls.maxTotalVirtualPointLightsPerFrame, flags);
342+
_state.vpls.vplFarPlanes = GpuBuffer(nullptr, sizeof(float) * _state.vpls.maxTotalVirtualPointLightsPerFrame, flags);
343+
_state.vpls.vplRadii = GpuBuffer(nullptr, sizeof(float) * _state.vpls.maxTotalVirtualPointLightsPerFrame, flags);
344+
_state.vpls.vplShadowSamples = GpuBuffer(nullptr, sizeof(float) * _state.vpls.maxTotalVirtualPointLightsPerFrame, flags);
330345
_state.vpls.vplNumVisible = GpuBuffer(nullptr, sizeof(int), flags);
331346
}
332347

@@ -1277,29 +1292,37 @@ void RendererBackend::_PerformVirtualPointLightCulling(std::vector<std::pair<Lig
12771292
}
12781293

12791294
// Pack data into system memory
1280-
std::vector<GpuVirtualPointLightData> vplData(perVPLDistToViewer.size());
1295+
std::vector<GpuVec> lightPositions(perVPLDistToViewer.size());
1296+
std::vector<GpuVec> lightColors(perVPLDistToViewer.size());
1297+
std::vector<float> lightFarPlanes(perVPLDistToViewer.size());
1298+
std::vector<float> lightRadii(perVPLDistToViewer.size());
1299+
std::vector<float> lightShadowSamples(perVPLDistToViewer.size());
12811300
for (size_t i = 0; i < perVPLDistToViewer.size(); ++i) {
1282-
GpuVirtualPointLightData data;
12831301
VirtualPointLight * point = (VirtualPointLight *)perVPLDistToViewer[i].first.get();
1284-
data.lightPosition = glm::vec4(point->position, 1.0f);
1285-
data.shadowFarPlaneRadius = glm::vec4(0.0f, point->getFarPlane(), point->getRadius(), 0.0f);
1286-
data.lightColor = glm::vec4(point->getBaseColor() * point->getIntensity(), 1.0f);
1287-
data.numShadowSamples = glm::vec4(point->GetNumShadowSamples(), 0.0f, 0.0f, 0.0f);
1288-
vplData[i] = std::move(data);
1302+
lightPositions[i] = GpuVec(glm::vec4(point->position, 1.0f));
1303+
lightFarPlanes[i] = point->getFarPlane();
1304+
lightRadii[i] = point->getRadius();
1305+
lightColors[i] = GpuVec(glm::vec4(point->getBaseColor() * point->getIntensity(), 1.0f));
1306+
lightShadowSamples[i] = float(point->GetNumShadowSamples());
12891307
}
12901308

1291-
_state.vplCulling->bind();
1292-
12931309
// Move data to GPU memory
1294-
_state.vpls.vplLightData.CopyDataToBuffer(0, sizeof(GpuVirtualPointLightData) * vplData.size(), (const void *)vplData.data());
1310+
_state.vpls.vplPositions.CopyDataToBuffer(0, sizeof(GpuVec) * lightPositions.size(), (const void *)lightPositions.data());
1311+
_state.vpls.vplColors.CopyDataToBuffer(0, sizeof(GpuVec) * lightColors.size(), (const void *)lightColors.data());
1312+
_state.vpls.vplFarPlanes.CopyDataToBuffer(0, sizeof(float) * lightFarPlanes.size(), (const void *)lightFarPlanes.data());
1313+
_state.vpls.vplRadii.CopyDataToBuffer(0, sizeof(float) * lightRadii.size(), (const void *)lightRadii.data());
1314+
_state.vpls.vplShadowSamples.CopyDataToBuffer(0, sizeof(float) * lightShadowSamples.size(), (const void *)lightShadowSamples.data());
1315+
1316+
_state.vplCulling->bind();
12951317

12961318
// Set up # visible atomic counter
12971319
int numVisible = 0;
12981320
_state.vpls.vplNumVisible.CopyDataToBuffer(0, sizeof(int), (const void *)&numVisible);
12991321
_state.vpls.vplNumVisible.BindBase(GpuBaseBindingPoint::SHADER_STORAGE_BUFFER, 1);
13001322

13011323
// Bind light data and visibility indices
1302-
_state.vpls.vplLightData.BindBase(GpuBaseBindingPoint::SHADER_STORAGE_BUFFER, 0);
1324+
_state.vpls.vplShadowFactors.BindBase(GpuBaseBindingPoint::SHADER_STORAGE_BUFFER, 0);
1325+
_state.vpls.vplPositions.BindBase(GpuBaseBindingPoint::SHADER_STORAGE_BUFFER, 4);
13031326
_state.vpls.vplVisibleIndices.BindBase(GpuBaseBindingPoint::SHADER_STORAGE_BUFFER, 3);
13041327

13051328
_InitCoreCSMData(_state.vplCulling.get());
@@ -1312,7 +1335,8 @@ void RendererBackend::_PerformVirtualPointLightCulling(std::vector<std::pair<Lig
13121335

13131336
// Bind inputs
13141337
_state.vplTileDeferredCulling->bindTexture("gPosition", _state.buffer.position);
1315-
_state.vpls.vplLightData.BindBase(GpuBaseBindingPoint::SHADER_STORAGE_BUFFER, 0);
1338+
_state.vpls.vplPositions.BindBase(GpuBaseBindingPoint::SHADER_STORAGE_BUFFER, 0);
1339+
_state.vpls.vplRadii.BindBase(GpuBaseBindingPoint::SHADER_STORAGE_BUFFER, 7);
13161340
_state.vpls.vplNumVisible.BindBase(GpuBaseBindingPoint::SHADER_STORAGE_BUFFER, 1);
13171341
_state.vpls.vplVisibleIndices.BindBase(GpuBaseBindingPoint::SHADER_STORAGE_BUFFER, 3);
13181342

@@ -1374,7 +1398,11 @@ void RendererBackend::_ComputeVirtualPointLightGlobalIllumination(const std::vec
13741398
// All relevant rendering data is moved to the GPU during the light cull phase
13751399
_state.vpls.vplNumLightsVisiblePerTile.BindBase(GpuBaseBindingPoint::SHADER_STORAGE_BUFFER, 3);
13761400
_state.vpls.vplLightIndicesVisiblePerTile.BindBase(GpuBaseBindingPoint::SHADER_STORAGE_BUFFER, 4);
1377-
_state.vpls.vplLightData.BindBase(GpuBaseBindingPoint::SHADER_STORAGE_BUFFER, 5);
1401+
_state.vpls.vplPositions.BindBase(GpuBaseBindingPoint::SHADER_STORAGE_BUFFER, 5);
1402+
_state.vpls.vplColors.BindBase(GpuBaseBindingPoint::SHADER_STORAGE_BUFFER, 6);
1403+
_state.vpls.vplRadii.BindBase(GpuBaseBindingPoint::SHADER_STORAGE_BUFFER, 7);
1404+
_state.vpls.vplFarPlanes.BindBase(GpuBaseBindingPoint::SHADER_STORAGE_BUFFER, 8);
1405+
_state.vpls.vplShadowSamples.BindBase(GpuBaseBindingPoint::SHADER_STORAGE_BUFFER, 9);
13781406

13791407
_state.vplGlobalIllumination->bindTexture("screen", _state.lightingColorBuffer);
13801408
_state.vplGlobalIllumination->bindTexture("gPosition", _state.buffer.position);

Source/StratusRendererBackend.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,12 @@ namespace stratus {
183183
int maxTotalVirtualLightsPerTile = maxTotalVirtualPointLightsPerFrame;
184184
GpuBuffer vplLightIndicesVisiblePerTile;
185185
GpuBuffer vplNumLightsVisiblePerTile;
186-
GpuBuffer vplLightData;
186+
GpuBuffer vplPositions;
187+
GpuBuffer vplColors;
188+
GpuBuffer vplShadowFactors;
189+
GpuBuffer vplFarPlanes;
190+
GpuBuffer vplRadii;
191+
GpuBuffer vplShadowSamples;
187192
GpuBuffer vplVisibleIndices;
188193
GpuBuffer vplNumVisible;
189194
FrameBuffer vplGIFbo;

resources/shaders/pbr_vpl_gi.fs

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,24 @@ layout (std430, binding = 4) readonly buffer vplIndices {
4545
};
4646

4747
// Light positions
48-
layout (std430, binding = 5) readonly buffer vplData {
49-
VirtualPointLight lightData[];
48+
// Per-light position; only .xyz is read by the lighting loop in this shader.
// Bound at shader storage binding 5.
layout (std430, binding = 5) readonly buffer vplPositions {
49+
vec4 lightPositions[];
50+
};
51+
52+
// Per-light color; only .xyz is read by the lighting loop in this shader.
// Bound at shader storage binding 6.
layout (std430, binding = 6) readonly buffer vplColors {
53+
vec4 lightColors[];
54+
};
55+
56+
// Per-light radius; fragments farther from the light than this are skipped.
// Bound at shader storage binding 7.
layout (std430, binding = 7) readonly buffer vplRadii {
57+
float lightRadii[];
58+
};
59+
60+
// Per-light far plane, passed to calculateShadowValue for the light's shadow cube map.
// Bound at shader storage binding 8.
layout (std430, binding = 8) readonly buffer vplFarPlanes {
61+
float lightFarPlanes[];
62+
};
63+
64+
// Per-light shadow sample count stored as float. Currently only referenced from
// commented-out code in the lighting loop (numSamples is hard-coded there).
// Bound at shader storage binding 9.
layout (std430, binding = 9) readonly buffer vplNumSamples {
65+
float lightNumSamples[];
5066
};
5167

5268
vec3 performLightingCalculations(vec3 screenColor, vec2 pixelCoords, vec2 texCoords) {
@@ -78,20 +94,19 @@ vec3 performLightingCalculations(vec3 screenColor, vec2 pixelCoords, vec2 texCoo
7894
for (int baseLightIndex = 0 ; baseLightIndex < numActiveVPLs; baseLightIndex += 1) {
7995
// Calculate true light index via lookup into active light table
8096
int lightIndex = activeLightIndicesPerTile[baseTileIndex + baseLightIndex];
81-
VirtualPointLight vpl = lightData[lightIndex];
82-
vec3 lightPosition = vpl.lightPosition.xyz;
97+
vec3 lightPosition = lightPositions[lightIndex].xyz;
8398
float distance = length(lightPosition - fragPos);
84-
vec3 lightColor = vpl.lightColor.xyz;
85-
if (distance > vpl.shadowFarPlaneRadius.z) continue;
99+
vec3 lightColor = lightColors[lightIndex].xyz;
100+
if (distance > lightRadii[lightIndex]) continue;
86101
if (length(vplColor) > (length(infiniteLightColor) * 0.25)) break;
87102

88-
int numSamples = 3;//int(vpl.numShadowSamples.x);
103+
int numSamples = 3;//int(lightNumSamples[lightIndex]);
89104
// This solves an error where sometimes numShadowSamples seems to be uninitialized to some huge
90105
// value - must fix
91-
//numSamples = numSamples > 64 ? 3 : numSamples;
92-
//float shadowFactor = 0.0;
106+
//if (numSamples > 64) continue;
107+
float shadowFactor = 0.0;
93108
if (length(lightPosition - viewPosition) < 135) {
94-
shadowFactor = calculateShadowValue(shadowCubeMaps[lightIndex], vpl.shadowFarPlaneRadius.y, fragPos, lightPosition, dot(lightPosition - fragPos, normal), numSamples);
109+
shadowFactor = calculateShadowValue(shadowCubeMaps[lightIndex], lightFarPlanes[lightIndex], fragPos, lightPosition, dot(lightPosition - fragPos, normal), numSamples);
95110
}
96111
// Depending on how visible this VPL is to the infinite light, we want to constrain how bright it's allowed to be
97112
//shadowFactor = lerp(shadowFactor, 0.0, vpl.shadowFactor);

resources/shaders/vpl_light_cull.cs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,11 @@
2020
// This changes with std430 where it enforces equivalency between OpenGL and C/C++ float arrays
2121
// by tightly packing them.
2222
layout (std430, binding = 0) buffer vplLightData {
23-
VirtualPointLight lightData[];
23+
float shadowFactors[];
24+
};
25+
26+
layout (std430, binding = 4) buffer vplPositions {
27+
vec4 lightPositions[];
2428
};
2529

2630
layout (std430, binding = 1) buffer numVisibleVPLs {
@@ -33,15 +37,14 @@
3337

3438
void main() {
3539
int index = int(gl_GlobalInvocationID.x);
36-
VirtualPointLight vpl = lightData[index];
37-
vec3 lightPos = vpl.lightPosition.xyz;
40+
vec3 lightPos = lightPositions[index].xyz;
3841
vec3 cascadeBlends = vec3(dot(cascadePlanes[0], vec4(lightPos, 1.0)),
3942
dot(cascadePlanes[1], vec4(lightPos, 1.0)),
4043
dot(cascadePlanes[2], vec4(lightPos, 1.0)));
4144
float shadowFactor = 1.0 - calculateInfiniteShadowValue(vec4(lightPos, 1.0), cascadeBlends, infiniteLightDirection);
4245
if (shadowFactor < 0.95) {
4346
int next = atomicAdd(numVisible, 1);
44-
vpl.shadowFarPlaneRadius.x = shadowFactor;
47+
shadowFactors[index] = shadowFactor;
4548
vplVisibleIndex[next] = index;
4649
}
4750
}

resources/shaders/vpl_tiled_deferred_culling.cs

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,12 @@
2525
//
2626
// This changes with std430 where it enforces equivalency between OpenGL and C/C++ float arrays
2727
// by tightly packing them.
28-
layout (std430, binding = 0) readonly buffer vplLightData {
29-
VirtualPointLight lightData[];
28+
layout (std430, binding = 0) readonly buffer vplLightPositions {
29+
vec4 lightPositions[];
30+
};
31+
32+
layout (std430, binding = 7) readonly buffer vplLightRadii {
33+
float lightRadii[];
3034
};
3135

3236
layout (std430, binding = 1) readonly buffer numVisibleVPLs {
@@ -71,9 +75,8 @@ void main() {
7175

7276
for (int i = 0; i < numVisible; ++i) {
7377
int lightIndex = vplVisibleIndex[i];
74-
VirtualPointLight vpl = lightData[lightIndex];
75-
float distance = length(vpl.lightPosition.xyz - fragPos);
76-
float radius = vpl.shadowFarPlaneRadius.z;
78+
float distance = length(lightPositions[lightIndex].xyz - fragPos);
79+
float radius = lightRadii[lightIndex];
7780
if (distance > radius) continue;
7881

7982
int prev = atomicAdd(activeLightMarker[lightIndex], 1);

resources/shaders/vpl_tiled_deferred_culling.glsl

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,4 @@ STRATUS_GLSL_VERSION
33
// These need to match what is in the renderer backend!
44
// TODO: Find a better way to sync these values with renderer
55
#define MAX_TOTAL_VPLS_PER_FRAME (128)
6-
#define MAX_VPLS_PER_TILE MAX_TOTAL_VPLS_PER_FRAME
7-
8-
struct VirtualPointLight {
9-
vec4 lightPosition;
10-
vec4 lightColor;
11-
vec4 shadowFarPlaneRadius; // last element padding
12-
vec4 numShadowSamples; // last 3 elements padding
13-
};
6+
#define MAX_VPLS_PER_TILE MAX_TOTAL_VPLS_PER_FRAME

0 commit comments

Comments
 (0)