Skip to content

Commit af10b55

Browse files
Merge pull request #539 from Crisspl/3rdparty-upgrade
3rdparty SPIRV/GLSL upgrade and SkinningBenchmark subgroup bone fetch
2 parents c20e6cd + c6b0347 commit af10b55

File tree

13 files changed

+198
-34
lines changed

13 files changed

+198
-34
lines changed

.gitmodules

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@
1818
url = https://github.com/lz4/lz4.git
1919
[submodule "3rdparty/spirv_cross"]
2020
path = 3rdparty/spirv_cross
21-
url = https://github.com/KhronosGroup/SPIRV-Cross.git
21+
url = https://github.com/devshgraphicsprogramming/SPIRV-Cross.git
22+
branch = issues1350-2
2223
[submodule "3rdparty/zlib"]
2324
path = 3rdparty/zlib
2425
url = https://github.com/madler/zlib.git

3rdparty/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,8 @@ set(ENABLE_SPVREMAPPER OFF CACHE BOOL "SPVRemapper is a spirv compression experi
135135
set(ENABLE_GLSLANG_BINARIES OFF CACHE BOOL "Build glslangValidator and spirv-remap?")
136136
set(BUILD_TESTING OFF CACHE BOOL "Enable glslang build testing?")
137137
add_subdirectory(glslang glslang EXCLUDE_FROM_ALL)
138+
# Because glslang is integrated via add_subdirectory(), build_info.h is generated into the wrong directory (relative to the Nabla root), so an extra include directory has to be added.
139+
target_include_directories(SPIRV PUBLIC "${GLSLANG_GENERATED_INCLUDEDIR}")
138140

139141
set(SHADERC_SKIP_TESTS ON CACHE BOOL "Skip shaderc tests?")
140142
set(SHADERC_SKIP_INSTALL ON CACHE BOOL "Install shaderc?")
@@ -470,6 +472,8 @@ set(IRR_3RDPARTY_TARGETS
470472
glslang
471473
OGLCompiler
472474
OSDependent
475+
MachineIndependent
476+
GenericCodeGen
473477
SPIRV
474478
Iex
475479
IexMath

3rdparty/SPIRV-Tools

Submodule SPIRV-Tools updated 460 files

3rdparty/glslang

3rdparty/shaderc

Submodule shaderc updated 79 files

3rdparty/spirv_cross

Submodule spirv_cross updated 479 files
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
#include "common.glsl"
2+
3+
#extension GL_KHR_shader_subgroup_ballot : require
4+
5+
// Per-bone record: a 4x4 bone matrix plus a 4-column, 3-row normal matrix.
struct BoneData
6+
{
7+
mat4 boneMatrix;
8+
mat4x3 normalMatrix;
9+
};
10+
11+
// Structured view of the bone records (set 0, binding 0), indexed directly by bone ID.
// The block is declared row_major, so the matrices are stored row by row in memory.
layout(std430, set = 0, binding = 0, row_major) readonly buffer BoneMatrices_struct
12+
{
13+
BoneData data[];
14+
} boneSSBO_structs;
// Raw 32-bit view of bone data (set 0, binding 1), addressed one dword at a time.
// NOTE(review): presumably backed by the same memory as binding 0 -- confirm against the host-side descriptor setup.
15+
layout(std430, set = 0, binding = 1) readonly buffer BoneMatrices_dword
16+
{
17+
uint data[];
18+
} boneSSBO_dwords;
19+
20+
#ifndef BENCHMARK
21+
layout(location = 0) in vec3 pos;
22+
layout(location = 3) in vec3 normal;
23+
layout(location = 0) out vec3 vNormal;
24+
#endif
25+
layout(location = 4) in uint boneID;
26+
27+
// NOTE(review): under std430 a row_major mat4 (16 dwords) followed by a row_major mat4x3
// (3 rows of 4 dwords = 12 dwords) appears to total 28 dwords, not 32 -- confirm that the
// host pads every record to OBJ_DWORDS dwords for the dword view.
#define OBJ_DWORDS 32 // size of one BoneData record in dwords, must be a power of two (PoT)
28+
// One bone record held as raw dwords, before it is reinterpreted as a BoneData.
struct BoneData_dword
29+
{
30+
uint data[OBJ_DWORDS];
31+
};
32+
// Reinterprets the raw dwords of one bone record as a BoneData.
//
// The dwords are laid out row by row with 4 dwords per row (the data was loaded as
// row_major), whereas GLSL matrices are indexed by column. The element in row r of
// column col therefore lives at dword r*4+col, and the copy below transposes.
//   dwords  0..15 -> boneMatrix   (4 rows)
//   dwords 16..27 -> normalMatrix (3 rows)
// Dwords 28..31 of the record are not read.
BoneData toBoneData(in BoneData_dword bone)
{
    BoneData unpacked;

    for (int col=0; col<4; col++)
    {
        unpacked.boneMatrix[col].x = uintBitsToFloat(bone.data[col]);
        unpacked.boneMatrix[col].y = uintBitsToFloat(bone.data[col+4]);
        unpacked.boneMatrix[col].z = uintBitsToFloat(bone.data[col+8]);
        unpacked.boneMatrix[col].w = uintBitsToFloat(bone.data[col+12]);

        unpacked.normalMatrix[col].x = uintBitsToFloat(bone.data[16+col]);
        unpacked.normalMatrix[col].y = uintBitsToFloat(bone.data[16+col+4]);
        unpacked.normalMatrix[col].z = uintBitsToFloat(bone.data[16+col+8]);
    }

    return unpacked;
}
55+
#define COALESCING_DWORDS_LOG2 4 // GCN can fetch only 64bytes in a single request
56+
#define SUBGROUP_THRESH 16
57+
58+
// Returns the bone record for `_boneID`.
//
// Fast path: when the active invocations of the subgroup are exactly lanes [0, N) with N a
// power of two and N >= 2^COALESCING_DWORDS_LOG2, the subgroup cooperates. For every distinct
// bone requested by an active lane, the lanes fetch that bone's OBJ_DWORDS dwords with
// consecutive addresses (one dword per lane), and the lanes that asked for that bone collect
// the dwords through subgroup broadcasts.
// Slow path: otherwise every invocation reads its own record from the structured view.
//
// Fixes relative to the previous version:
//  * the contiguity test compared 2^(index of highest active lane) with the active-lane
//    count, which can never match for 16 or more lanes, so the fast path was never taken;
//  * the cooperative load was addressed with each lane's own bone instead of the bone
//    currently being serviced;
//  * broadcasts were issued inside `if (willLoadBone)`, where the source lanes may be inactive;
//  * the "not enough invocations" test compared a log2 against a dword count, and the lane
//    offset used for the second half of the record was inverted;
//  * lanes beyond OBJ_DWORDS no longer read past the end of the record.
//
// NOTE(review): like the original, subgroupBroadcast is called with lane ids that are
// dynamically uniform but not compile-time constants -- confirm the targeted SPIR-V version
// allows this.
BoneData getBone(uint _boneID)
{
//#ifdef IRR_GL_KHR_shader_subgroup_basic_size
    const uvec4 activeMask = subgroupBallot(true);
    const uint activeCnt = subgroupBallotBitCount(activeMask);
    // index of the highest active lane; equals activeCnt-1 only if lanes [0,activeCnt) are all active
    const uint topLane = subgroupBallotFindMSB(activeMask);
    const bool runsFromLaneZero = topLane+1u==activeCnt;
    // power of two so that the per-fetch dword count divides OBJ_DWORDS
    const bool isPoT = (activeCnt&(activeCnt-1u))==0u;
    if (runsFromLaneZero && isPoT && activeCnt>=(1u<<COALESCING_DWORDS_LOG2))
    {
        BoneData_dword retval;
        const uint boneOffset = _boneID*uint(OBJ_DWORDS);
        // how many dwords of a record one cooperative fetch covers
        const uint dwordsPerFetch = activeCnt<uint(OBJ_DWORDS) ? activeCnt:uint(OBJ_DWORDS);

        // service one distinct bone per iteration until every active lane has its record
        uvec4 outstandingLoadsMask = activeMask;
        while (any(notEqual(outstandingLoadsMask,uvec4(0u))))
        {
            // bone requested by the lowest lane that is still waiting
            const uint subgroupBoneOffset = subgroupBroadcast(boneOffset,subgroupBallotFindLSB(outstandingLoadsMask));
            const bool willLoadBone = subgroupBoneOffset==boneOffset;
            outstandingLoadsMask &= ~subgroupBallot(willLoadBone);

            for (uint base=0u; base<uint(OBJ_DWORDS); base+=dwordsPerFetch)
            {
                // all lanes help to load, consecutive lanes hit consecutive dwords
                uint tmp = 0u;
                if (gl_SubgroupInvocationID<dwordsPerFetch)
                    tmp = boneSSBO_dwords.data[subgroupBoneOffset+base+gl_SubgroupInvocationID];

                // broadcasts stay in subgroup-uniform control flow, only the store is conditional
                for (uint j=0u; j<dwordsPerFetch; j++)
                {
                    const uint dword = subgroupBroadcast(tmp,j);
                    if (willLoadBone)
                        retval.data[base+j] = dword;
                }
            }
        }

        return toBoneData(retval);
    }
    else
//#endif
    return boneSSBO_structs.data[_boneID];
}
110+
111+
// Vertex entry point: fetches the bone selected by the `boneID` attribute and skins the vertex.
// With BENCHMARK defined the position and normal are fixed constants rather than vertex
// attributes, and the transformed normal is added into gl_Position.xyz instead of being
// written to vNormal.
void main()
{
#ifdef BENCHMARK
    const vec3 pos = vec3(1.0, 2.0, 3.0);
    const vec3 normal = vec3(1.0, 2.0, 3.0);
#endif
    BoneData bone = getBone(boneID);

    // the position transform is the same in both configurations
    gl_Position = bone.boneMatrix * vec4(pos, 1.0);
#ifdef BENCHMARK
    gl_Position.xyz += mat3(bone.normalMatrix) * normal;
#else
    vNormal = mat3(bone.normalMatrix) * normalize(normal);
#endif
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
#version 460 core
2+
#define BENCHMARK
3+
#include "5.vert"
Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
1+
#ifdef __cplusplus
2+
#define uint uint32_t
3+
#endif
14
#define MAX_OBJ_CNT 3000
25
#define MAX_BONE_CNT 37
3-
#define MAT_MAX_CNT (MAX_OBJ_CNT * MAX_BONE_CNT)
4-
#define BONE_VEC_MAX_CNT (MAT_MAX_CNT * 4)
5-
#define NORM_VEC_MAX_CNT (MAT_MAX_CNT * 3)
6-
#define BONE_COMP_MAX_CNT (MAT_MAX_CNT * 16)
7-
#define NORM_COMP_MAX_CNT (MAT_MAX_CNT * 9)
6+
#define MAT_MAX_CNT uint (MAX_OBJ_CNT * MAX_BONE_CNT)
7+
#define BONE_VEC_MAX_CNT uint (MAT_MAX_CNT * 4)
8+
#define NORM_VEC_MAX_CNT uint (MAT_MAX_CNT * 3)
9+
#define BONE_COMP_MAX_CNT uint (MAT_MAX_CNT * 16)
10+
#define NORM_COMP_MAX_CNT uint (MAT_MAX_CNT * 9)

0 commit comments

Comments
 (0)