Skip to content

Commit d38a1c7

Browse files
Merge pull request #544 from Crisspl/3rdparty-upgrade
AMD driver bug workaround fix, glslang fix, spirv-cross sync
2 parents af10b55 + 4662239 commit d38a1c7

File tree

9 files changed

+68
-97
lines changed

9 files changed

+68
-97
lines changed

.gitmodules

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
[submodule "3rdparty/glslang"]
88
path = 3rdparty/glslang
99
url = https://github.com/devshgraphicsprogramming/glslang.git
10+
branch = nabla
1011
[submodule "3rdparty/shaderc"]
1112
path = 3rdparty/shaderc
1213
url = https://github.com/google/shaderc.git
@@ -19,7 +20,7 @@
1920
[submodule "3rdparty/spirv_cross"]
2021
path = 3rdparty/spirv_cross
2122
url = https://github.com/devshgraphicsprogramming/SPIRV-Cross.git
22-
branch = issues1350-2
23+
branch = nabla
2324
[submodule "3rdparty/zlib"]
2425
path = 3rdparty/zlib
2526
url = https://github.com/madler/zlib.git

3rdparty/glslang

examples_tests/26.MultidrawIndirectVSCPUCull/commonVertexShader.glsl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,5 +16,5 @@ void impl(uint _objectUUID)
1616

1717
gl_Position = mvp[0]*vPos.x+mvp[1]*vPos.y+mvp[2]*vPos.z+mvp[3];
1818
Color = vec4(0.4,0.4,1.0,1.0);
19-
Normal = normalize(drawData[_objectUUID].normalMatrix*vNormal); //have to normalize twice because of normal quantization
19+
Normal = normalize(irr_builtin_glsl_workaround_AMD_broken_row_major_qualifier(drawData[_objectUUID].normalMatrix)*vNormal); //have to normalize twice because of normal quantization
2020
}

examples_tests/31.SkinningDataBenchmark/1.vert

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
#include "common.glsl"
22

3+
#include <irr/builtin/glsl/broken_driver_workarounds/amd.glsl>
4+
35
struct BoneNormalMatPair
46
{
57
mat4 boneMatrix;
@@ -26,11 +28,11 @@ void main()
2628
const vec3 normal = vec3(1.0, 2.0, 3.0);
2729
#endif
2830
#ifndef BENCHMARK
29-
gl_Position = matrices[boneID].boneMatrix * vec4(pos, 1.0);
30-
vNormal = mat3(matrices[boneID].normalMatrix) * normalize(normal);
31+
gl_Position = irr_builtin_glsl_workaround_AMD_broken_row_major_qualifier(matrices[boneID].boneMatrix) * vec4(pos, 1.0);
32+
vNormal = mat3(irr_builtin_glsl_workaround_AMD_broken_row_major_qualifier(matrices[boneID].normalMatrix)) * normalize(normal);
3133
#else
32-
gl_Position = matrices[boneID].boneMatrix * vec4(pos, 1.0);
33-
gl_Position.xyz += mat3(matrices[boneID].normalMatrix) * normal;
34+
gl_Position = irr_builtin_glsl_workaround_AMD_broken_row_major_qualifier(matrices[boneID].boneMatrix) * vec4(pos, 1.0);
35+
gl_Position.xyz += mat3(irr_builtin_glsl_workaround_AMD_broken_row_major_qualifier(matrices[boneID].normalMatrix)) * normal;
3436
#endif
3537

3638
}

examples_tests/31.SkinningDataBenchmark/2.vert

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
#include "common.glsl"
22

3+
#include <irr/builtin/glsl/broken_driver_workarounds/amd.glsl>
4+
35
layout(std430, set = 0, binding = 0, row_major) restrict readonly buffer BoneMatrices
46
{
57
mat4 boneMatrix[MAT_MAX_CNT];
@@ -22,10 +24,10 @@ void main()
2224
#endif
2325

2426
#ifndef BENCHMARK
25-
gl_Position = boneMatrix[boneID] * vec4(pos, 1.0);
26-
vNormal = mat3(normalMatrix[boneID]) * normalize(normal);
27+
gl_Position = irr_builtin_glsl_workaround_AMD_broken_row_major_qualifier(boneMatrix[boneID]) * vec4(pos, 1.0);
28+
vNormal = mat3(irr_builtin_glsl_workaround_AMD_broken_row_major_qualifier(normalMatrix[boneID])) * normalize(normal);
2729
#else
28-
gl_Position = boneMatrix[boneID] * vec4(pos, 1.0);
29-
gl_Position.xyz += mat3(normalMatrix[boneID]) * normal;
30+
gl_Position = irr_builtin_glsl_workaround_AMD_broken_row_major_qualifier(boneMatrix[boneID]) * vec4(pos, 1.0);
31+
gl_Position.xyz += mat3(irr_builtin_glsl_workaround_AMD_broken_row_major_qualifier(normalMatrix[boneID])) * normal;
3032
#endif
3133
}

examples_tests/31.SkinningDataBenchmark/5.vert

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
#extension GL_KHR_shader_subgroup_ballot : require
44

5+
#include <irr/builtin/glsl/broken_driver_workarounds/amd.glsl>
6+
57
struct BoneData
68
{
79
mat4 boneMatrix;
@@ -103,9 +105,14 @@ BoneData getBone(uint _boneID)
103105

104106
return toBoneData(retval);
105107
}
106-
else
108+
else
107109
//#endif
108-
return boneSSBO_structs.data[_boneID];
110+
{
111+
BoneData retval;
112+
retval.boneMatrix = irr_builtin_glsl_workaround_AMD_broken_row_major_qualifier(boneSSBO_structs.data[_boneID].boneMatrix);
113+
retval.normalMatrix = irr_builtin_glsl_workaround_AMD_broken_row_major_qualifier(boneSSBO_structs.data[_boneID].normalMatrix);
114+
return retval;
115+
}
109116
}
110117

111118
void main()

include/irr/builtin/glsl/bxdf/brdf/specular/blinn_phong.glsl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ float irr_glsl_blinn_phong_cos_eval_DG_wo_clamps(in float NdotH, in float NdotV_
6565
float NG = irr_glsl_blinn_phong(NdotH, n);
6666
if (a2>FLT_MIN)
6767
NG *= irr_glsl_beckmann_smith_correlated(NdotV_squared, NdotL2, a2);
68-
return scalar_part;
68+
return NG;
6969
}
7070
float irr_glsl_blinn_phong_cos_eval_DG_wo_clamps(in float NdotH, in float NdotV_squared, in float NdotL2, in float n)
7171
{
@@ -96,8 +96,8 @@ float irr_glsl_blinn_phong_cos_eval_DG_wo_clamps(in float NdotH, in float NdotH2
9696
{
9797
float d = irr_glsl_blinn_phong(NdotH, 1.0/(1.0-NdotH2), TdotH2, BdotH2, nx, ny);
9898
if (ax2>FLT_MIN || ay2>FLT_MIN)
99-
NG *= irr_glsl_beckmann_smith_correlated(TdotV2, BdotV2, NdotV_squared, TdotL2, BdotL2, NdotL2, ax2, ay2);
100-
return scalar_part;
99+
d *= irr_glsl_beckmann_smith_correlated(TdotV2, BdotV2, NdotV_squared, TdotL2, BdotL2, NdotL2, ax2, ay2);
100+
return d;
101101
}
102102
float irr_glsl_blinn_phong_cos_eval_DG_wo_clamps(in float NdotH, in float NdotH2, in float TdotH2, in float BdotH2, in float TdotL2, in float BdotL2, in float TdotV2, in float BdotV2, in float NdotV_squared, in float NdotL2, in float nx, in float ny)
103103
{
@@ -109,7 +109,7 @@ float irr_glsl_blinn_phong_cos_eval_DG_wo_clamps(in float NdotH, in float NdotH2
109109

110110
vec3 irr_glsl_blinn_phong_cos_eval_wo_clamps(in float NdotH, in float NdotH2, in float TdotH2, in float BdotH2, in float TdotL2, in float BdotL2, in float maxNdotV, in float TdotV2, in float BdotV2, in float NdotV_squared, in float NdotL2, in float VdotH, in float nx, in float ny, in mat2x3 ior, in float ax2, in float ay2)
111111
{
112-
float scalar_part = irr_glsl_blinn_phong_cos_eval_DG_wo_clamps(NdotH, NdotH2, TdotH2, BdotH2, TdotL2, BdotL2, maxNdotV, TdotV2, BdotV2, NdotV_squared, NdotL2, nx, ny, ax2, ay2);
112+
float scalar_part = irr_glsl_blinn_phong_cos_eval_DG_wo_clamps(NdotH, NdotH2, TdotH2, BdotH2, TdotL2, BdotL2, TdotV2, BdotV2, NdotV_squared, NdotL2, nx, ny, ax2, ay2);
113113

114114
return irr_glsl_fresnel_conductor(ior[0], ior[1], VdotH)*irr_glsl_microfacet_to_light_measure_transform(scalar_part,maxNdotV);
115115
}

source/Irrlicht/COpenGLDriver.cpp

Lines changed: 38 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -51,29 +51,10 @@ class AMDbugfixCompiler : public spirv_cross::Compiler
5151
public:
5252
using spirv_cross::Compiler::Compiler;
5353

54-
enum E_AMD_FIX_FUNCS
55-
{
56-
EAFF_d2x2,
57-
EAFF_d2x3,
58-
EAFF_d2x4,
59-
EAFF_d3x2,
60-
EAFF_d3x3,
61-
EAFF_d3x4,
62-
EAFF_d4x2,
63-
EAFF_d4x3,
64-
EAFF_d4x4,
65-
EAFF_2x2,
66-
EAFF_2x3,
67-
EAFF_2x4,
68-
EAFF_3x2,
69-
EAFF_3x3,
70-
EAFF_3x4,
71-
EAFF_4x2,
72-
EAFF_4x3,
73-
EAFF_4x4,
74-
75-
EAFF_COUNT
76-
};
54+
_IRR_STATIC_INLINE_CONSTEXPR bool ACCOUNT_SSBO = true;
55+
56+
// {numer of possible row counts} * {number of possible col counts} * {number of possible basetypes (float/double)}
57+
_IRR_STATIC_INLINE_CONSTEXPR uint32_t WORKAROUND_FUNCTION_COUNT = 3u*3u*2u;
7758
struct SFunction
7859
{
7960
uint32_t restype;
@@ -88,19 +69,24 @@ class AMDbugfixCompiler : public spirv_cross::Compiler
8869

8970
bool operator<(const SLoad& rhs) const { return offset<rhs.offset; }
9071
};
72+
using workaround_functions_t = std::array<SFunction, WORKAROUND_FUNCTION_COUNT>;
9173

92-
std::pair<irr::core::vector<SLoad>, std::array<SFunction, EAFF_COUNT>> getFixCandidates()
74+
std::pair<irr::core::vector<SLoad>, workaround_functions_t> getFixCandidates()
9375
{
9476
irr::core::vector<uint32_t> vars;//IDs of UBO or SSBO vars (instances)
9577
irr::core::vector<SLoad> loads;//OpLoad's that are potentially going to need the fix
9678

97-
std::array<SFunction, EAFF_COUNT> fixFuncs;
79+
workaround_functions_t fixFuncs;
9880
memset(fixFuncs.data(), 0xff, fixFuncs.size()*sizeof(SFunction));
9981

10082
ir.for_each_typed_id<spirv_cross::SPIRVariable>(//gather all instances of SSBO and UBO blocks
10183
[&](uint32_t, const spirv_cross::SPIRVariable& var) {
10284
auto& type = this->get<spirv_cross::SPIRType>(var.basetype);
103-
if (type.storage == spv::StorageClassUniform || type.storage == spv::StorageClassStorageBuffer)
85+
bool valid = type.storage == spv::StorageClassUniform;
86+
if constexpr (ACCOUNT_SSBO) {
87+
valid = valid || (type.storage == spv::StorageClassStorageBuffer);
88+
}
89+
if (valid)
10490
{
10591
vars.push_back(var.self);
10692
}
@@ -109,53 +95,16 @@ class AMDbugfixCompiler : public spirv_cross::Compiler
10995
[&](uint32_t, const spirv_cross::SPIRFunction& func) {
11096
uint32_t fid = func.self;
11197
const std::string& nm = get_name(fid);
112-
const char expected[] = "irr_builtin_glsl_workaround_AMD_broken_row_major_qualifier_";
113-
if (nm.length()>sizeof(expected)-1u && strncmp(nm.c_str(), expected, sizeof(expected)-1u)==0)
98+
const char expected[] = "irr_builtin_glsl_workaround_AMD_broken_row_major_qualifier";
99+
if (strncmp(nm.c_str(), expected, sizeof(expected)-1u) == 0)
114100
{
115-
uint32_t ix = ~0u;
116-
const char* szstr = nm.c_str()+sizeof(expected)-1u;
117-
if (strncmp(szstr, "dmat2x2", 7) == 0)
118-
ix = EAFF_d2x2;
119-
else if (strncmp(szstr, "dmat2x3", 7) == 0)
120-
ix = EAFF_d2x3;
121-
else if (strncmp(szstr, "dmat2x4", 7) == 0)
122-
ix = EAFF_d2x4;
123-
else if (strncmp(szstr, "dmat3x2", 7) == 0)
124-
ix = EAFF_d3x2;
125-
else if (strncmp(szstr, "dmat3x3", 7) == 0)
126-
ix = EAFF_d3x3;
127-
else if (strncmp(szstr, "dmat3x4", 7) == 0)
128-
ix = EAFF_d3x4;
129-
else if (strncmp(szstr, "dmat4x2", 7) == 0)
130-
ix = EAFF_d4x2;
131-
else if (strncmp(szstr, "dmat4x3", 7) == 0)
132-
ix = EAFF_d4x3;
133-
else if (strncmp(szstr, "dmat4x4", 7) == 0)
134-
ix = EAFF_d4x4;
135-
else if (strncmp(szstr, "mat2x2", 6) == 0)
136-
ix = EAFF_2x2;
137-
else if (strncmp(szstr, "mat2x3", 6) == 0)
138-
ix = EAFF_2x3;
139-
else if (strncmp(szstr, "mat2x4", 6) == 0)
140-
ix = EAFF_2x4;
141-
else if (strncmp(szstr, "mat3x2", 6) == 0)
142-
ix = EAFF_3x2;
143-
else if (strncmp(szstr, "mat3x3", 6) == 0)
144-
ix = EAFF_3x3;
145-
else if (strncmp(szstr, "mat3x4", 6) == 0)
146-
ix = EAFF_3x4;
147-
else if (strncmp(szstr, "mat4x2", 6) == 0)
148-
ix = EAFF_4x2;
149-
else if (strncmp(szstr, "mat4x3", 6) == 0)
150-
ix = EAFF_4x3;
151-
else if (strncmp(szstr, "mat4x4", 6) == 0)
152-
ix = EAFF_4x4;
153-
154-
if (ix < fixFuncs.size())
155-
{
156-
fixFuncs[ix].restype = get_type(func.return_type).self;
157-
fixFuncs[ix].id = fid;
158-
}
101+
auto& param_type = get_type(func.arguments[0].type);
102+
103+
const uint32_t ix = (param_type.basetype==spirv_cross::SPIRType::BaseType::Double ? 1u:0u)*(3u*3u) + (3u*(param_type.vecsize-2u)) + param_type.columns-2u;
104+
105+
assert(ix < fixFuncs.size());
106+
fixFuncs[ix].restype = get_type(func.return_type).self;
107+
fixFuncs[ix].id = fid;
159108
}
160109
}
161110
);
@@ -193,22 +142,25 @@ class AMDbugfixCompiler : public spirv_cross::Compiler
193142
getThisOpLoad = false;
194143
}
195144
}
196-
break;
145+
break;
197146
case spv::OpAccessChain:
198147
{
199148
uint32_t baseptr = ops[2];
200149
auto found = std::find(vars.begin(), vars.end(), baseptr);
201150
if (found != vars.end())
202151
getThisOpLoad = true;
203152
}
204-
break;
153+
break;
205154
case spv::OpFunctionCall:
206155
{
207156
auto& callee = get<spirv_cross::SPIRFunction>(ops[2]);
208157
callstack.push(std::cref(callee));
209158
}
210-
break;
211-
default: break;
159+
break;
160+
default:
161+
// OpLoad should be directly after OpAccessChain, so reset flag if access chained concerned some other op
162+
getThisOpLoad = false;
163+
break;
212164
}
213165
}
214166
}
@@ -1330,7 +1282,7 @@ core::smart_refctd_ptr<IGPUSpecializedShader> COpenGLDriver::createGPUSpecialize
13301282
return nullptr;
13311283
GLfloat a;
13321284

1333-
// #define FIX_AMD_DRIVER_BUG // TODO: @Crisspl get this code manipulation to pass on the `boxFrustCull.comp` shader of ex 26 also update it to the fact i've converted the workaround to an overloaded function (name has now changed!)
1285+
#define FIX_AMD_DRIVER_BUG // TODO: fix to work even when user dont wrap matrix fetch with irr_builtin_glsl_workaround_AMD_broken_row_major_qualifier
13341286
#ifdef FIX_AMD_DRIVER_BUG
13351287
AMDbugfixCompiler comp(reinterpret_cast<const uint32_t*>(spvCode->getPointer()), spvCode->getSize()/4u);
13361288
comp.set_entry_point(EP, asset::ESS2spvExecModel(stage));
@@ -1370,12 +1322,19 @@ core::smart_refctd_ptr<IGPUSpecializedShader> COpenGLDriver::createGPUSpecialize
13701322
break;
13711323
}
13721324
}
1373-
assert(fcall->f_id<(~0u));
1325+
if (fcall->f_id == (~0u))
1326+
{
1327+
os::Printer::log(_specInfo.m_filePathHint,
1328+
"Some of your UBO/SSBO matrix fetches are not wrapped with irr_builtin_glsl_workaround_AMD_broken_row_major_qualifier!",
1329+
ELL_ERROR);
1330+
assert(false);
1331+
}
13741332

13751333
uint32_t* store = spv.data()+ld.offset+ld.len+extraOffset;
13761334
store[2] = fcallId;//store result of new OpFunctionCall instead of result of OpStore
13771335

1378-
spv.insert(spv.begin()+ld.offset+ld.len+extraOffset, fcall_, fcall_+sizeof(fcall_)/sizeof(*fcall_));
1336+
const uint32_t offset = ld.offset+ld.len+extraOffset;
1337+
spv.insert(spv.begin()+offset, fcall_, fcall_+sizeof(fcall_)/sizeof(*fcall_));
13791338
extraOffset += sizeof(fcall_)/sizeof(*fcall_);
13801339
}
13811340
spvCode = core::make_smart_refctd_ptr<asset::ICPUBuffer>(spv.size()*4ull);

0 commit comments

Comments
 (0)