Skip to content

Commit 938f790

Browse files
committed
Ballot bitcount test finishing touches
1 parent cb636ee commit 938f790

File tree

5 files changed

+29
-24
lines changed

5 files changed

+29
-24
lines changed

examples_tests/48.ArithmeticUnitTest/main.cpp

Lines changed: 17 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ struct and
1818
_NBL_STATIC_INLINE_CONSTEXPR T IdentityElement = ~0ull; // this should be a reinterpret cast
1919

2020
inline T operator()(T left, T right) { return left & right; }
21-
21+
_NBL_STATIC_INLINE_CONSTEXPR bool runOPonFirst = false;
2222
_NBL_STATIC_INLINE_CONSTEXPR const char* name = "and";
2323
};
2424
template<typename T>
@@ -28,7 +28,7 @@ struct xor
2828
_NBL_STATIC_INLINE_CONSTEXPR T IdentityElement = 0ull; // this should be a reinterpret cast
2929

3030
inline T operator()(T left, T right) { return left ^ right; }
31-
31+
_NBL_STATIC_INLINE_CONSTEXPR bool runOPonFirst = false;
3232
_NBL_STATIC_INLINE_CONSTEXPR const char* name = "xor";
3333
};
3434
template<typename T>
@@ -38,7 +38,7 @@ struct or
3838
_NBL_STATIC_INLINE_CONSTEXPR T IdentityElement = 0ull; // this should be a reinterpret cast
3939

4040
inline T operator()(T left, T right) { return left | right; }
41-
41+
_NBL_STATIC_INLINE_CONSTEXPR bool runOPonFirst = false;
4242
_NBL_STATIC_INLINE_CONSTEXPR const char* name = "or";
4343
};
4444
template<typename T>
@@ -48,7 +48,7 @@ struct add
4848
_NBL_STATIC_INLINE_CONSTEXPR T IdentityElement = T(0);
4949

5050
inline T operator()(T left, T right) { return left + right; }
51-
51+
_NBL_STATIC_INLINE_CONSTEXPR bool runOPonFirst = false;
5252
_NBL_STATIC_INLINE_CONSTEXPR const char* name = "add";
5353
};
5454
template<typename T>
@@ -58,7 +58,7 @@ struct mul
5858
_NBL_STATIC_INLINE_CONSTEXPR T IdentityElement = T(1);
5959

6060
inline T operator()(T left, T right) { return left * right; }
61-
61+
_NBL_STATIC_INLINE_CONSTEXPR bool runOPonFirst = false;
6262
_NBL_STATIC_INLINE_CONSTEXPR const char* name = "mul";
6363
};
6464
template<typename T>
@@ -68,7 +68,7 @@ struct min
6868
_NBL_STATIC_INLINE_CONSTEXPR T IdentityElement = std::numeric_limits<T>::max();
6969

7070
inline T operator()(T left, T right) { return std::min<T>(left, right); }
71-
71+
_NBL_STATIC_INLINE_CONSTEXPR bool runOPonFirst = false;
7272
_NBL_STATIC_INLINE_CONSTEXPR const char* name = "min";
7373
};
7474
template<typename T>
@@ -78,22 +78,21 @@ struct max
7878
_NBL_STATIC_INLINE_CONSTEXPR T IdentityElement = std::numeric_limits<T>::lowest();
7979

8080
inline T operator()(T left, T right) { return std::max<T>(left, right); }
81-
81+
_NBL_STATIC_INLINE_CONSTEXPR bool runOPonFirst = false;
8282
_NBL_STATIC_INLINE_CONSTEXPR const char* name = "max";
8383
};
8484
// Binary functor used for the ballot bit-count test. This hunk renames it from
// `bitcount` to `countBits` and replaces the placeholder operator (which always
// returned T(0)) with one that adds the lowest bit of `right` to `left`.
template<typename T>
85-
struct bitcount
85+
struct countBits
8686
{
8787
using type_t = T;
8888
_NBL_STATIC_INLINE_CONSTEXPR T IdentityElement = T(0);
8989

90-
inline T operator()(T left, T right) { return T(0); }
91-
90+
// `left` is the running count; only bit 0 of `right` contributes to it.
inline T operator()(T left, T right) { return left + (right&1u); }
91+
// Read by the emulatedWorkgroup* functors: when true they pass the first input
// through this operator instead of copying it verbatim.
_NBL_STATIC_INLINE_CONSTEXPR bool runOPonFirst = true;
9292
// The reported name stays "bitcount" even though the struct is now `countBits`.
_NBL_STATIC_INLINE_CONSTEXPR const char* name = "bitcount";
9393
};
9494

9595

96-
9796
//subgroup method emulations on the CPU, to verify the results of the GPU methods
9897
template<class CRTP, typename T>
9998
struct emulatedSubgroupCommon
@@ -122,7 +121,6 @@ struct emulatedSubgroupReduction : emulatedSubgroupCommon<emulatedSubgroupReduct
122121
red = OP()(red,subgroupData[i]);
123122
std::fill(outSubgroupData,outSubgroupData+clampedSubgroupSize,red);
124123
}
125-
126124
_NBL_STATIC_INLINE_CONSTEXPR const char* name = "subgroup reduction";
127125
};
128126
template<class OP>
@@ -136,7 +134,6 @@ struct emulatedSubgroupScanExclusive : emulatedSubgroupCommon<emulatedSubgroupSc
136134
for (auto i=1u; i<clampedSubgroupSize; i++)
137135
outSubgroupData[i] = OP()(outSubgroupData[i-1u],subgroupData[i-1u]);
138136
}
139-
140137
_NBL_STATIC_INLINE_CONSTEXPR const char* name = "subgroup exclusive scan";
141138
};
142139
template<class OP>
@@ -150,7 +147,6 @@ struct emulatedSubgroupScanInclusive : emulatedSubgroupCommon<emulatedSubgroupSc
150147
for (auto i=1u; i<clampedSubgroupSize; i++)
151148
outSubgroupData[i] = OP()(outSubgroupData[i-1u],subgroupData[i]);
152149
}
153-
154150
_NBL_STATIC_INLINE_CONSTEXPR const char* name = "subgroup inclusive scan";
155151
};
156152

@@ -162,12 +158,11 @@ struct emulatedWorkgroupReduction
162158

163159
// CPU reference for a workgroup reduction: folds the `workgroupSize` inputs in
// `workgroupData` with OP and writes that single result into every one of the
// `workgroupSize` slots of `outputData`. `subgroupSize` is not used in this body.
// Reads workgroupData[0] unconditionally, so the input must hold at least one element.
inline void operator()(type_t* outputData, const type_t* workgroupData, uint32_t workgroupSize, uint32_t subgroupSize)
164160
{
165-
type_t red = workgroupData[0];
161+
// Ops that set runOPonFirst (e.g. countBits) need the first input transformed by
// OP as well; the left operand is the literal 0, not OP::IdentityElement.
type_t red = OP::runOPonFirst ? OP()(0, workgroupData[0]) : workgroupData[0];
166162
for (auto i=1u; i<workgroupSize; i++)
167163
red = OP()(red,workgroupData[i]);
168164
std::fill(outputData,outputData+workgroupSize,red);
169165
}
170-
171166
_NBL_STATIC_INLINE_CONSTEXPR const char* name = "workgroup reduction";
172167
};
173168
template<class OP>
@@ -177,11 +172,10 @@ struct emulatedWorkgroupScanExclusive
177172

178173
// CPU reference for a workgroup exclusive scan: outputData[i] is built from
// outputData[i-1] and workgroupData[i-1], so element i never includes its own input.
// `subgroupSize` is not used in this body.
inline void operator()(type_t* outputData, const type_t* workgroupData, uint32_t workgroupSize, uint32_t subgroupSize)
178173
{
180-
outputData[0u] = OP::IdentityElement;
175+
// NOTE(review): when runOPonFirst is true, workgroupData[0] is folded in here AND
// again by the loop at i==1 (which also reads workgroupData[0]). For countBits that
// gives outputData[0] = d0&1 and outputData[1] = 2*(d0&1), whereas seeding with
// IdentityElement would give 0 and d0&1 — confirm this is what the GPU exclusive
// bit count is expected to produce.
outputData[0u] = OP::runOPonFirst ? OP()(0, workgroupData[0]) : OP::IdentityElement;
181176
for (auto i=1u; i<workgroupSize; i++)
182177
outputData[i] = OP()(outputData[i-1u],workgroupData[i-1u]);
183178
}
184-
185179
_NBL_STATIC_INLINE_CONSTEXPR const char* name = "workgroup exclusive scan";
186180
};
187181
template<class OP>
@@ -191,11 +185,10 @@ struct emulatedWorkgroupScanInclusive
191185

192186
// CPU reference for a workgroup inclusive scan: outputData[i] combines
// outputData[i-1] with workgroupData[i], so element i includes its own input.
// `subgroupSize` is not used in this body.
inline void operator()(type_t* outputData, const type_t* workgroupData, uint32_t workgroupSize, uint32_t subgroupSize)
192186
{
194-
outputData[0u] = workgroupData[0u];
188+
// Ops that set runOPonFirst (e.g. countBits) get their first input passed through
// OP with a literal 0 on the left; all other ops copy the first input unchanged.
outputData[0u] = OP::runOPonFirst ? OP()(0, workgroupData[0]) : workgroupData[0u];
195189
for (auto i=1u; i<workgroupSize; i++)
196190
outputData[i] = OP()(outputData[i-1u],workgroupData[i]);
197191
}
198-
199192
_NBL_STATIC_INLINE_CONSTEXPR const char* name = "workgroup inclusive scan";
200193
};
201194

@@ -246,7 +239,7 @@ bool validateResults(video::IVideoDriver* driver, const uint32_t* inputData, con
246239
for (uint32_t localInvocationIndex=0u; localInvocationIndex<workgroupSize; localInvocationIndex++)
247240
if (tmp[localInvocationIndex]!=dataFromBuffer[workgroupOffset+localInvocationIndex])
248241
{
249-
os::Printer::log("Failed test #" + std::to_string(workgroupSize) + " (" + Arithmetic<OP<uint32_t>>::name + ") (" + OP<uint32_t>::name + ")", ELL_ERROR);
242+
os::Printer::log("Failed test #" + std::to_string(workgroupSize) + " (" + Arithmetic<OP<uint32_t>>::name + ") (" + OP<uint32_t>::name + ") Expected "+ std::to_string(dataFromBuffer[workgroupOffset + localInvocationIndex])+ " got " + std::to_string(tmp[localInvocationIndex]), ELL_ERROR);
250243
success = false;
251244
break;
252245
}
@@ -277,7 +270,9 @@ bool runTest(video::IVideoDriver* driver, video::IGPUComputePipeline* pipeline,
277270
passed = validateResults<Arithmetic,::min>(driver, inputData, workgroupSize, workgroupCount, buffers[5].get())&&passed;
278271
passed = validateResults<Arithmetic,::max>(driver, inputData, workgroupSize, workgroupCount, buffers[6].get())&&passed;
279272
if(is_workgroup_test)
280-
passed = validateResults<Arithmetic,bitcount>(driver, inputData, workgroupSize, workgroupCount, buffers[7].get()) && passed;
273+
{
274+
passed = validateResults<Arithmetic, countBits>(driver, inputData, workgroupSize, workgroupCount, buffers[7].get()) && passed;
275+
}
281276

282277
return passed;
283278
}

examples_tests/48.ArithmeticUnitTest/testWorkgroupExclusive.comp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ void main()
1313
multOutput[gl_GlobalInvocationID.x] = nbl_glsl_workgroupExclusiveMul(sourceVal);
1414
minOutput [gl_GlobalInvocationID.x] = nbl_glsl_workgroupExclusiveMin(sourceVal);
1515
maxOutput [gl_GlobalInvocationID.x] = nbl_glsl_workgroupExclusiveMax(sourceVal);
16+
nbl_glsl_workgroupBallot((sourceVal&0x1u)==0x1u);
1617
bitCountOutput [gl_GlobalInvocationID.x] = nbl_glsl_workgroupBallotExclusiveBitCount();
1718

1819
}

examples_tests/48.ArithmeticUnitTest/testWorkgroupInclusive.comp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,5 +13,6 @@ void main()
1313
multOutput[gl_GlobalInvocationID.x] = nbl_glsl_workgroupInclusiveMul(sourceVal);
1414
minOutput [gl_GlobalInvocationID.x] = nbl_glsl_workgroupInclusiveMin(sourceVal);
1515
maxOutput [gl_GlobalInvocationID.x] = nbl_glsl_workgroupInclusiveMax(sourceVal);
16+
nbl_glsl_workgroupBallot((sourceVal&0x1u)==0x1u);
1617
bitCountOutput [gl_GlobalInvocationID.x] = nbl_glsl_workgroupBallotInclusiveBitCount();
1718
}

examples_tests/48.ArithmeticUnitTest/testWorkgroupReduce.comp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ void main()
1313
multOutput[gl_GlobalInvocationID.x] = nbl_glsl_workgroupMul(sourceVal);
1414
minOutput [gl_GlobalInvocationID.x] = nbl_glsl_workgroupMin(sourceVal);
1515
maxOutput [gl_GlobalInvocationID.x] = nbl_glsl_workgroupMax(sourceVal);
16+
nbl_glsl_workgroupBallot((sourceVal&0x1u)==0x1u);
1617
bitCountOutput [gl_GlobalInvocationID.x] = nbl_glsl_workgroupBallotBitCount();
1718

1819
}
Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
11
#include "shaderCommon.glsl"
2-
#include "nbl/builtin/glsl/workgroup/ballot.glsl"
3-
#include "nbl/builtin/glsl/workgroup/arithmetic.glsl"
2+
3+
// ORDER OF INCLUDES MATTERS !!!!!
4+
// The feature that requires the most shared memory must be included first.
5+
// In general, anyone using more than 2 features that rely on shared memory
6+
// should declare shared memory of the appropriate size themselves.
7+
// This unit test deliberately does not, because it needs to verify that the
8+
// default sizing macros actually work for all workgroup sizes.
9+
#include <nbl/builtin/glsl/workgroup/arithmetic.glsl>
10+
#include <nbl/builtin/glsl/workgroup/ballot.glsl>

0 commit comments

Comments
 (0)