Skip to content

Commit cb636ee

Browse files
committed
added workgroup bitcount unit test
1 parent c2db650 commit cb636ee

File tree

6 files changed

+46
-25
lines changed

6 files changed

+46
-25
lines changed

examples_tests/48.ArithmeticUnitTest/main.cpp

Lines changed: 36 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,17 @@ struct max
8181

8282
_NBL_STATIC_INLINE_CONSTEXPR const char* name = "max";
8383
};
84+
// Operation descriptor for the "bitcount" test case.
// Exposes a value type alias, an identity element of T(0), a call operator and a display name.
// NOTE(review): presumably mirrors the member layout of the other operation structs (e.g. `max`)
// so it can be passed as a template-template argument to validateResults — confirm against those.
template<typename T>
85+
struct bitcount
86+
{
87+
using type_t = T;
88+
_NBL_STATIC_INLINE_CONSTEXPR T IdentityElement = T(0);
89+
90+
// Ignores both operands and always yields T(0).
// NOTE(review): looks like a placeholder rather than a real reduction — verify how the
// validation code computes the expected bit count for this operation.
inline T operator()(T left, T right) { return T(0); }
91+
92+
// Name string for this operation.
_NBL_STATIC_INLINE_CONSTEXPR const char* name = "bitcount";
93+
};
94+
8495

8596

8697
//subgroup method emulations on the CPU, to verify the results of the GPU methods
@@ -250,7 +261,7 @@ bool validateResults(video::IVideoDriver* driver, const uint32_t* inputData, con
250261

251262
}
252263
template<template<class> class Arithmetic>
253-
bool runTest(video::IVideoDriver* driver, video::IGPUComputePipeline* pipeline, const video::IGPUDescriptorSet* ds, const uint32_t* inputData, const uint32_t workgroupSize, core::smart_refctd_ptr<IGPUBuffer>* const buffers)
264+
bool runTest(video::IVideoDriver* driver, video::IGPUComputePipeline* pipeline, const video::IGPUDescriptorSet* ds, const uint32_t* inputData, const uint32_t workgroupSize, core::smart_refctd_ptr<IGPUBuffer>* const buffers, bool is_workgroup_test = false)
254265
{
255266
driver->bindComputePipeline(pipeline);
256267
driver->bindDescriptorSets(video::EPBP_COMPUTE,pipeline->getLayout(),0u,1u,&ds,nullptr);
@@ -265,6 +276,9 @@ bool runTest(video::IVideoDriver* driver, video::IGPUComputePipeline* pipeline,
265276
passed = validateResults<Arithmetic,mul>(driver, inputData, workgroupSize, workgroupCount, buffers[4].get())&&passed;
266277
passed = validateResults<Arithmetic,::min>(driver, inputData, workgroupSize, workgroupCount, buffers[5].get())&&passed;
267278
passed = validateResults<Arithmetic,::max>(driver, inputData, workgroupSize, workgroupCount, buffers[6].get())&&passed;
279+
if(is_workgroup_test)
280+
passed = validateResults<Arithmetic,bitcount>(driver, inputData, workgroupSize, workgroupCount, buffers[7].get()) && passed;
281+
268282
return passed;
269283
}
270284

@@ -300,43 +314,41 @@ int main()
300314
}
301315
auto gpuinputDataBuffer = driver->createFilledDeviceLocalGPUBufferOnDedMem(kBufferSize, inputData);
302316

303-
//create 7 buffers.
304-
core::smart_refctd_ptr<IGPUBuffer> buffers[7];
305-
for (size_t i = 0; i < 7; i++)
317+
//create 8 buffers.
318+
constexpr const int outputBufferCount = 8;
319+
constexpr const int totalBufferCount = outputBufferCount+1;
320+
321+
core::smart_refctd_ptr<IGPUBuffer> buffers[outputBufferCount];
322+
for (size_t i = 0; i < outputBufferCount; i++)
306323
{
307324
buffers[i] = driver->createDeviceLocalGPUBufferOnDedMem(kBufferSize);
308325
}
309326

310-
IGPUDescriptorSetLayout::SBinding binding[8] = {
311-
{0u,EDT_STORAGE_BUFFER,1u,IGPUSpecializedShader::ESS_COMPUTE,nullptr}, //input with randomized numbers
312-
{1u,EDT_STORAGE_BUFFER,1u,IGPUSpecializedShader::ESS_COMPUTE,nullptr},
313-
{2u,EDT_STORAGE_BUFFER,1u,IGPUSpecializedShader::ESS_COMPUTE,nullptr},
314-
{3u,EDT_STORAGE_BUFFER,1u,IGPUSpecializedShader::ESS_COMPUTE,nullptr},
315-
{4u,EDT_STORAGE_BUFFER,1u,IGPUSpecializedShader::ESS_COMPUTE,nullptr},
316-
{5u,EDT_STORAGE_BUFFER,1u,IGPUSpecializedShader::ESS_COMPUTE,nullptr},
317-
{6u,EDT_STORAGE_BUFFER,1u,IGPUSpecializedShader::ESS_COMPUTE,nullptr},
318-
{7u,EDT_STORAGE_BUFFER,1u,IGPUSpecializedShader::ESS_COMPUTE,nullptr},
319-
};
320-
auto gpuDSLayout = driver->createGPUDescriptorSetLayout(binding, binding + 8);
321-
constexpr uint32_t pushconstantSize = 64u;
327+
IGPUDescriptorSetLayout::SBinding binding[totalBufferCount];
328+
for (uint32_t i = 0u; i < totalBufferCount; i++)
329+
{
330+
binding[i] = { i,EDT_STORAGE_BUFFER,1u,IGPUSpecializedShader::ESS_COMPUTE,nullptr };
331+
}
332+
auto gpuDSLayout = driver->createGPUDescriptorSetLayout(binding, binding + totalBufferCount);
333+
constexpr uint32_t pushconstantSize = 8u* totalBufferCount;
322334
SPushConstantRange pcRange[1] = { IGPUSpecializedShader::ESS_COMPUTE,0u,pushconstantSize };
323335
// pcRange is a one-element array, so the end of the range passed here is pcRange + 1.
// pushconstantSize is the size value stored inside that single range, not the number of
// ranges; using it as the pointer offset made the end pointer run far past the array.
auto pipelineLayout = driver->createGPUPipelineLayout(pcRange, pcRange + 1, core::smart_refctd_ptr(gpuDSLayout));
324336

325337
auto descriptorSet = driver->createGPUDescriptorSet(core::smart_refctd_ptr(gpuDSLayout));
326338
{
327-
IGPUDescriptorSet::SDescriptorInfo infos[8];
339+
IGPUDescriptorSet::SDescriptorInfo infos[totalBufferCount];
328340
infos[0].desc = gpuinputDataBuffer;
329341
infos[0].buffer = { 0u,kBufferSize };
330-
for (uint32_t i=1u; i<=7u; i++)
342+
for (uint32_t i=1u; i<= outputBufferCount; i++)
331343
{
332344
infos[i].desc = buffers[i - 1];
333345
infos[i].buffer = { 0u,kBufferSize };
334346

335347
}
336-
IGPUDescriptorSet::SWriteDescriptorSet writes[8];
337-
for (uint32_t i=0u; i<8u; i++)
348+
IGPUDescriptorSet::SWriteDescriptorSet writes[totalBufferCount];
349+
for (uint32_t i=0u; i< totalBufferCount; i++)
338350
writes[i] = { descriptorSet.get(),i,0u,1u,EDT_STORAGE_BUFFER,infos + i };
339-
driver->updateDescriptorSets(8, writes, 0u, nullptr);
351+
driver->updateDescriptorSets(totalBufferCount, writes, 0u, nullptr);
340352
}
341353
struct GLSLCodeWithWorkgroup {
342354
uint32_t workgroup_definition_position;
@@ -391,9 +403,9 @@ int main()
391403
passed = runTest<emulatedSubgroupReduction>(driver,pipelines[0u].get(),descriptorSet.get(),inputData,workgroupSize,buffers)&&passed;
392404
passed = runTest<emulatedSubgroupScanExclusive>(driver,pipelines[1u].get(),descriptorSet.get(),inputData,workgroupSize,buffers)&&passed;
393405
passed = runTest<emulatedSubgroupScanInclusive>(driver,pipelines[2u].get(),descriptorSet.get(),inputData,workgroupSize,buffers)&&passed;
394-
passed = runTest<emulatedWorkgroupReduction>(driver,pipelines[3u].get(),descriptorSet.get(),inputData,workgroupSize,buffers)&&passed;
395-
passed = runTest<emulatedWorkgroupScanExclusive>(driver,pipelines[4u].get(),descriptorSet.get(),inputData,workgroupSize,buffers)&&passed;
396-
passed = runTest<emulatedWorkgroupScanInclusive>(driver,pipelines[5u].get(),descriptorSet.get(),inputData,workgroupSize,buffers)&&passed;
406+
passed = runTest<emulatedWorkgroupReduction>(driver,pipelines[3u].get(),descriptorSet.get(),inputData,workgroupSize,buffers,true)&&passed;
407+
passed = runTest<emulatedWorkgroupScanExclusive>(driver,pipelines[4u].get(),descriptorSet.get(),inputData,workgroupSize,buffers, true)&&passed;
408+
passed = runTest<emulatedWorkgroupScanInclusive>(driver,pipelines[5u].get(),descriptorSet.get(),inputData,workgroupSize,buffers, true)&&passed;
397409

398410
if (passed)
399411
os::Printer::log("Passed test #" + std::to_string(workgroupSize), ELL_INFORMATION);

examples_tests/48.ArithmeticUnitTest/shaderCommon.glsl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,4 +33,8 @@ layout(set = 0, binding = 6, std430) writeonly buffer outmin
3333
layout(set = 0, binding = 7, std430) writeonly buffer outmax
3434
{
3535
uint maxOutput[];
36+
};
37+
layout(set = 0, binding = 8, std430) writeonly buffer outbitcount
38+
{
39+
uint bitCountOutput[];
3640
};

examples_tests/48.ArithmeticUnitTest/testWorkgroupExclusive.comp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,6 @@ void main()
1313
multOutput[gl_GlobalInvocationID.x] = nbl_glsl_workgroupExclusiveMul(sourceVal);
1414
minOutput [gl_GlobalInvocationID.x] = nbl_glsl_workgroupExclusiveMin(sourceVal);
1515
maxOutput [gl_GlobalInvocationID.x] = nbl_glsl_workgroupExclusiveMax(sourceVal);
16+
bitCountOutput [gl_GlobalInvocationID.x] = nbl_glsl_workgroupBallotExclusiveBitCount();
17+
1618
}

examples_tests/48.ArithmeticUnitTest/testWorkgroupInclusive.comp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,5 @@ void main()
1313
multOutput[gl_GlobalInvocationID.x] = nbl_glsl_workgroupInclusiveMul(sourceVal);
1414
minOutput [gl_GlobalInvocationID.x] = nbl_glsl_workgroupInclusiveMin(sourceVal);
1515
maxOutput [gl_GlobalInvocationID.x] = nbl_glsl_workgroupInclusiveMax(sourceVal);
16+
bitCountOutput [gl_GlobalInvocationID.x] = nbl_glsl_workgroupBallotInclusiveBitCount();
1617
}

examples_tests/48.ArithmeticUnitTest/testWorkgroupReduce.comp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,6 @@ void main()
1313
multOutput[gl_GlobalInvocationID.x] = nbl_glsl_workgroupMul(sourceVal);
1414
minOutput [gl_GlobalInvocationID.x] = nbl_glsl_workgroupMin(sourceVal);
1515
maxOutput [gl_GlobalInvocationID.x] = nbl_glsl_workgroupMax(sourceVal);
16+
bitCountOutput [gl_GlobalInvocationID.x] = nbl_glsl_workgroupBallotBitCount();
17+
1618
}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
#include "shaderCommon.glsl"
2-
2+
#include "nbl/builtin/glsl/workgroup/ballot.glsl"
33
#include "nbl/builtin/glsl/workgroup/arithmetic.glsl"

0 commit comments

Comments
 (0)