Skip to content

Commit 8728b05

Browse files
Merge pull request #559 from Hazardu/BallotBitCountTest
Ballot bitCount unit test
2 parents e7a536b + 742d85e commit 8728b05

File tree

9 files changed

+291
-0
lines changed

9 files changed

+291
-0
lines changed
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
2+
# Pull in the repository-wide helper module. OPTIONAL stops include() from
# raising its own error when the module is missing, so that the explicit check
# below is actually reachable and can print a hint about where the file lives.
include(common OPTIONAL RESULT_VARIABLE RES)
if(NOT RES)
	message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory")
endif()

# Declare this example as an executable project. All four arguments are left
# empty here. NOTE(review): their meaning is defined by common.cmake, which is
# not visible from this file - confirm there before passing anything else.
irr_create_executable_project("" "" "" "")
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
// Number of 32-bit words in the result buffer: 32*1024*1024 = 33,554,432 dwords (128 MiB when each is a 4-byte uint).
#define BUFFER_DWORD_COUNT (32*1024*1024)
Lines changed: 244 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,244 @@
1+
#define _IRR_STATIC_LIB_
2+
#include <irrlicht.h>
3+
4+
#include "../../source/Irrlicht/COpenGLDriver.h"
5+
6+
7+
using namespace irr;
8+
using namespace core;
9+
using namespace video;
10+
using namespace asset;
11+
12+
//workgroup methods - since there are no subgroup methods for bit count
13+
14+
/**
 * Returns the least-significant bit of `input` (0 or 1).
 *
 * Despite its name this is NOT a population count: every bit above bit 0 is
 * ignored, so the result can never exceed 1.
 * NOTE(review): presumably `input` is meant to carry a single boolean vote in
 * its lowest bit - confirm before relying on this for wider values.
 *
 * @param input value whose lowest bit is inspected
 * @return 1 if bit 0 of `input` is set, 0 otherwise
 */
uint32_t bitCount(uint32_t input)
{
    return input & 1u;
}
18+
19+
struct emulatedWorkgroupReduction
20+
{
21+
inline void operator()(uint32_t* outputData, uint32_t workgroupSize, uint32_t subgroupSize)
22+
{
23+
uint32_t bitC = 0;
24+
/*for (auto i=1u; i<workgroupSize; i++)
25+
bitC += bitCount<uint32_t>(1);*/
26+
std::fill(outputData,outputData+workgroupSize,bitC);
27+
}
28+
29+
_IRR_STATIC_INLINE_CONSTEXPR const char* name = "workgroup reduction";
30+
};
31+
struct emulatedWorkgroupScanExclusive
32+
{
33+
inline void operator()(uint32_t* outputData, uint32_t workgroupSize, uint32_t subgroupSize)
34+
{
35+
uint32_t bitC = 0;
36+
//outputData[0u] = OP::IdentityElement;
37+
//for (auto i=1u; i<workgroupSize; i++)
38+
// outputData[i] = OP()(outputData[i-1u],workgroupData[i-1u]);
39+
//uint32_t bitC = 0;
40+
//for (auto i = 1u; i < workgroupSize; i++)
41+
// bitC += bitCount<uint32_t>(1);
42+
std::fill(outputData, outputData + workgroupSize, bitC);
43+
44+
45+
}
46+
47+
_IRR_STATIC_INLINE_CONSTEXPR const char* name = "workgroup exclusive scan";
48+
};
49+
struct emulatedWorkgroupScanInclusive
50+
{
51+
52+
inline void operator()(uint32_t* outputData, uint32_t workgroupSize, uint32_t subgroupSize)
53+
{
54+
uint32_t bitC = 0;
55+
/* outputData[0u] = workgroupData[0u];
56+
for (auto i=1u; i<workgroupSize; i++)
57+
outputData[i] = OP()(outputData[i-1u],workgroupData[i]);*/
58+
std::fill(outputData, outputData + workgroupSize, bitC);
59+
60+
}
61+
62+
_IRR_STATIC_INLINE_CONSTEXPR const char* name = "workgroup inclusive scan";
63+
};
64+
65+
66+
#include "common.glsl"
67+
// Size of the result buffer in bytes: BUFFER_DWORD_COUNT entries of sizeof(uint32_t) each.
constexpr uint32_t kBufferSize = BUFFER_DWORD_COUNT*sizeof(uint32_t);
68+
69+
//returns true if result matches
70+
template<class Arithmetic>
71+
bool validateResults(video::IVideoDriver* driver, const uint32_t workgroupSize, const uint32_t workgroupCount, video::IGPUBuffer* bufferToDownload)
72+
{
73+
constexpr uint64_t timeoutInNanoSeconds = 15000000000u;
74+
const uint32_t alignment = sizeof(uint32_t);
75+
auto downloadStagingArea = driver->getDefaultDownStreamingBuffer();
76+
auto downBuffer = downloadStagingArea->getBuffer();
77+
78+
79+
bool success = false;
80+
81+
82+
uint32_t address = std::remove_pointer<decltype(downloadStagingArea)>::type::invalid_address;
83+
auto unallocatedSize = downloadStagingArea->multi_alloc(1u, &address, &kBufferSize, &alignment);
84+
if (unallocatedSize)
85+
{
86+
os::Printer::log("Could not download the buffer from the GPU!", ELL_ERROR);
87+
return false;
88+
}
89+
driver->copyBuffer(bufferToDownload, downBuffer, 0, address, kBufferSize);
90+
91+
auto downloadFence = driver->placeFence(true);
92+
auto result = downloadFence->waitCPU(timeoutInNanoSeconds, true);
93+
if (result != video::E_DRIVER_FENCE_RETVAL::EDFR_TIMEOUT_EXPIRED && result != video::E_DRIVER_FENCE_RETVAL::EDFR_FAIL)
94+
{
95+
success = true;
96+
97+
if (downloadStagingArea->needsManualFlushOrInvalidate())
98+
driver->invalidateMappedMemoryRanges({ {downloadStagingArea->getBuffer()->getBoundMemory(),address,kBufferSize} });
99+
100+
auto dataFromBuffer = reinterpret_cast<uint32_t*>(reinterpret_cast<uint8_t*>(downloadStagingArea->getBufferPointer())+address);
101+
102+
// now check if the data obtained has valid values
103+
constexpr uint32_t subgroupSize = 4u;
104+
uint32_t* tmp = new uint32_t[workgroupSize];
105+
for (uint32_t workgroupID=0u; success&&workgroupID<workgroupCount; workgroupID++)
106+
{
107+
const auto workgroupOffset = workgroupID*workgroupSize;
108+
Arithmetic()(tmp,workgroupSize,subgroupSize);
109+
for (uint32_t localInvocationIndex=0u; localInvocationIndex<workgroupSize; localInvocationIndex++)
110+
if (tmp[localInvocationIndex]!=dataFromBuffer[workgroupOffset+localInvocationIndex])
111+
{
112+
os::Printer::log("Failed test #" + std::to_string(workgroupSize) + " (" + Arithmetic::name + ")", ELL_ERROR);
113+
success = false;
114+
break;
115+
}
116+
}
117+
delete[] tmp;
118+
}
119+
else
120+
os::Printer::log("Could not download the buffer from the GPU, fence not signalled!", ELL_ERROR);
121+
122+
downloadStagingArea->multi_free(1u, &address, &kBufferSize, nullptr);
123+
return success;
124+
125+
}
126+
template<class Arithmetic>
127+
bool runTest(video::IVideoDriver* driver, video::IGPUComputePipeline* pipeline, const video::IGPUDescriptorSet* ds, const uint32_t workgroupSize, core::smart_refctd_ptr<IGPUBuffer> buffer)
128+
{
129+
if (pipeline == nullptr) return false; //code could not be compiled
130+
driver->bindComputePipeline(pipeline);
131+
driver->bindDescriptorSets(video::EPBP_COMPUTE,pipeline->getLayout(),0u,1u,&ds,nullptr);
132+
const uint32_t workgroupCount = BUFFER_DWORD_COUNT/workgroupSize;
133+
driver->dispatch(workgroupCount, 1, 1);
134+
video::COpenGLExtensionHandler::extGlMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT|GL_SHADER_STORAGE_BARRIER_BIT);
135+
//check results
136+
bool passed = validateResults<Arithmetic>(driver, workgroupSize, workgroupCount, buffer.get());
137+
return passed;
138+
}
139+
140+
int main()
141+
{
142+
irr::SIrrlichtCreationParameters params;
143+
params.Bits = 24;
144+
params.ZBufferBits = 24; //we'd like 32bit here
145+
params.DriverType = video::EDT_OPENGL; //! Only Well functioning driver, software renderer left for sake of 2D image drawing
146+
params.WindowSize = dimension2d<uint32_t>(1280, 720);
147+
params.Fullscreen = false;
148+
params.Vsync = true; //! If supported by target platform
149+
params.Doublebuffer = true;
150+
params.Stencilbuffer = false; //! This will not even be a choice soon
151+
params.StreamingDownloadBufferSize = kBufferSize;
152+
auto device = createDeviceEx(params);
153+
154+
if (!device)
155+
return 1; // could not create selected driver.
156+
157+
video::IVideoDriver* driver = device->getVideoDriver();
158+
io::IFileSystem* filesystem = device->getFileSystem();
159+
asset::IAssetManager* am = device->getAssetManager();
160+
161+
//buffer with results from the gpu
162+
core::smart_refctd_ptr<IGPUBuffer> buffer= driver->createDeviceLocalGPUBufferOnDedMem(kBufferSize);
163+
164+
165+
IGPUDescriptorSetLayout::SBinding binding = { 0u,EDT_STORAGE_BUFFER,1u,IGPUSpecializedShader::ESS_COMPUTE,nullptr };
166+
auto gpuDSLayout = driver->createGPUDescriptorSetLayout(&binding, &binding + 1);
167+
constexpr uint32_t pushconstantSize = 12;
168+
SPushConstantRange pcRange[1] = { IGPUSpecializedShader::ESS_COMPUTE,0u,pushconstantSize };
169+
auto pipelineLayout = driver->createGPUPipelineLayout(pcRange, pcRange + pushconstantSize, core::smart_refctd_ptr(gpuDSLayout));
170+
171+
auto descriptorSet = driver->createGPUDescriptorSet(core::smart_refctd_ptr(gpuDSLayout));
172+
{
173+
IGPUDescriptorSet::SDescriptorInfo info;
174+
info.desc = buffer;
175+
info.buffer = { 0u,kBufferSize };
176+
177+
IGPUDescriptorSet::SWriteDescriptorSet write = { descriptorSet.get(),0u,0u,1u,EDT_STORAGE_BUFFER, &info };
178+
179+
driver->updateDescriptorSets(1, &write, 0u, nullptr);
180+
}
181+
struct GLSLCodeWithWorkgroup {
182+
uint32_t workgroup_definition_position;
183+
std::string glsl;
184+
};
185+
constexpr const char* symbolsToReplace = "????";
186+
auto getShaderGLSL = [&](const char* filePath)
187+
{
188+
std::ifstream file(filePath);
189+
std::stringstream buff; buff << file.rdbuf();
190+
std::string shaderCode = buff.str();
191+
uint32_t wgPos = shaderCode.find(symbolsToReplace, 0);
192+
GLSLCodeWithWorkgroup ret = { wgPos,shaderCode };
193+
return ret;
194+
};
195+
GLSLCodeWithWorkgroup shaderGLSL[] =
196+
{
197+
getShaderGLSL("../testWorkgroupReduce.comp"),
198+
getShaderGLSL("../testWorkgroupExclusive.comp"),
199+
getShaderGLSL("../testWorkgroupInclusive.comp")
200+
};
201+
constexpr auto kTestTypeCount = sizeof(shaderGLSL)/sizeof(GLSLCodeWithWorkgroup);
202+
203+
auto getGPUShader = [&](GLSLCodeWithWorkgroup glsl, uint32_t wg_count)
204+
{
205+
auto alteredGLSL = glsl.glsl.replace(glsl.workgroup_definition_position, 4, std::to_string(wg_count));
206+
auto shaderUnspecialized = core::make_smart_refctd_ptr<asset::ICPUShader>(alteredGLSL.data());
207+
asset::ISpecializedShader::SInfo specinfo(nullptr, nullptr, "main", IGPUSpecializedShader::ESS_COMPUTE, "../file.comp");
208+
auto cs = core::make_smart_refctd_ptr<asset::ICPUSpecializedShader>(std::move(shaderUnspecialized), std::move(specinfo));
209+
auto cs_rawptr = cs.get();
210+
core::smart_refctd_ptr<IGPUSpecializedShader> shader = driver->getGPUObjectsFromAssets(&cs_rawptr, &cs_rawptr + 1)->front();
211+
return shader;
212+
};
213+
214+
//max workgroup size is hardcoded to 1024
215+
uint32_t totalFailCount = 0;
216+
const auto ds = descriptorSet.get();
217+
for (uint32_t workgroupSize=1u; workgroupSize<=1024u; workgroupSize++)
218+
{
219+
core::smart_refctd_ptr<IGPUComputePipeline> pipelines[kTestTypeCount];
220+
for (uint32_t i=0u; i<kTestTypeCount; i++)
221+
pipelines[i] = driver->createGPUComputePipeline(nullptr, core::smart_refctd_ptr(pipelineLayout), std::move(getGPUShader(shaderGLSL[i], workgroupSize)));
222+
223+
bool passed = true;
224+
225+
driver->beginScene(true);
226+
const video::IGPUDescriptorSet* ds = descriptorSet.get();
227+
passed = runTest<emulatedWorkgroupReduction>(driver,pipelines[0u].get(),descriptorSet.get(),workgroupSize,buffer)&&passed;
228+
passed = runTest<emulatedWorkgroupScanExclusive>(driver,pipelines[1u].get(),descriptorSet.get(),workgroupSize, buffer)&&passed;
229+
passed = runTest<emulatedWorkgroupScanInclusive>(driver,pipelines[2u].get(),descriptorSet.get(),workgroupSize, buffer)&&passed;
230+
231+
if (passed)
232+
os::Printer::log("Passed test #" + std::to_string(workgroupSize), ELL_INFORMATION);
233+
else
234+
{
235+
totalFailCount++;
236+
os::Printer::log("Failed test #" + std::to_string(workgroupSize), ELL_INFORMATION);
237+
}
238+
driver->endScene();
239+
}
240+
os::Printer::log("==========Result==========", ELL_INFORMATION);
241+
os::Printer::log("Fail Count: " + std::to_string(totalFailCount), ELL_INFORMATION);
242+
243+
return 0;
244+
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
#include "common.glsl"
2+
3+
// Workgroup width along X; _IRR_GLSL_WORKGROUP_SIZE_ has to be #defined
// before this point.
layout(local_size_x = _IRR_GLSL_WORKGROUP_SIZE_) in;

// Write-only storage buffer at set 0, binding 0, holding an unsized array of
// uints that receives the shader results.
layout(set = 0, binding = 0, std430) writeonly buffer outBitCount
{
    uint bitcountOutput[];
};
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#version 450
2+
#define _IRR_GLSL_WORKGROUP_SIZE_ ????
3+
4+
#include "workgroupCommon.glsl"
5+
6+
// Each invocation stores the value returned by
// irr_glsl_workgroupBallotExclusiveBitCount() at its global X invocation index.
// NOTE(review): nothing in this shader casts a ballot before the count is
// read - confirm against ballot.glsl whether that is required.
void main()
{
    const uint outputIndex = gl_GlobalInvocationID.x;
    bitcountOutput[outputIndex] = irr_glsl_workgroupBallotExclusiveBitCount();
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#version 450
2+
#define _IRR_GLSL_WORKGROUP_SIZE_ ????
3+
4+
#include "workgroupCommon.glsl"
5+
6+
// Each invocation stores the value returned by
// irr_glsl_workgroupBallotInclusiveBitCount() at its global X invocation index.
// NOTE(review): nothing in this shader casts a ballot before the count is
// read - confirm against ballot.glsl whether that is required.
void main()
{
    const uint outputIndex = gl_GlobalInvocationID.x;
    bitcountOutput[outputIndex] = irr_glsl_workgroupBallotInclusiveBitCount();
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#version 450
2+
#define _IRR_GLSL_WORKGROUP_SIZE_ ????
3+
4+
#include "workgroupCommon.glsl"
5+
6+
// Each invocation stores the value returned by
// irr_glsl_workgroupBallotBitCount() at its global X invocation index.
// NOTE(review): nothing in this shader casts a ballot before the count is
// read - confirm against ballot.glsl whether that is required.
void main()
{
    const uint outputIndex = gl_GlobalInvocationID.x;
    bitcountOutput[outputIndex] = irr_glsl_workgroupBallotBitCount();
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
#include "shaderCommon.glsl"
2+
3+
#include "irr/builtin/glsl/workgroup/ballot.glsl"

examples_tests/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,3 +72,4 @@ add_subdirectory(45.BRDFEvalTest EXCLUDE_FROM_ALL)
7272
add_subdirectory(46.SamplingValidation EXCLUDE_FROM_ALL)
7373
add_subdirectory(47.DerivMapTest EXCLUDE_FROM_ALL)
7474
add_subdirectory(48.ArithmeticUnitTest EXCLUDE_FROM_ALL)
75+
add_subdirectory(49.ArithmeticBallotUnitTest EXCLUDE_FROM_ALL)

0 commit comments

Comments
 (0)