Skip to content

Commit 332aa37

Browse files
committed
Manifest the bug
1 parent 1243554 commit 332aa37

File tree

4 files changed

+210
-0
lines changed

4 files changed

+210
-0
lines changed
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# Pull in the shared Nabla example helpers (common.cmake).
# OPTIONAL is required for the check below to be reachable: without it, include()
# itself raises an error on a missing module before RES can be inspected.
# With it, RES is set to NOTFOUND and we can emit the more helpful message.
include(common OPTIONAL RESULT_VARIABLE RES)
if(NOT RES)
	message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory")
endif()

# Declare the example executable; all four arguments are intentionally left empty here.
nbl_create_executable_project("" "" "" "")
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
#version 450

// 256 invocations per workgroup. The macro is defined before the includes below;
// presumably the workgroup arithmetic header reads it as well - confirm against the library.
#define _NBL_GLSL_WORKGROUP_SIZE_ (1 << 8)
layout (local_size_x = _NBL_GLSL_WORKGROUP_SIZE_) in;

// Single storage buffer that is both the scan input and the scan output.
layout (set = 0, binding = 0, std430) buffer inout_buffer
{
	int inout_values[];
};

#include <nbl/builtin/glsl/workgroup/arithmetic.glsl>
#include <nbl/builtin/glsl/limits/numeric.glsl>


// Runs an exclusive min-scan across the workgroup over the first 147 buffer elements.
void main()
{
	uint invocation = gl_GlobalInvocationID.x;
	// Only the first 147 invocations have a backing element in the buffer.
	bool holds_element = invocation < 147;

	// Invocations without an element contribute INT_MAX, which cannot lower a minimum.
	int value = INT_MAX;
	if (holds_element)
		value = inout_values[invocation];

	// Called by every invocation, outside of any branch on holds_element.
	int exclusive_min = nbl_glsl_workgroupExclusiveMin(value);

	if (holds_element)
		inout_values[invocation] = exclusive_min;
}

examples_tests/51.RadixSort/main.cpp

Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
#define _NBL_STATIC_LIB_
2+
#include <nabla.h>

#include <vector>

#include "../../source/Nabla/COpenGLDriver.h"
5+
6+
using namespace nbl;
7+
using namespace core;
8+
using namespace video;
9+
using namespace asset;
10+
11+
template <typename T>
12+
static T* DebugGPUBufferDownload(smart_refctd_ptr<IGPUBuffer> buffer_to_download, size_t buffer_size, IVideoDriver* driver)
13+
{
14+
constexpr uint64_t timeout_ns = 15000000000u;
15+
const uint32_t alignment = uint32_t(sizeof(T));
16+
auto downloadStagingArea = driver->getDefaultDownStreamingBuffer();
17+
auto downBuffer = downloadStagingArea->getBuffer();
18+
19+
bool success = false;
20+
21+
uint32_t array_size_32 = uint32_t(buffer_size);
22+
uint32_t address = std::remove_pointer<decltype(downloadStagingArea)>::type::invalid_address;
23+
auto unallocatedSize = downloadStagingArea->multi_alloc(1u, &address, &array_size_32, &alignment);
24+
if (unallocatedSize)
25+
{
26+
os::Printer::log("Could not download the buffer from the GPU!", ELL_ERROR);
27+
exit(420);
28+
}
29+
30+
driver->copyBuffer(buffer_to_download.get(), downBuffer, 0, address, array_size_32);
31+
32+
auto downloadFence = driver->placeFence(true);
33+
auto result = downloadFence->waitCPU(timeout_ns, true);
34+
35+
T* dataFromBuffer = nullptr;
36+
if (result != video::E_DRIVER_FENCE_RETVAL::EDFR_TIMEOUT_EXPIRED && result != video::E_DRIVER_FENCE_RETVAL::EDFR_FAIL)
37+
{
38+
if (downloadStagingArea->needsManualFlushOrInvalidate())
39+
driver->invalidateMappedMemoryRanges({ {downloadStagingArea->getBuffer()->getBoundMemory(),address,array_size_32} });
40+
41+
dataFromBuffer = reinterpret_cast<T*>(reinterpret_cast<uint8_t*>(downloadStagingArea->getBufferPointer()) + address);
42+
}
43+
else
44+
{
45+
os::Printer::log("Could not download the buffer from the GPU, fence not signalled!", ELL_ERROR);
46+
}
47+
48+
downloadStagingArea->multi_free(1u, &address, &array_size_32, nullptr);
49+
50+
return dataFromBuffer;
51+
}
52+
53+
template <typename T>
54+
static void DebugCompareGPUvsCPU(smart_refctd_ptr<IGPUBuffer> gpu_buffer, T* cpu_buffer, size_t buffer_size, IVideoDriver* driver)
55+
{
56+
T* downloaded_buffer = DebugGPUBufferDownload<T>(gpu_buffer, buffer_size, driver);
57+
58+
size_t buffer_count = buffer_size / sizeof(T);
59+
60+
if (downloaded_buffer)
61+
{
62+
for (int i = 0; i < buffer_count; ++i)
63+
{
64+
if (downloaded_buffer[i] != cpu_buffer[i])
65+
__debugbreak();
66+
}
67+
68+
std::cout << "PASS" << std::endl;
69+
}
70+
}
71+
72+
int main()
73+
{
74+
nbl::SIrrlichtCreationParameters params;
75+
params.Bits = 24;
76+
params.ZBufferBits = 24;
77+
params.DriverType = video::EDT_OPENGL;
78+
params.WindowSize = dimension2d<uint32_t>(512, 512);
79+
params.Fullscreen = false;
80+
params.Vsync = true;
81+
params.Doublebuffer = true;
82+
params.Stencilbuffer = false;
83+
auto device = createDeviceEx(params);
84+
85+
if (!device)
86+
return 1;
87+
88+
IVideoDriver* driver = device->getVideoDriver();
89+
90+
io::IFileSystem* filesystem = device->getFileSystem();
91+
asset::IAssetManager* am = device->getAssetManager();
92+
93+
const size_t in_count = 147;
94+
const size_t in_size = in_count * sizeof(int32_t);
95+
int32_t in[in_count] = { -32518, -32702, -32740, -32435, -31619, -32752, -32233, -32730, -32698,
96+
-32468, -32582, -32556, -32525, -32615, -32475, -32718, -32024, -32457, -32467, -32663, -32735,
97+
-32524, -32666, -32397, -32684, -32088, -32683, -32741, -32761, -32715, -32730, -32405, -32588,
98+
-32610, -31794, -32622, -32711, -32324, -32425, -32692, -32604, -32660, -32761, -32685, -32572,
99+
-32450, -32636, -32674, -32661, -32684, -32399, -32194, -32536, -32575, -32605, -32630, -32689,
100+
-32743, -32130, -32554, -32737, -32534, -32696, -31740, -32733, -32326, -32625, -32603, -32554,
101+
-32756, -32582, -32592, -32750, -32464, -32649, -32396, -32645, -32032, -32278, -32179, -32710,
102+
-32372, -32418, -32597, -32748, -32761, -32722, -32368, -32658, -32621, -32672, -32661, -32726,
103+
-32632, -32474, -32713, -31854, -32682, -32704, -32126, -32486, -32279, -32131, -32613, -30809,
104+
-32686, -32728, -32723, -32705, -32369, -32704, -31879, -32529, -32350, -32544, -32726, -32724,
105+
-32424, -32725, -32149, -32515, -32705, -32519, -32660, -32687, -32519, -32446, -32342, -32716,
106+
-32629, -32733, -32464, -32749, -32745, -32532, -31924, -32737, -32570, -32402, -32571, -32350,
107+
-31861, -32631, -32645, -32726, -32734, -32672 };
108+
109+
110+
111+
auto in_gpu = driver->createFilledDeviceLocalGPUBufferOnDedMem(in_size, in);
112+
113+
smart_refctd_ptr<IGPUComputePipeline> pipeline = nullptr;
114+
smart_refctd_ptr<IGPUDescriptorSet> ds = nullptr;
115+
{
116+
const uint32_t count = 1u;
117+
IGPUDescriptorSetLayout::SBinding binding[count];
118+
for (uint32_t i = 0; i < count; ++i)
119+
binding[i] = { i, asset::EDT_STORAGE_BUFFER, 1u, IGPUSpecializedShader::ESS_COMPUTE, nullptr };
120+
121+
auto ds_layout_gpu = driver->createGPUDescriptorSetLayout(binding, binding + count);
122+
ds = driver->createGPUDescriptorSet(smart_refctd_ptr(ds_layout_gpu));
123+
124+
auto pipeline_layout = driver->createGPUPipelineLayout(nullptr, nullptr, smart_refctd_ptr(ds_layout_gpu));
125+
126+
smart_refctd_ptr<IGPUSpecializedShader> shader_gpu = nullptr;
127+
{
128+
auto file = smart_refctd_ptr<io::IReadFile>(filesystem->createAndOpenFile("../Debug.comp"));
129+
130+
asset::IAssetLoader::SAssetLoadParams lp;
131+
auto cs_bundle = am->getAsset("../Debug.comp", lp);
132+
auto cs = smart_refctd_ptr_static_cast<asset::ICPUSpecializedShader>(*cs_bundle.getContents().begin());
133+
auto cs_rawptr = cs.get();
134+
135+
shader_gpu = driver->getGPUObjectsFromAssets(&cs_rawptr, &cs_rawptr + 1)->front();
136+
}
137+
138+
pipeline = driver->createGPUComputePipeline(nullptr, std::move(pipeline_layout), std::move(shader_gpu));
139+
}
140+
141+
driver->beginScene(true);
142+
143+
{
144+
const uint32_t count = 1;
145+
IGPUDescriptorSet::SDescriptorInfo ds_info[count];
146+
ds_info[0].desc = in_gpu;
147+
ds_info[0].buffer = { 0u, in_size };
148+
149+
IGPUDescriptorSet::SWriteDescriptorSet writes[count];
150+
for (uint32_t i = 0; i < count; ++i)
151+
writes[i] = { ds.get(), i, 0u, 1u, asset::EDT_STORAGE_BUFFER, ds_info + i };
152+
153+
driver->updateDescriptorSets(count, writes, 0u, nullptr);
154+
}
155+
156+
driver->bindComputePipeline(pipeline.get());
157+
driver->bindDescriptorSets(video::EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &ds.get(), nullptr);
158+
driver->dispatch(1, 1, 1);
159+
160+
video::COpenGLExtensionHandler::extGlMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
161+
162+
int32_t* debug_cpu = new int32_t[in_count];
163+
164+
int32_t prefix_scan = INT_MAX;
165+
for (uint32_t i = 0; i < in_count; ++i)
166+
{
167+
debug_cpu[i] = prefix_scan;
168+
prefix_scan = min(prefix_scan, in[i]);
169+
}
170+
171+
DebugCompareGPUvsCPU<int32_t>(in_gpu, debug_cpu, in_size, driver);
172+
173+
174+
driver->endScene();
175+
176+
177+
return 0;
178+
}

examples_tests/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,3 +63,4 @@ add_subdirectory(47.DerivMapTest EXCLUDE_FROM_ALL)
6363
add_subdirectory(48.ArithmeticUnitTest EXCLUDE_FROM_ALL)
6464
add_subdirectory(49.ComputeFFT EXCLUDE_FROM_ALL)
6565
add_subdirectory(50.MeshPacking EXCLUDE_FROM_ALL)
66+
# Registers the new 51.RadixSort example directory, using the same EXCLUDE_FROM_ALL option as the entries above.
add_subdirectory(51.RadixSort EXCLUDE_FROM_ALL)

0 commit comments

Comments
 (0)