forked from alpaka-group/alpaka3
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathallocDeferred.cpp
More file actions
121 lines (100 loc) · 4.32 KB
/
allocDeferred.cpp
File metadata and controls
121 lines (100 loc) · 4.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
/* Copyright 2025 René Widera
* SPDX-License-Identifier: MPL-2.0
*/
#include <alpaka/alpaka.hpp>
#include <catch2/catch_template_test_macros.hpp>
#include <catch2/catch_test_macros.hpp>
#include <iostream>
using namespace alpaka;
using TestApis = std::decay_t<decltype(onHost::allBackends(onHost::enabledApis, exec::enabledExecutors))>;
/** Kernel verifying that every element of `in` holds the value 3.14159265f.
 *
 * The index range spanned by `in.getExtents()` is traversed via `onAcc::makeIdxMap` with
 * `onAcc::worker::threadsInGrid`. As soon as one element differs from the expected value, `success[0]` is
 * atomically set to 0. The kernel never writes any other value to `success`, therefore the flag must be
 * initialized by the caller before the kernel is enqueued (the tests in this file pre-fill it with 1).
 */
struct RaceCheckKernel
{
    ALPAKA_FN_ACC void operator()(auto const& acc, concepts::IMdSpan<int> auto success, concepts::IMdSpan auto in)
        const
    {
        // value the host side is expected to have written into the input buffer
        constexpr float expectedValue = 3.14159265f;

        for(auto idx : onAcc::makeIdxMap(acc, onAcc::worker::threadsInGrid, IdxRange(in.getExtents())))
        {
            bool const isUnexpected = in[idx] != expectedValue;
            if(isUnexpected)
            {
                // signal the failure, several threads may do this concurrently
                onAcc::atomicExch(acc, &success[0], 0);
            }
        }
    }
};
void allocDeferredImplicitWait(auto device, auto exec)
{
onHost::Queue queue0 = device.makeQueue();
auto hBufferResults = onHost::allocHost<int>(1u);
auto dBufferResults = onHost::allocLike(device, hBufferResults);
onHost::fill(queue0, dBufferResults, 1);
onHost::wait(queue0);
{
// Asynchronous allocation memory is in the destructor waiting for all work enqueued in the creator queue.
auto sharedBuffer = onHost::allocDeferred<float>(queue0, 10ul);
onHost::fill(queue0, sharedBuffer, 3.14159265f);
queue0.enqueue(
exec,
getFrameSpec<float>(queue0.getDevice(), sharedBuffer.getExtents()),
KernelBundle{RaceCheckKernel{}, dBufferResults, sharedBuffer});
/* sharedBuffer is detroyed here before the kernel is finshed.
* If the view is not waiting for all work in the queue enqueued before the destructor of the view is called,
* the application should crash with invalid memory access or the validation in the kernel should fail.
* Typically, the kernel is reading zero's if the synchronization is missing.
*/
}
{
auto sharedBuffer = onHost::allocDeferred<float>(queue0, 10ul);
onHost::fill(queue0, sharedBuffer, 42.0f);
}
onHost::memcpy(queue0, hBufferResults, dBufferResults);
onHost::wait(queue0);
REQUIRE(hBufferResults[0] == 1);
}
void allocDeferredExplicitWait(auto device, auto exec)
{
onHost::Queue queue0 = device.makeQueue();
onHost::Queue queue1 = device.makeQueue();
auto hBufferResults = onHost::allocHost<int>(1u);
auto dBufferResults = onHost::allocLike(device, hBufferResults);
onHost::fill(queue0, dBufferResults, 1);
onHost::wait(queue0);
{
auto sharedBuffer = onHost::allocDeferred<float>(queue1, 10ul);
// set an action that the destructor is waiting for all work enqueued in queue0
sharedBuffer.destructorWaitFor(queue0);
// wait for the allocation
onHost::wait(queue1);
onHost::fill(queue0, sharedBuffer, 3.14159265f);
queue0.enqueue(
exec,
getFrameSpec<float>(queue0.getDevice(), sharedBuffer.getExtents()),
KernelBundle{RaceCheckKernel{}, dBufferResults, sharedBuffer});
/* sharedBuffer is detroyed here before the kernel is finshed.
* If the view is not waiting for all work in the queue enqueued before the destructor of the view is called,
* the application should crash with invalid memory access or the validation in the kernel should fail.
* Typically, the kernel is reading zero's if the synchronization is missing.
*/
}
onHost::memcpy(queue0, hBufferResults, dBufferResults);
onHost::wait(queue0);
REQUIRE(hBufferResults[0] == 1);
}
/** Runs the deferred allocation checks for every backend listed in `TestApis`.
 *
 * The device specification and the executor are taken from the dictionary provided by the test type.
 * Backends without an available device are reported on stdout and skipped.
 */
TEMPLATE_LIST_TEST_CASE("allocDeferred", "", TestApis)
{
    auto cfg = TestType::makeDict();
    auto deviceSpec = cfg[object::deviceSpec];
    auto exec = cfg[object::exec];

    auto devSelector = onHost::makeDeviceSelector(deviceSpec);
    if(!devSelector.isAvailable())
    {
        std::cout << "No device available for " << deviceSpec.getName() << std::endl;
        return;
    }

    onHost::Device device = devSelector.makeDevice(0);

    // The separator needs a leading space, otherwise the API name and "on" are glued together in the output.
    // std::endl is kept on purpose: the output is flushed before a test that may crash is executed.
    std::cout << deviceSpec.getApi().getName() << " on " << device.getName() << std::endl;

    // repeat the tests multiple times to increase the chance to trigger data races
    constexpr int testRounds = 10;
    for(int iteration = 0; iteration < testRounds; ++iteration)
    {
        allocDeferredImplicitWait(device, exec);
    }
    for(int iteration = 0; iteration < testRounds; ++iteration)
    {
        allocDeferredExplicitWait(device, exec);
    }
}