Skip to content

Commit 3eed8ad

Browse files
committed
Simplify and extend the alpaka kernel tests
1 parent f0bc2a8 commit 3eed8ad

File tree

1 file changed

+147
-201
lines changed

1 file changed

+147
-201
lines changed

HeterogeneousCore/AlpakaInterface/test/alpaka/testKernel.dev.cc

Lines changed: 147 additions & 201 deletions
Original file line number | Diff line number | Diff line change
@@ -13,8 +13,6 @@
1313
// each test binary is built for a single Alpaka backend
1414
using namespace ALPAKA_ACCELERATOR_NAMESPACE;
1515

16-
static constexpr auto s_tag = "[" ALPAKA_TYPE_ALIAS_NAME(alpakaTestKernel) "]";
17-
1816
struct VectorAddKernel {
1917
template <typename TAcc, typename T>
2018
ALPAKA_FN_ACC void operator()(
@@ -58,233 +56,181 @@ struct VectorAddKernel3D {
5856
}
5957
};
6058

61-
TEST_CASE("Standard checks of " ALPAKA_TYPE_ALIAS_NAME(alpakaTestKernel), s_tag) {
62-
SECTION("VectorAddKernel") {
63-
// get the list of devices on the current platform
64-
auto const& devices = cms::alpakatools::devices<Platform>();
65-
if (devices.empty()) {
66-
std::cout << "No devices available on the platform " << EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE)
67-
<< ", the test will be skipped.\n";
68-
return;
69-
}
70-
71-
// random number generator with a gaussian distribution
72-
std::random_device rd{};
73-
std::default_random_engine rand{rd()};
74-
std::normal_distribution<float> dist{0., 1.};
75-
76-
// tolerance
77-
constexpr float epsilon = 0.000001;
78-
79-
// buffer size
80-
constexpr size_t size = 1024 * 1024;
81-
82-
// allocate input and output host buffers in pinned memory accessible by the Platform devices
83-
auto in1_h = cms::alpakatools::make_host_buffer<float[], Platform>(size);
84-
auto in2_h = cms::alpakatools::make_host_buffer<float[], Platform>(size);
85-
auto out_h = cms::alpakatools::make_host_buffer<float[], Platform>(size);
86-
87-
// fill the input buffers with random data, and the output buffer with zeros
88-
for (size_t i = 0; i < size; ++i) {
89-
in1_h[i] = dist(rand);
90-
in2_h[i] = dist(rand);
91-
out_h[i] = 0.;
92-
}
93-
94-
// run the test on each device
95-
for (auto const& device : devices) {
96-
std::cout << "Test 1D vector addition on " << alpaka::getName(device) << '\n';
97-
auto queue = Queue(device);
98-
99-
// allocate input and output buffers on the device
100-
auto in1_d = cms::alpakatools::make_device_buffer<float[]>(queue, size);
101-
auto in2_d = cms::alpakatools::make_device_buffer<float[]>(queue, size);
102-
auto out_d = cms::alpakatools::make_device_buffer<float[]>(queue, size);
103-
104-
// copy the input data to the device; the size is known from the buffer objects
105-
alpaka::memcpy(queue, in1_d, in1_h);
106-
alpaka::memcpy(queue, in2_d, in2_h);
107-
108-
// fill the output buffer with zeros; the size is known from the buffer objects
109-
alpaka::memset(queue, out_d, 0.);
110-
111-
// launch the 1-dimensional kernel with scalar size
112-
auto div = cms::alpakatools::make_workdiv<Acc1D>(4, 4);
113-
alpaka::exec<Acc1D>(queue, div, VectorAddKernel{}, in1_d.data(), in2_d.data(), out_d.data(), size);
59+
// test the 1-dimensional kernel on all devices
60+
template <typename TKernel>
61+
void testVectorAddKernel(std::size_t problem_size, std::size_t grid_size, std::size_t block_size, TKernel kernel) {
62+
// random number generator with a gaussian distribution
63+
std::random_device rd{};
64+
std::default_random_engine rand{rd()};
65+
std::normal_distribution<float> dist{0., 1.};
66+
67+
// tolerance
68+
constexpr float epsilon = 0.000001;
69+
70+
// buffer size
71+
const size_t size = problem_size;
72+
73+
// allocate input and output host buffers in pinned memory accessible by the Platform devices
74+
auto in1_h = cms::alpakatools::make_host_buffer<float[], Platform>(size);
75+
auto in2_h = cms::alpakatools::make_host_buffer<float[], Platform>(size);
76+
auto out_h = cms::alpakatools::make_host_buffer<float[], Platform>(size);
77+
78+
// fill the input buffers with random data, and the output buffer with zeros
79+
for (size_t i = 0; i < size; ++i) {
80+
in1_h[i] = dist(rand);
81+
in2_h[i] = dist(rand);
82+
out_h[i] = 0.;
83+
}
11484

115-
// copy the results from the device to the host
116-
alpaka::memcpy(queue, out_h, out_d);
85+
// run the test on each device
86+
for (auto const& device : cms::alpakatools::devices<Platform>()) {
87+
std::cout << "Test 1D vector addition on " << alpaka::getName(device) << " over " << problem_size << " values with "
88+
<< grid_size << " blocks of " << block_size << " elements\n";
89+
auto queue = Queue(device);
11790

118-
// wait for all the operations to complete
119-
alpaka::wait(queue);
91+
// allocate input and output buffers on the device
92+
auto in1_d = cms::alpakatools::make_device_buffer<float[]>(queue, size);
93+
auto in2_d = cms::alpakatools::make_device_buffer<float[]>(queue, size);
94+
auto out_d = cms::alpakatools::make_device_buffer<float[]>(queue, size);
12095

121-
// check the results
122-
for (size_t i = 0; i < size; ++i) {
123-
float sum = in1_h[i] + in2_h[i];
124-
REQUIRE(out_h[i] < sum + epsilon);
125-
REQUIRE(out_h[i] > sum - epsilon);
126-
}
96+
// copy the input data to the device; the size is known from the buffer objects
97+
alpaka::memcpy(queue, in1_d, in1_h);
98+
alpaka::memcpy(queue, in2_d, in2_h);
12799

128-
// reset the output buffer on the device to all zeros
129-
alpaka::memset(queue, out_d, 0.);
100+
// fill the output buffer with zeros; the size is known from the buffer objects
101+
alpaka::memset(queue, out_d, 0.);
130102

131-
// launch the 1-dimensional kernel with vector size
132-
alpaka::exec<Acc1D>(queue, div, VectorAddKernel1D{}, in1_d.data(), in2_d.data(), out_d.data(), size);
103+
// launch the 1-dimensional kernel with scalar size
104+
auto div = cms::alpakatools::make_workdiv<Acc1D>(grid_size, block_size);
105+
alpaka::exec<Acc1D>(queue, div, kernel, in1_d.data(), in2_d.data(), out_d.data(), size);
133106

134-
// copy the results from the device to the host
135-
alpaka::memcpy(queue, out_h, out_d);
107+
// copy the results from the device to the host
108+
alpaka::memcpy(queue, out_h, out_d);
136109

137-
// wait for all the operations to complete
138-
alpaka::wait(queue);
110+
// wait for all the operations to complete
111+
alpaka::wait(queue);
139112

140-
// check the results
141-
for (size_t i = 0; i < size; ++i) {
142-
float sum = in1_h[i] + in2_h[i];
143-
REQUIRE(out_h[i] < sum + epsilon);
144-
REQUIRE(out_h[i] > sum - epsilon);
145-
}
113+
// check the results
114+
for (size_t i = 0; i < size; ++i) {
115+
float sum = in1_h[i] + in2_h[i];
116+
REQUIRE(out_h[i] < sum + epsilon);
117+
REQUIRE(out_h[i] > sum - epsilon);
146118
}
147119
}
148120
}
149121

150-
TEST_CASE("Standard checks of " ALPAKA_TYPE_ALIAS_NAME(alpakaTestKernel2D), s_tag) {
151-
SECTION("VectorAddKernel2D") {
152-
// get the list of devices on the current platform
153-
auto const& devices = cms::alpakatools::devices<Platform>();
154-
if (devices.empty()) {
155-
std::cout << "No devices available on the platform " << EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE)
156-
<< ", the test will be skipped.\n";
157-
return;
158-
}
159-
160-
// random number generator with a gaussian distribution
161-
std::random_device rd{};
162-
std::default_random_engine rand{rd()};
163-
std::normal_distribution<float> dist{0., 1.};
164-
165-
// tolerance
166-
constexpr float epsilon = 0.000001;
167-
168-
// 3-dimensional and linearised buffer size
169-
constexpr Vec2D ndsize = {16, 16};
170-
constexpr size_t size = ndsize.prod();
171-
172-
// allocate input and output host buffers in pinned memory accessible by the Platform devices
173-
auto in1_h = cms::alpakatools::make_host_buffer<float[], Platform>(size);
174-
auto in2_h = cms::alpakatools::make_host_buffer<float[], Platform>(size);
175-
auto out_h = cms::alpakatools::make_host_buffer<float[], Platform>(size);
176-
177-
// fill the input buffers with random data, and the output buffer with zeros
178-
for (size_t i = 0; i < size; ++i) {
179-
in1_h[i] = dist(rand);
180-
in2_h[i] = dist(rand);
181-
out_h[i] = 0.;
182-
}
122+
// test the N-dimensional kernels on all devices
123+
template <typename TDim, typename TKernel>
124+
void testVectorAddKernelND(Vec<TDim> problem_size, Vec<TDim> grid_size, Vec<TDim> block_size, TKernel kernel) {
125+
// random number generator with a gaussian distribution
126+
std::random_device rd{};
127+
std::default_random_engine rand{rd()};
128+
std::normal_distribution<float> dist{0., 1.};
129+
130+
// tolerance
131+
constexpr float epsilon = 0.000001;
132+
133+
// linearised buffer size
134+
const size_t size = problem_size.prod();
135+
136+
// allocate input and output host buffers in pinned memory accessible by the Platform devices
137+
auto in1_h = cms::alpakatools::make_host_buffer<float[], Platform>(size);
138+
auto in2_h = cms::alpakatools::make_host_buffer<float[], Platform>(size);
139+
auto out_h = cms::alpakatools::make_host_buffer<float[], Platform>(size);
140+
141+
// fill the input buffers with random data, and the output buffer with zeros
142+
for (size_t i = 0; i < size; ++i) {
143+
in1_h[i] = dist(rand);
144+
in2_h[i] = dist(rand);
145+
out_h[i] = 0.;
146+
}
183147

184-
// run the test on each device
185-
for (auto const& device : devices) {
186-
std::cout << "Test 2D vector addition on " << alpaka::getName(device) << '\n';
187-
auto queue = Queue(device);
148+
// run the test on each device
149+
for (auto const& device : cms::alpakatools::devices<Platform>()) {
150+
std::cout << "Test " << TDim::value << "D vector addition on " << alpaka::getName(device) << " over "
151+
<< problem_size << " values with " << grid_size << " blocks of " << block_size << " elements\n";
152+
auto queue = Queue(device);
188153

189-
// allocate input and output buffers on the device
190-
auto in1_d = cms::alpakatools::make_device_buffer<float[]>(queue, size);
191-
auto in2_d = cms::alpakatools::make_device_buffer<float[]>(queue, size);
192-
auto out_d = cms::alpakatools::make_device_buffer<float[]>(queue, size);
154+
// allocate input and output buffers on the device
155+
auto in1_d = cms::alpakatools::make_device_buffer<float[]>(queue, size);
156+
auto in2_d = cms::alpakatools::make_device_buffer<float[]>(queue, size);
157+
auto out_d = cms::alpakatools::make_device_buffer<float[]>(queue, size);
193158

194-
// copy the input data to the device; the size is known from the buffer objects
195-
alpaka::memcpy(queue, in1_d, in1_h);
196-
alpaka::memcpy(queue, in2_d, in2_h);
159+
// copy the input data to the device; the size is known from the buffer objects
160+
alpaka::memcpy(queue, in1_d, in1_h);
161+
alpaka::memcpy(queue, in2_d, in2_h);
197162

198-
// fill the output buffer with zeros; the size is known from the buffer objects
199-
alpaka::memset(queue, out_d, 0.);
163+
// fill the output buffer with zeros; the size is known from the buffer objects
164+
alpaka::memset(queue, out_d, 0.);
200165

201-
// launch the 3-dimensional kernel
202-
auto div = cms::alpakatools::make_workdiv<Acc2D>({4, 4}, {32, 32});
203-
alpaka::exec<Acc2D>(queue, div, VectorAddKernel2D{}, in1_d.data(), in2_d.data(), out_d.data(), ndsize);
166+
// launch the N-dimensional kernel
167+
using AccND = Acc<TDim>;
168+
auto div = cms::alpakatools::make_workdiv<AccND>(grid_size, block_size);
169+
alpaka::exec<AccND>(queue, div, kernel, in1_d.data(), in2_d.data(), out_d.data(), problem_size);
204170

205-
// copy the results from the device to the host
206-
alpaka::memcpy(queue, out_h, out_d);
171+
// copy the results from the device to the host
172+
alpaka::memcpy(queue, out_h, out_d);
207173

208-
// wait for all the operations to complete
209-
alpaka::wait(queue);
174+
// wait for all the operations to complete
175+
alpaka::wait(queue);
210176

211-
// check the results
212-
for (size_t i = 0; i < size; ++i) {
213-
float sum = in1_h[i] + in2_h[i];
214-
REQUIRE(out_h[i] < sum + epsilon);
215-
REQUIRE(out_h[i] > sum - epsilon);
216-
}
177+
// check the results
178+
for (size_t i = 0; i < size; ++i) {
179+
float sum = in1_h[i] + in2_h[i];
180+
REQUIRE(out_h[i] < sum + epsilon);
181+
REQUIRE(out_h[i] > sum - epsilon);
217182
}
218183
}
219184
}
220185

221-
TEST_CASE("Standard checks of " ALPAKA_TYPE_ALIAS_NAME(alpakaTestKernel3D), s_tag) {
222-
SECTION("VectorAddKernel3D") {
186+
TEST_CASE("Test alpaka kernels for the " EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) " backend",
187+
"[" EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) "]") {
188+
SECTION("Alpaka N-dimensional kernels") {
223189
// get the list of devices on the current platform
224190
auto const& devices = cms::alpakatools::devices<Platform>();
225191
if (devices.empty()) {
226-
std::cout << "No devices available on the platform " << EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE)
227-
<< ", the test will be skipped.\n";
228-
return;
192+
INFO("No devices available on the platform " EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE));
193+
REQUIRE(not devices.empty());
229194
}
230195

231-
// random number generator with a gaussian distribution
232-
std::random_device rd{};
233-
std::default_random_engine rand{rd()};
234-
std::normal_distribution<float> dist{0., 1.};
235-
236-
// tolerance
237-
constexpr float epsilon = 0.000001;
238-
239-
// 3-dimensional and linearised buffer size
240-
constexpr Vec3D ndsize = {50, 125, 16};
241-
constexpr size_t size = ndsize.prod();
242-
243-
// allocate input and output host buffers in pinned memory accessible by the Platform devices
244-
auto in1_h = cms::alpakatools::make_host_buffer<float[], Platform>(size);
245-
auto in2_h = cms::alpakatools::make_host_buffer<float[], Platform>(size);
246-
auto out_h = cms::alpakatools::make_host_buffer<float[], Platform>(size);
247-
248-
// fill the input buffers with random data, and the output buffer with zeros
249-
for (size_t i = 0; i < size; ++i) {
250-
in1_h[i] = dist(rand);
251-
in2_h[i] = dist(rand);
252-
out_h[i] = 0.;
253-
}
254-
255-
// run the test on each device
256-
for (auto const& device : devices) {
257-
std::cout << "Test 3D vector addition on " << alpaka::getName(device) << '\n';
258-
auto queue = Queue(device);
259-
260-
// allocate input and output buffers on the device
261-
auto in1_d = cms::alpakatools::make_device_buffer<float[]>(queue, size);
262-
auto in2_d = cms::alpakatools::make_device_buffer<float[]>(queue, size);
263-
auto out_d = cms::alpakatools::make_device_buffer<float[]>(queue, size);
264-
265-
// copy the input data to the device; the size is known from the buffer objects
266-
alpaka::memcpy(queue, in1_d, in1_h);
267-
alpaka::memcpy(queue, in2_d, in2_h);
268-
269-
// fill the output buffer with zeros; the size is known from the buffer objects
270-
alpaka::memset(queue, out_d, 0.);
271-
272-
// launch the 3-dimensional kernel
273-
auto div = cms::alpakatools::make_workdiv<Acc3D>({5, 5, 1}, {4, 4, 4});
274-
alpaka::exec<Acc3D>(queue, div, VectorAddKernel3D{}, in1_d.data(), in2_d.data(), out_d.data(), ndsize);
275-
276-
// copy the results from the device to the host
277-
alpaka::memcpy(queue, out_h, out_d);
278-
279-
// wait for all the operations to complete
280-
alpaka::wait(queue);
281-
282-
// check the results
283-
for (size_t i = 0; i < size; ++i) {
284-
float sum = in1_h[i] + in2_h[i];
285-
REQUIRE(out_h[i] < sum + epsilon);
286-
REQUIRE(out_h[i] > sum - epsilon);
287-
}
288-
}
196+
// launch the 1-dimensional kernel with a small block size and a small number of blocks;
197+
// this relies on the kernel to loop over the "problem space" and do more work per block
198+
std::cout << "Test 1D vector addition with small block size, using scalar dimensions\n";
199+
testVectorAddKernel(10000, 32, 32, VectorAddKernel{});
200+
201+
// launch the 1-dimensional kernel with a large block size and a single block;
202+
// this relies on the kernel to check the size of the "problem space" and avoid accessing out-of-bounds data
203+
std::cout << "Test 1D vector addition with large block size, using scalar dimensions\n";
204+
testVectorAddKernel(100, 1, 1024, VectorAddKernel{});
205+
206+
// launch the 1-dimensional kernel with a small block size and a small number of blocks;
207+
// this relies on the kernel to loop over the "problem space" and do more work per block
208+
std::cout << "Test 1D vector addition with small block size\n";
209+
testVectorAddKernelND<Dim1D>({10000}, {32}, {32}, VectorAddKernel1D{});
210+
211+
// launch the 1-dimensional kernel with a large block size and a single block;
212+
// this relies on the kernel to check the size of the "problem space" and avoid accessing out-of-bounds data
213+
std::cout << "Test 1D vector addition with large block size\n";
214+
testVectorAddKernelND<Dim1D>({100}, {1}, {1024}, VectorAddKernel1D{});
215+
216+
// launch the 2-dimensional kernel with a small block size and a small number of blocks;
217+
// this relies on the kernel to loop over the "problem space" and do more work per block
218+
std::cout << "Test 2D vector addition with small block size\n";
219+
testVectorAddKernelND<Dim2D>({400, 250}, {4, 4}, {16, 16}, VectorAddKernel2D{});
220+
221+
// launch the 2-dimensional kernel with a large block size and a single block;
222+
// this relies on the kernel to check the size of the "problem space" and avoid accessing out-of-bounds data
223+
std::cout << "Test 2D vector addition with large block size\n";
224+
testVectorAddKernelND<Dim2D>({20, 20}, {1, 1}, {32, 32}, VectorAddKernel2D{});
225+
226+
// launch the 3-dimensional kernel with a small block size and a small number of blocks;
227+
// this relies on the kernel to loop over the "problem space" and do more work per block
228+
std::cout << "Test 3D vector addition with small block size\n";
229+
testVectorAddKernelND<Dim3D>({50, 125, 16}, {5, 5, 1}, {4, 4, 4}, VectorAddKernel3D{});
230+
231+
// launch the 3-dimensional kernel with a large block size and a single block;
232+
// this relies on the kernel to check the size of the "problem space" and avoid accessing out-of-bounds data
233+
std::cout << "Test 3D vector addition with large block size\n";
234+
testVectorAddKernelND<Dim3D>({5, 5, 5}, {1, 1, 1}, {8, 8, 8}, VectorAddKernel3D{});
289235
}
290236
}

0 commit comments

Comments
 (0)