|
13 | 13 | // each test binary is built for a single Alpaka backend |
14 | 14 | using namespace ALPAKA_ACCELERATOR_NAMESPACE; |
15 | 15 |
|
16 | | -static constexpr auto s_tag = "[" ALPAKA_TYPE_ALIAS_NAME(alpakaTestKernel) "]"; |
17 | | - |
18 | 16 | struct VectorAddKernel { |
19 | 17 | template <typename TAcc, typename T> |
20 | 18 | ALPAKA_FN_ACC void operator()( |
@@ -58,233 +56,181 @@ struct VectorAddKernel3D { |
58 | 56 | } |
59 | 57 | }; |
60 | 58 |
|
61 | | -TEST_CASE("Standard checks of " ALPAKA_TYPE_ALIAS_NAME(alpakaTestKernel), s_tag) { |
62 | | - SECTION("VectorAddKernel") { |
63 | | - // get the list of devices on the current platform |
64 | | - auto const& devices = cms::alpakatools::devices<Platform>(); |
65 | | - if (devices.empty()) { |
66 | | - std::cout << "No devices available on the platform " << EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) |
67 | | - << ", the test will be skipped.\n"; |
68 | | - return; |
69 | | - } |
70 | | - |
71 | | - // random number generator with a gaussian distribution |
72 | | - std::random_device rd{}; |
73 | | - std::default_random_engine rand{rd()}; |
74 | | - std::normal_distribution<float> dist{0., 1.}; |
75 | | - |
76 | | - // tolerance |
77 | | - constexpr float epsilon = 0.000001; |
78 | | - |
79 | | - // buffer size |
80 | | - constexpr size_t size = 1024 * 1024; |
81 | | - |
82 | | - // allocate input and output host buffers in pinned memory accessible by the Platform devices |
83 | | - auto in1_h = cms::alpakatools::make_host_buffer<float[], Platform>(size); |
84 | | - auto in2_h = cms::alpakatools::make_host_buffer<float[], Platform>(size); |
85 | | - auto out_h = cms::alpakatools::make_host_buffer<float[], Platform>(size); |
86 | | - |
87 | | - // fill the input buffers with random data, and the output buffer with zeros |
88 | | - for (size_t i = 0; i < size; ++i) { |
89 | | - in1_h[i] = dist(rand); |
90 | | - in2_h[i] = dist(rand); |
91 | | - out_h[i] = 0.; |
92 | | - } |
93 | | - |
94 | | - // run the test on each device |
95 | | - for (auto const& device : devices) { |
96 | | - std::cout << "Test 1D vector addition on " << alpaka::getName(device) << '\n'; |
97 | | - auto queue = Queue(device); |
98 | | - |
99 | | - // allocate input and output buffers on the device |
100 | | - auto in1_d = cms::alpakatools::make_device_buffer<float[]>(queue, size); |
101 | | - auto in2_d = cms::alpakatools::make_device_buffer<float[]>(queue, size); |
102 | | - auto out_d = cms::alpakatools::make_device_buffer<float[]>(queue, size); |
103 | | - |
104 | | - // copy the input data to the device; the size is known from the buffer objects |
105 | | - alpaka::memcpy(queue, in1_d, in1_h); |
106 | | - alpaka::memcpy(queue, in2_d, in2_h); |
107 | | - |
108 | | - // fill the output buffer with zeros; the size is known from the buffer objects |
109 | | - alpaka::memset(queue, out_d, 0.); |
110 | | - |
111 | | - // launch the 1-dimensional kernel with scalar size |
112 | | - auto div = cms::alpakatools::make_workdiv<Acc1D>(4, 4); |
113 | | - alpaka::exec<Acc1D>(queue, div, VectorAddKernel{}, in1_d.data(), in2_d.data(), out_d.data(), size); |
| 59 | +// test the 1-dimensional kernel on all devices |
| 60 | +template <typename TKernel> |
| 61 | +void testVectorAddKernel(std::size_t problem_size, std::size_t grid_size, std::size_t block_size, TKernel kernel) { |
| 62 | + // random number generator with a gaussian distribution |
| 63 | + std::random_device rd{}; |
| 64 | + std::default_random_engine rand{rd()}; |
| 65 | + std::normal_distribution<float> dist{0., 1.}; |
| 66 | + |
| 67 | + // tolerance |
| 68 | + constexpr float epsilon = 0.000001; |
| 69 | + |
| 70 | + // buffer size |
| 71 | + const size_t size = problem_size; |
| 72 | + |
| 73 | + // allocate input and output host buffers in pinned memory accessible by the Platform devices |
| 74 | + auto in1_h = cms::alpakatools::make_host_buffer<float[], Platform>(size); |
| 75 | + auto in2_h = cms::alpakatools::make_host_buffer<float[], Platform>(size); |
| 76 | + auto out_h = cms::alpakatools::make_host_buffer<float[], Platform>(size); |
| 77 | + |
| 78 | + // fill the input buffers with random data, and the output buffer with zeros |
| 79 | + for (size_t i = 0; i < size; ++i) { |
| 80 | + in1_h[i] = dist(rand); |
| 81 | + in2_h[i] = dist(rand); |
| 82 | + out_h[i] = 0.; |
| 83 | + } |
114 | 84 |
|
115 | | - // copy the results from the device to the host |
116 | | - alpaka::memcpy(queue, out_h, out_d); |
| 85 | + // run the test on each device |
| 86 | + for (auto const& device : cms::alpakatools::devices<Platform>()) { |
| 87 | + std::cout << "Test 1D vector addition on " << alpaka::getName(device) << " over " << problem_size << " values with " |
| 88 | + << grid_size << " blocks of " << block_size << " elements\n"; |
| 89 | + auto queue = Queue(device); |
117 | 90 |
|
118 | | - // wait for all the operations to complete |
119 | | - alpaka::wait(queue); |
| 91 | + // allocate input and output buffers on the device |
| 92 | + auto in1_d = cms::alpakatools::make_device_buffer<float[]>(queue, size); |
| 93 | + auto in2_d = cms::alpakatools::make_device_buffer<float[]>(queue, size); |
| 94 | + auto out_d = cms::alpakatools::make_device_buffer<float[]>(queue, size); |
120 | 95 |
|
121 | | - // check the results |
122 | | - for (size_t i = 0; i < size; ++i) { |
123 | | - float sum = in1_h[i] + in2_h[i]; |
124 | | - REQUIRE(out_h[i] < sum + epsilon); |
125 | | - REQUIRE(out_h[i] > sum - epsilon); |
126 | | - } |
| 96 | + // copy the input data to the device; the size is known from the buffer objects |
| 97 | + alpaka::memcpy(queue, in1_d, in1_h); |
| 98 | + alpaka::memcpy(queue, in2_d, in2_h); |
127 | 99 |
|
128 | | - // reset the output buffer on the device to all zeros |
129 | | - alpaka::memset(queue, out_d, 0.); |
| 100 | + // fill the output buffer with zeros; the size is known from the buffer objects |
| 101 | + alpaka::memset(queue, out_d, 0.); |
130 | 102 |
|
131 | | - // launch the 1-dimensional kernel with vector size |
132 | | - alpaka::exec<Acc1D>(queue, div, VectorAddKernel1D{}, in1_d.data(), in2_d.data(), out_d.data(), size); |
| 103 | + // launch the 1-dimensional kernel with scalar size |
| 104 | + auto div = cms::alpakatools::make_workdiv<Acc1D>(grid_size, block_size); |
| 105 | + alpaka::exec<Acc1D>(queue, div, kernel, in1_d.data(), in2_d.data(), out_d.data(), size); |
133 | 106 |
|
134 | | - // copy the results from the device to the host |
135 | | - alpaka::memcpy(queue, out_h, out_d); |
| 107 | + // copy the results from the device to the host |
| 108 | + alpaka::memcpy(queue, out_h, out_d); |
136 | 109 |
|
137 | | - // wait for all the operations to complete |
138 | | - alpaka::wait(queue); |
| 110 | + // wait for all the operations to complete |
| 111 | + alpaka::wait(queue); |
139 | 112 |
|
140 | | - // check the results |
141 | | - for (size_t i = 0; i < size; ++i) { |
142 | | - float sum = in1_h[i] + in2_h[i]; |
143 | | - REQUIRE(out_h[i] < sum + epsilon); |
144 | | - REQUIRE(out_h[i] > sum - epsilon); |
145 | | - } |
| 113 | + // check the results |
| 114 | + for (size_t i = 0; i < size; ++i) { |
| 115 | + float sum = in1_h[i] + in2_h[i]; |
| 116 | + REQUIRE(out_h[i] < sum + epsilon); |
| 117 | + REQUIRE(out_h[i] > sum - epsilon); |
146 | 118 | } |
147 | 119 | } |
148 | 120 | } |
149 | 121 |
|
150 | | -TEST_CASE("Standard checks of " ALPAKA_TYPE_ALIAS_NAME(alpakaTestKernel2D), s_tag) { |
151 | | - SECTION("VectorAddKernel2D") { |
152 | | - // get the list of devices on the current platform |
153 | | - auto const& devices = cms::alpakatools::devices<Platform>(); |
154 | | - if (devices.empty()) { |
155 | | - std::cout << "No devices available on the platform " << EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) |
156 | | - << ", the test will be skipped.\n"; |
157 | | - return; |
158 | | - } |
159 | | - |
160 | | - // random number generator with a gaussian distribution |
161 | | - std::random_device rd{}; |
162 | | - std::default_random_engine rand{rd()}; |
163 | | - std::normal_distribution<float> dist{0., 1.}; |
164 | | - |
165 | | - // tolerance |
166 | | - constexpr float epsilon = 0.000001; |
167 | | - |
168 | | - // 3-dimensional and linearised buffer size |
169 | | - constexpr Vec2D ndsize = {16, 16}; |
170 | | - constexpr size_t size = ndsize.prod(); |
171 | | - |
172 | | - // allocate input and output host buffers in pinned memory accessible by the Platform devices |
173 | | - auto in1_h = cms::alpakatools::make_host_buffer<float[], Platform>(size); |
174 | | - auto in2_h = cms::alpakatools::make_host_buffer<float[], Platform>(size); |
175 | | - auto out_h = cms::alpakatools::make_host_buffer<float[], Platform>(size); |
176 | | - |
177 | | - // fill the input buffers with random data, and the output buffer with zeros |
178 | | - for (size_t i = 0; i < size; ++i) { |
179 | | - in1_h[i] = dist(rand); |
180 | | - in2_h[i] = dist(rand); |
181 | | - out_h[i] = 0.; |
182 | | - } |
| 122 | +// test the N-dimensional kernels on all devices |
| 123 | +template <typename TDim, typename TKernel> |
| 124 | +void testVectorAddKernelND(Vec<TDim> problem_size, Vec<TDim> grid_size, Vec<TDim> block_size, TKernel kernel) { |
| 125 | + // random number generator with a gaussian distribution |
| 126 | + std::random_device rd{}; |
| 127 | + std::default_random_engine rand{rd()}; |
| 128 | + std::normal_distribution<float> dist{0., 1.}; |
| 129 | + |
| 130 | + // tolerance |
| 131 | + constexpr float epsilon = 0.000001; |
| 132 | + |
| 133 | + // linearised buffer size |
| 134 | + const size_t size = problem_size.prod(); |
| 135 | + |
| 136 | + // allocate input and output host buffers in pinned memory accessible by the Platform devices |
| 137 | + auto in1_h = cms::alpakatools::make_host_buffer<float[], Platform>(size); |
| 138 | + auto in2_h = cms::alpakatools::make_host_buffer<float[], Platform>(size); |
| 139 | + auto out_h = cms::alpakatools::make_host_buffer<float[], Platform>(size); |
| 140 | + |
| 141 | + // fill the input buffers with random data, and the output buffer with zeros |
| 142 | + for (size_t i = 0; i < size; ++i) { |
| 143 | + in1_h[i] = dist(rand); |
| 144 | + in2_h[i] = dist(rand); |
| 145 | + out_h[i] = 0.; |
| 146 | + } |
183 | 147 |
|
184 | | - // run the test on each device |
185 | | - for (auto const& device : devices) { |
186 | | - std::cout << "Test 2D vector addition on " << alpaka::getName(device) << '\n'; |
187 | | - auto queue = Queue(device); |
| 148 | + // run the test on each device |
| 149 | + for (auto const& device : cms::alpakatools::devices<Platform>()) { |
| 150 | + std::cout << "Test " << TDim::value << "D vector addition on " << alpaka::getName(device) << " over " |
| 151 | + << problem_size << " values with " << grid_size << " blocks of " << block_size << " elements\n"; |
| 152 | + auto queue = Queue(device); |
188 | 153 |
|
189 | | - // allocate input and output buffers on the device |
190 | | - auto in1_d = cms::alpakatools::make_device_buffer<float[]>(queue, size); |
191 | | - auto in2_d = cms::alpakatools::make_device_buffer<float[]>(queue, size); |
192 | | - auto out_d = cms::alpakatools::make_device_buffer<float[]>(queue, size); |
| 154 | + // allocate input and output buffers on the device |
| 155 | + auto in1_d = cms::alpakatools::make_device_buffer<float[]>(queue, size); |
| 156 | + auto in2_d = cms::alpakatools::make_device_buffer<float[]>(queue, size); |
| 157 | + auto out_d = cms::alpakatools::make_device_buffer<float[]>(queue, size); |
193 | 158 |
|
194 | | - // copy the input data to the device; the size is known from the buffer objects |
195 | | - alpaka::memcpy(queue, in1_d, in1_h); |
196 | | - alpaka::memcpy(queue, in2_d, in2_h); |
| 159 | + // copy the input data to the device; the size is known from the buffer objects |
| 160 | + alpaka::memcpy(queue, in1_d, in1_h); |
| 161 | + alpaka::memcpy(queue, in2_d, in2_h); |
197 | 162 |
|
198 | | - // fill the output buffer with zeros; the size is known from the buffer objects |
199 | | - alpaka::memset(queue, out_d, 0.); |
| 163 | + // fill the output buffer with zeros; the size is known from the buffer objects |
| 164 | + alpaka::memset(queue, out_d, 0.); |
200 | 165 |
|
201 | | - // launch the 3-dimensional kernel |
202 | | - auto div = cms::alpakatools::make_workdiv<Acc2D>({4, 4}, {32, 32}); |
203 | | - alpaka::exec<Acc2D>(queue, div, VectorAddKernel2D{}, in1_d.data(), in2_d.data(), out_d.data(), ndsize); |
| 166 | + // launch the N-dimensional kernel |
| 167 | + using AccND = Acc<TDim>; |
| 168 | + auto div = cms::alpakatools::make_workdiv<AccND>(grid_size, block_size); |
| 169 | + alpaka::exec<AccND>(queue, div, kernel, in1_d.data(), in2_d.data(), out_d.data(), problem_size); |
204 | 170 |
|
205 | | - // copy the results from the device to the host |
206 | | - alpaka::memcpy(queue, out_h, out_d); |
| 171 | + // copy the results from the device to the host |
| 172 | + alpaka::memcpy(queue, out_h, out_d); |
207 | 173 |
|
208 | | - // wait for all the operations to complete |
209 | | - alpaka::wait(queue); |
| 174 | + // wait for all the operations to complete |
| 175 | + alpaka::wait(queue); |
210 | 176 |
|
211 | | - // check the results |
212 | | - for (size_t i = 0; i < size; ++i) { |
213 | | - float sum = in1_h[i] + in2_h[i]; |
214 | | - REQUIRE(out_h[i] < sum + epsilon); |
215 | | - REQUIRE(out_h[i] > sum - epsilon); |
216 | | - } |
| 177 | + // check the results |
| 178 | + for (size_t i = 0; i < size; ++i) { |
| 179 | + float sum = in1_h[i] + in2_h[i]; |
| 180 | + REQUIRE(out_h[i] < sum + epsilon); |
| 181 | + REQUIRE(out_h[i] > sum - epsilon); |
217 | 182 | } |
218 | 183 | } |
219 | 184 | } |
220 | 185 |
|
221 | | -TEST_CASE("Standard checks of " ALPAKA_TYPE_ALIAS_NAME(alpakaTestKernel3D), s_tag) { |
222 | | - SECTION("VectorAddKernel3D") { |
| 186 | +TEST_CASE("Test alpaka kernels for the " EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) " backend", |
| 187 | + "[" EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) "]") { |
| 188 | + SECTION("Alpaka N-dimensional kernels") { |
223 | 189 | // get the list of devices on the current platform |
224 | 190 | auto const& devices = cms::alpakatools::devices<Platform>(); |
225 | 191 | if (devices.empty()) { |
226 | | - std::cout << "No devices available on the platform " << EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) |
227 | | - << ", the test will be skipped.\n"; |
228 | | - return; |
| 192 | + INFO("No devices available on the platform " EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE)); |
| 193 | + REQUIRE(not devices.empty()); |
229 | 194 | } |
230 | 195 |
|
231 | | - // random number generator with a gaussian distribution |
232 | | - std::random_device rd{}; |
233 | | - std::default_random_engine rand{rd()}; |
234 | | - std::normal_distribution<float> dist{0., 1.}; |
235 | | - |
236 | | - // tolerance |
237 | | - constexpr float epsilon = 0.000001; |
238 | | - |
239 | | - // 3-dimensional and linearised buffer size |
240 | | - constexpr Vec3D ndsize = {50, 125, 16}; |
241 | | - constexpr size_t size = ndsize.prod(); |
242 | | - |
243 | | - // allocate input and output host buffers in pinned memory accessible by the Platform devices |
244 | | - auto in1_h = cms::alpakatools::make_host_buffer<float[], Platform>(size); |
245 | | - auto in2_h = cms::alpakatools::make_host_buffer<float[], Platform>(size); |
246 | | - auto out_h = cms::alpakatools::make_host_buffer<float[], Platform>(size); |
247 | | - |
248 | | - // fill the input buffers with random data, and the output buffer with zeros |
249 | | - for (size_t i = 0; i < size; ++i) { |
250 | | - in1_h[i] = dist(rand); |
251 | | - in2_h[i] = dist(rand); |
252 | | - out_h[i] = 0.; |
253 | | - } |
254 | | - |
255 | | - // run the test on each device |
256 | | - for (auto const& device : devices) { |
257 | | - std::cout << "Test 3D vector addition on " << alpaka::getName(device) << '\n'; |
258 | | - auto queue = Queue(device); |
259 | | - |
260 | | - // allocate input and output buffers on the device |
261 | | - auto in1_d = cms::alpakatools::make_device_buffer<float[]>(queue, size); |
262 | | - auto in2_d = cms::alpakatools::make_device_buffer<float[]>(queue, size); |
263 | | - auto out_d = cms::alpakatools::make_device_buffer<float[]>(queue, size); |
264 | | - |
265 | | - // copy the input data to the device; the size is known from the buffer objects |
266 | | - alpaka::memcpy(queue, in1_d, in1_h); |
267 | | - alpaka::memcpy(queue, in2_d, in2_h); |
268 | | - |
269 | | - // fill the output buffer with zeros; the size is known from the buffer objects |
270 | | - alpaka::memset(queue, out_d, 0.); |
271 | | - |
272 | | - // launch the 3-dimensional kernel |
273 | | - auto div = cms::alpakatools::make_workdiv<Acc3D>({5, 5, 1}, {4, 4, 4}); |
274 | | - alpaka::exec<Acc3D>(queue, div, VectorAddKernel3D{}, in1_d.data(), in2_d.data(), out_d.data(), ndsize); |
275 | | - |
276 | | - // copy the results from the device to the host |
277 | | - alpaka::memcpy(queue, out_h, out_d); |
278 | | - |
279 | | - // wait for all the operations to complete |
280 | | - alpaka::wait(queue); |
281 | | - |
282 | | - // check the results |
283 | | - for (size_t i = 0; i < size; ++i) { |
284 | | - float sum = in1_h[i] + in2_h[i]; |
285 | | - REQUIRE(out_h[i] < sum + epsilon); |
286 | | - REQUIRE(out_h[i] > sum - epsilon); |
287 | | - } |
288 | | - } |
| 196 | + // launch the 1-dimensional kernel with a small block size and a small number of blocks; |
| 197 | + // this relies on the kernel to loop over the "problem space" and do more work per block |
| 198 | + std::cout << "Test 1D vector addition with small block size, using scalar dimensions\n"; |
| 199 | + testVectorAddKernel(10000, 32, 32, VectorAddKernel{}); |
| 200 | + |
| 201 | + // launch the 1-dimensional kernel with a large block size and a single block; |
| 202 | + // this relies on the kernel to check the size of the "problem space" and avoid accessing out-of-bounds data |
| 203 | + std::cout << "Test 1D vector addition with large block size, using scalar dimensions\n"; |
| 204 | + testVectorAddKernel(100, 1, 1024, VectorAddKernel{}); |
| 205 | + |
| 206 | + // launch the 1-dimensional kernel with a small block size and a small number of blocks; |
| 207 | + // this relies on the kernel to loop over the "problem space" and do more work per block |
| 208 | + std::cout << "Test 1D vector addition with small block size\n"; |
| 209 | + testVectorAddKernelND<Dim1D>({10000}, {32}, {32}, VectorAddKernel1D{}); |
| 210 | + |
| 211 | + // launch the 1-dimensional kernel with a large block size and a single block; |
| 212 | + // this relies on the kernel to check the size of the "problem space" and avoid accessing out-of-bounds data |
| 213 | + std::cout << "Test 1D vector addition with large block size\n"; |
| 214 | + testVectorAddKernelND<Dim1D>({100}, {1}, {1024}, VectorAddKernel1D{}); |
| 215 | + |
| 216 | + // launch the 2-dimensional kernel with a small block size and a small number of blocks; |
| 217 | + // this relies on the kernel to loop over the "problem space" and do more work per block |
| 218 | + std::cout << "Test 2D vector addition with small block size\n"; |
| 219 | + testVectorAddKernelND<Dim2D>({400, 250}, {4, 4}, {16, 16}, VectorAddKernel2D{}); |
| 220 | + |
| 221 | + // launch the 2-dimensional kernel with a large block size and a single block; |
| 222 | + // this relies on the kernel to check the size of the "problem space" and avoid accessing out-of-bounds data |
| 223 | + std::cout << "Test 2D vector addition with large block size\n"; |
| 224 | + testVectorAddKernelND<Dim2D>({20, 20}, {1, 1}, {32, 32}, VectorAddKernel2D{}); |
| 225 | + |
| 226 | + // launch the 3-dimensional kernel with a small block size and a small number of blocks; |
| 227 | + // this relies on the kernel to loop over the "problem space" and do more work per block |
| 228 | + std::cout << "Test 3D vector addition with small block size\n"; |
| 229 | + testVectorAddKernelND<Dim3D>({50, 125, 16}, {5, 5, 1}, {4, 4, 4}, VectorAddKernel3D{}); |
| 230 | + |
| 231 | + // launch the 3-dimensional kernel with a large block size and a single block; |
| 232 | + // this relies on the kernel to check the size of the "problem space" and avoid accessing out-of-bounds data |
| 233 | + std::cout << "Test 3D vector addition with large block size\n"; |
| 234 | + testVectorAddKernelND<Dim3D>({5, 5, 5}, {1, 1, 1}, {8, 8, 8}, VectorAddKernel3D{}); |
289 | 235 | } |
290 | 236 | } |
0 commit comments