Skip to content

Commit 146b187

Browse files
committed
Add tests for the zeroInitialise() method
1 parent 87ce3a0 commit 146b187

File tree

4 files changed

+153
-10
lines changed

4 files changed

+153
-10
lines changed

DataFormats/PortableTestObjects/test/TestSoA.cc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// A minimal test to ensure that
22
// - portabletest::TestSoA can be compiled
33
// - portabletest::TestHostCollection can be allocated
4+
// - portabletest::TestHostCollection can be zero-initialised with zeroInitialise()
45
// - view-based element access works
56

67
#include "DataFormats/PortableTestObjects/interface/TestHostCollection.h"
@@ -14,6 +15,8 @@ int main() {
1415
const portabletest::Matrix matrix{{1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}, {3, 6, 9, 12, 15, 18}};
1516
const portabletest::Array flags = {{6, 4, 2, 0}};
1617

18+
collection.zeroInitialise();
19+
1720
collection.view().r() = 1.;
1821

1922
for (int i = 0; i < size; ++i) {

HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlgo.dev.cc

Lines changed: 137 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
1919
public:
2020
template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
2121
ALPAKA_FN_ACC void operator()(TAcc const& acc, portabletest::TestDeviceCollection::View view, double xvalue) const {
22-
// global index of the thread within the grid
2322
const portabletest::Matrix matrix{{1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}, {3, 6, 9, 12, 15, 18}};
2423
const portabletest::Array flags = {{6, 4, 2, 0}};
2524

@@ -41,12 +40,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
4140
ALPAKA_FN_ACC void operator()(TAcc const& acc,
4241
portabletest::TestDeviceMultiCollection2::View<1> view,
4342
double xvalue) const {
44-
// global index of the thread within the grid
45-
const int32_t thread = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u];
4643
const portabletest::Matrix matrix{{1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}, {3, 6, 9, 12, 15, 18}};
4744

4845
// set this only once in the whole kernel grid
49-
if (thread == 0) {
46+
if (once_per_grid(acc)) {
5047
view.r2() = 2.;
5148
}
5249

@@ -63,12 +60,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
6360
ALPAKA_FN_ACC void operator()(TAcc const& acc,
6461
portabletest::TestDeviceMultiCollection3::View<2> view,
6562
double xvalue) const {
66-
// global index of the thread within the grid
67-
const int32_t thread = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u];
6863
const portabletest::Matrix matrix{{1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}, {3, 6, 9, 12, 15, 18}};
6964

7065
// set this only once in the whole kernel grid
71-
if (thread == 0) {
66+
if (once_per_grid(acc)) {
7267
view.r3() = 3.;
7368
}
7469

@@ -342,4 +337,139 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
342337
return collection;
343338
}
344339

340+
// Device kernel functor that asserts that every field reachable through a
// TestDeviceCollection ConstView compares equal to zero.
//   acc  - accelerator handle, forwarded to once_per_grid() and uniform_elements()
//   view - read-only view of the collection under test
// The scalar r() is checked inside the once_per_grid() guard; the per-element
// columns x, y, z, id, flags and m are checked inside the uniform_elements() loop.
// NOTE(review): all checks rely on ALPAKA_ASSERT; confirm that the macro is active
// in the build configuration used to run this test.
class TestZeroCollectionKernel {
341+
public:
342+
template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
343+
ALPAKA_FN_ACC void operator()(TAcc const& acc, portabletest::TestDeviceCollection::ConstView view) const {
344+
// all-zero reference values used for the matrix and flags comparisons below
const portabletest::Matrix matrix{{0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0}};
345+
const portabletest::Array flags = {{0, 0, 0, 0}};
346+
347+
// check this only once in the whole kernel grid
348+
if (once_per_grid(acc)) {
349+
ALPAKA_ASSERT(view.r() == 0.);
350+
}
351+
352+
// make a strided loop over the kernel grid, covering up to "size" elements
353+
for (int32_t i : uniform_elements(acc, view.metadata().size())) {
354+
auto element = view[i];
355+
ALPAKA_ASSERT(element.x() == 0.);
356+
ALPAKA_ASSERT(element.y() == 0.);
357+
ALPAKA_ASSERT(element.z() == 0.);
358+
// NOTE(review): id() is compared against the floating-point literal 0., while
// TestZeroStructKernel compares data->id against the integer 0 - confirm which is intended
ALPAKA_ASSERT(element.id() == 0.);
359+
ALPAKA_ASSERT(element.flags() == flags);
360+
ALPAKA_ASSERT(element.m() == matrix);
361+
}
362+
}
363+
};
364+
365+
// Device kernel functor that asserts that every field reachable through
// TestDeviceMultiCollection2::ConstView<1> compares equal to zero.
//   acc  - accelerator handle, forwarded to once_per_grid() and uniform_elements()
//   view - read-only view of the layout under test
// The scalar r2() is checked inside the once_per_grid() guard; the per-element
// columns x2, y2, z2, id2 and m2 are checked inside the uniform_elements() loop.
// NOTE(review): all checks rely on ALPAKA_ASSERT; confirm that the macro is active
// in the build configuration used to run this test.
class TestZeroMultiCollectionKernel2 {
366+
public:
367+
template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
368+
ALPAKA_FN_ACC void operator()(TAcc const& acc, portabletest::TestDeviceMultiCollection2::ConstView<1> view) const {
369+
// all-zero reference value used for the m2() comparison below
const portabletest::Matrix matrix{{0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0}};
370+
371+
// check this only once in the whole kernel grid
372+
if (once_per_grid(acc)) {
373+
ALPAKA_ASSERT(view.r2() == 0.);
374+
}
375+
376+
// make a strided loop over the kernel grid, covering up to "size" elements
377+
for (int32_t i : uniform_elements(acc, view.metadata().size())) {
378+
auto element = view[i];
379+
ALPAKA_ASSERT(element.x2() == 0.);
380+
ALPAKA_ASSERT(element.y2() == 0.);
381+
ALPAKA_ASSERT(element.z2() == 0.);
382+
// NOTE(review): id2() is compared against the floating-point literal 0. - confirm
// whether an integer literal was intended, as in TestZeroStructKernel
ALPAKA_ASSERT(element.id2() == 0.);
383+
ALPAKA_ASSERT(element.m2() == matrix);
384+
}
385+
}
386+
};
387+
388+
// Device kernel functor that asserts that every field reachable through
// TestDeviceMultiCollection3::ConstView<2> compares equal to zero.
//   acc  - accelerator handle, forwarded to once_per_grid() and uniform_elements()
//   view - read-only view of the layout under test
// The scalar r3() is checked inside the once_per_grid() guard; the per-element
// columns x3, y3, z3, id3 and m3 are checked inside the uniform_elements() loop.
// NOTE(review): all checks rely on ALPAKA_ASSERT; confirm that the macro is active
// in the build configuration used to run this test.
class TestZeroMultiCollectionKernel3 {
389+
public:
390+
template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
391+
ALPAKA_FN_ACC void operator()(TAcc const& acc, portabletest::TestDeviceMultiCollection3::ConstView<2> view) const {
392+
// all-zero reference value used for the m3() comparison below
const portabletest::Matrix matrix{{0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0}};
393+
394+
// check this only once in the whole kernel grid
395+
if (once_per_grid(acc)) {
396+
ALPAKA_ASSERT(view.r3() == 0.);
397+
}
398+
399+
// make a strided loop over the kernel grid, covering up to "size" elements
400+
for (int32_t i : uniform_elements(acc, view.metadata().size())) {
401+
auto element = view[i];
402+
ALPAKA_ASSERT(element.x3() == 0.);
403+
ALPAKA_ASSERT(element.y3() == 0.);
404+
ALPAKA_ASSERT(element.z3() == 0.);
405+
// NOTE(review): id3() is compared against the floating-point literal 0. - confirm
// whether an integer literal was intended, as in TestZeroStructKernel
ALPAKA_ASSERT(element.id3() == 0.);
406+
ALPAKA_ASSERT(element.m3() == matrix);
407+
}
408+
}
409+
};
410+
411+
// Device kernel functor that asserts that the members x, y, z and id of a
// TestDeviceObject::Product compare equal to zero.
//   acc  - accelerator handle, forwarded to once_per_grid()
//   data - pointer to the object under test; it is dereferenced without a null check
// Every check sits inside the once_per_grid() guard, so nothing is done outside it.
// NOTE(review): all checks rely on ALPAKA_ASSERT; confirm that the macro is active
// in the build configuration used to run this test.
class TestZeroStructKernel {
412+
public:
413+
template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
414+
ALPAKA_FN_ACC void operator()(TAcc const& acc, portabletest::TestDeviceObject::Product const* data) const {
415+
// check this only once in the whole kernel grid
416+
if (once_per_grid(acc)) {
417+
ALPAKA_ASSERT(data->x == 0.);
418+
ALPAKA_ASSERT(data->y == 0.);
419+
ALPAKA_ASSERT(data->z == 0.);
420+
// id is compared against an integer literal, unlike the floating-point members above
ALPAKA_ASSERT(data->id == 0);
421+
}
422+
}
423+
};
424+
425+
// Check that the collection has been filled with zeroes.
426+
// Submits TestZeroCollectionKernel to `queue`, passing the collection's const view.
//   queue      - alpaka queue the kernel is submitted to
//   collection - device collection expected to contain only zeroes
// NOTE(review): this function does not wait on the queue itself; presumably the
// kernel may still be running when it returns - confirm against the Queue type.
void TestAlgo::checkZero(Queue& queue, portabletest::TestDeviceCollection const& collection) const {
427+
// create a work division with a single block and
428+
//   - 32 threads with a single element per thread on a GPU backend
429+
//   - 32 elements within a single thread on a CPU backend
430+
auto workDiv = make_workdiv<Acc1D>(1, 32);
431+
432+
// the kernel will make a strided loop over the launch grid to cover all elements in the collection
433+
alpaka::exec<Acc1D>(queue, workDiv, TestZeroCollectionKernel{}, collection.const_view());
434+
}
435+
436+
// Check that the collection has been filled with zeroes.
437+
// Submits one zero-check kernel per layout of the two-layout collection to `queue`:
// TestZeroCollectionKernel on the TestSoA view and TestZeroMultiCollectionKernel2
// on the TestSoA2 view. Both launches use the same work division.
//   queue      - alpaka queue the kernels are submitted to
//   collection - device multi-collection expected to contain only zeroes
// NOTE(review): this function does not wait on the queue itself; presumably the
// kernels may still be running when it returns - confirm against the Queue type.
void TestAlgo::checkZero(Queue& queue, portabletest::TestDeviceMultiCollection2 const& collection) const {
438+
// create a work division with a single block and
439+
//   - 32 threads with a single element per thread on a GPU backend
440+
//   - 32 elements within a single thread on a CPU backend
441+
auto workDiv = make_workdiv<Acc1D>(1, 32);
442+
443+
// the kernels will make a strided loop over the launch grid to cover all elements in the collection
444+
alpaka::exec<Acc1D>(queue, workDiv, TestZeroCollectionKernel{}, collection.const_view<portabletest::TestSoA>());
445+
alpaka::exec<Acc1D>(
446+
queue, workDiv, TestZeroMultiCollectionKernel2{}, collection.const_view<portabletest::TestSoA2>());
447+
}
448+
449+
// Check that the collection has been filled with zeroes.
450+
// Submits one zero-check kernel per layout of the three-layout collection to `queue`:
// TestZeroCollectionKernel on the TestSoA view, TestZeroMultiCollectionKernel2 on the
// TestSoA2 view and TestZeroMultiCollectionKernel3 on the TestSoA3 view.
// All three launches use the same work division.
//   queue      - alpaka queue the kernels are submitted to
//   collection - device multi-collection expected to contain only zeroes
// NOTE(review): this function does not wait on the queue itself; presumably the
// kernels may still be running when it returns - confirm against the Queue type.
void TestAlgo::checkZero(Queue& queue, portabletest::TestDeviceMultiCollection3 const& collection) const {
451+
// create a work division with a single block and
452+
//   - 32 threads with a single element per thread on a GPU backend
453+
//   - 32 elements within a single thread on a CPU backend
454+
auto workDiv = make_workdiv<Acc1D>(1, 32);
455+
456+
// the kernels will make a strided loop over the launch grid to cover all elements in the collection
457+
alpaka::exec<Acc1D>(queue, workDiv, TestZeroCollectionKernel{}, collection.const_view<portabletest::TestSoA>());
458+
alpaka::exec<Acc1D>(
459+
queue, workDiv, TestZeroMultiCollectionKernel2{}, collection.const_view<portabletest::TestSoA2>());
460+
alpaka::exec<Acc1D>(
461+
queue, workDiv, TestZeroMultiCollectionKernel3{}, collection.const_view<portabletest::TestSoA3>());
462+
}
463+
464+
// Check that the object has been filled with zeroes.
465+
// Submits TestZeroStructKernel to `queue`, passing the pointer returned by object.data().
//   queue  - alpaka queue the kernel is submitted to
//   object - device object expected to contain only zeroes
// NOTE(review): this function does not wait on the queue itself; presumably the
// kernel may still be running when it returns - confirm against the Queue type.
void TestAlgo::checkZero(Queue& queue, portabletest::TestDeviceObject const& object) const {
466+
// create a work division with a single block and
467+
//   - 32 threads with a single element per thread on a GPU backend
468+
//   - 32 elements within a single thread on a CPU backend
469+
auto workDiv = make_workdiv<Acc1D>(1, 32);
470+
471+
// the kernel will actually use a single thread: TestZeroStructKernel wraps all of its
// checks in once_per_grid(acc), even though the work division above requests 32
472+
alpaka::exec<Acc1D>(queue, workDiv, TestZeroStructKernel{}, object.data());
473+
}
474+
345475
} // namespace ALPAKA_ACCELERATOR_NAMESPACE

HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlgo.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
2626

2727
void fillMulti2(Queue& queue, portabletest::TestDeviceMultiCollection2& collection, double xvalue = 0.) const;
2828
void fillMulti3(Queue& queue, portabletest::TestDeviceMultiCollection3& collection, double xvalue = 0.) const;
29+
30+
void checkZero(Queue& queue, portabletest::TestDeviceCollection const& collection) const;
31+
void checkZero(Queue& queue, portabletest::TestDeviceMultiCollection2 const& collection) const;
32+
void checkZero(Queue& queue, portabletest::TestDeviceMultiCollection3 const& collection) const;
33+
void checkZero(Queue& queue, portabletest::TestDeviceObject const& object) const;
2934
};
3035

3136
} // namespace ALPAKA_ACCELERATOR_NAMESPACE

HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaProducer.cc

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,18 +30,23 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
3030
void produce(edm::StreamID sid, device::Event& event, device::EventSetup const&) const override {
3131
// run the algorithm, potentially asynchronously
3232
portabletest::TestDeviceCollection deviceCollection{size_, event.queue()};
33+
deviceCollection.zeroInitialise(event.queue());
34+
algo_.checkZero(event.queue(), deviceCollection);
3335
algo_.fill(event.queue(), deviceCollection);
3436

3537
portabletest::TestDeviceObject deviceObject{event.queue()};
38+
deviceObject.zeroInitialise(event.queue());
39+
algo_.checkZero(event.queue(), deviceObject);
3640
algo_.fillObject(event.queue(), deviceObject, 5., 12., 13., 42);
3741

38-
portabletest::TestDeviceCollection deviceProduct{size_, event.queue()};
39-
algo_.fill(event.queue(), deviceProduct);
40-
4142
portabletest::TestDeviceMultiCollection2 deviceMultiProduct2{{{size_, size2_}}, event.queue()};
43+
deviceMultiProduct2.zeroInitialise(event.queue());
44+
algo_.checkZero(event.queue(), deviceMultiProduct2);
4245
algo_.fillMulti2(event.queue(), deviceMultiProduct2);
4346

4447
portabletest::TestDeviceMultiCollection3 deviceMultiProduct3{{{size_, size2_, size3_}}, event.queue()};
48+
deviceMultiProduct3.zeroInitialise(event.queue());
49+
algo_.checkZero(event.queue(), deviceMultiProduct3);
4550
algo_.fillMulti3(event.queue(), deviceMultiProduct3);
4651

4752
// put the asynchronous products into the event without waiting

0 commit comments

Comments
 (0)