@@ -19,7 +19,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
1919 public:
2020 template <typename TAcc, typename = std::enable_if_t <alpaka::isAccelerator<TAcc>>>
2121 ALPAKA_FN_ACC void operator ()(TAcc const & acc, portabletest::TestDeviceCollection::View view, double xvalue) const {
22- // global index of the thread within the grid
2322 const portabletest::Matrix matrix{{1 , 2 , 3 , 4 , 5 , 6 }, {2 , 4 , 6 , 8 , 10 , 12 }, {3 , 6 , 9 , 12 , 15 , 18 }};
2423 const portabletest::Array flags = {{6 , 4 , 2 , 0 }};
2524
@@ -41,12 +40,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
4140 ALPAKA_FN_ACC void operator ()(TAcc const & acc,
4241 portabletest::TestDeviceMultiCollection2::View<1 > view,
4342 double xvalue) const {
44- // global index of the thread within the grid
45- const int32_t thread = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u ];
4643 const portabletest::Matrix matrix{{1 , 2 , 3 , 4 , 5 , 6 }, {2 , 4 , 6 , 8 , 10 , 12 }, {3 , 6 , 9 , 12 , 15 , 18 }};
4744
4845 // set this only once in the whole kernel grid
49- if (thread == 0 ) {
46+ if (once_per_grid (acc) ) {
5047 view.r2 () = 2 .;
5148 }
5249
@@ -63,12 +60,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
6360 ALPAKA_FN_ACC void operator ()(TAcc const & acc,
6461 portabletest::TestDeviceMultiCollection3::View<2 > view,
6562 double xvalue) const {
66- // global index of the thread within the grid
67- const int32_t thread = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u ];
6863 const portabletest::Matrix matrix{{1 , 2 , 3 , 4 , 5 , 6 }, {2 , 4 , 6 , 8 , 10 , 12 }, {3 , 6 , 9 , 12 , 15 , 18 }};
6964
7065 // set this only once in the whole kernel grid
71- if (thread == 0 ) {
66+ if (once_per_grid (acc) ) {
7267 view.r3 () = 3 .;
7368 }
7469
@@ -342,4 +337,139 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
342337 return collection;
343338 }
344339
340+ class TestZeroCollectionKernel {
341+ public:
342+ template <typename TAcc, typename = std::enable_if_t <alpaka::isAccelerator<TAcc>>>
343+ ALPAKA_FN_ACC void operator ()(TAcc const & acc, portabletest::TestDeviceCollection::ConstView view) const {
344+ const portabletest::Matrix matrix{{0 , 0 , 0 , 0 , 0 , 0 }, {0 , 0 , 0 , 0 , 0 , 0 }, {0 , 0 , 0 , 0 , 0 , 0 }};
345+ const portabletest::Array flags = {{0 , 0 , 0 , 0 }};
346+
347+ // check this only once in the whole kernel grid
348+ if (once_per_grid (acc)) {
349+ ALPAKA_ASSERT (view.r () == 0 .);
350+ }
351+
352+ // make a strided loop over the kernel grid, covering up to "size" elements
353+ for (int32_t i : uniform_elements (acc, view.metadata ().size ())) {
354+ auto element = view[i];
355+ ALPAKA_ASSERT (element.x () == 0 .);
356+ ALPAKA_ASSERT (element.y () == 0 .);
357+ ALPAKA_ASSERT (element.z () == 0 .);
358+ ALPAKA_ASSERT (element.id () == 0 .);
359+ ALPAKA_ASSERT (element.flags () == flags);
360+ ALPAKA_ASSERT (element.m () == matrix);
361+ }
362+ }
363+ };
364+
365+ class TestZeroMultiCollectionKernel2 {
366+ public:
367+ template <typename TAcc, typename = std::enable_if_t <alpaka::isAccelerator<TAcc>>>
368+ ALPAKA_FN_ACC void operator ()(TAcc const & acc, portabletest::TestDeviceMultiCollection2::ConstView<1 > view) const {
369+ const portabletest::Matrix matrix{{0 , 0 , 0 , 0 , 0 , 0 }, {0 , 0 , 0 , 0 , 0 , 0 }, {0 , 0 , 0 , 0 , 0 , 0 }};
370+
371+ // check this only once in the whole kernel grid
372+ if (once_per_grid (acc)) {
373+ ALPAKA_ASSERT (view.r2 () == 0 .);
374+ }
375+
376+ // make a strided loop over the kernel grid, covering up to "size" elements
377+ for (int32_t i : uniform_elements (acc, view.metadata ().size ())) {
378+ auto element = view[i];
379+ ALPAKA_ASSERT (element.x2 () == 0 .);
380+ ALPAKA_ASSERT (element.y2 () == 0 .);
381+ ALPAKA_ASSERT (element.z2 () == 0 .);
382+ ALPAKA_ASSERT (element.id2 () == 0 .);
383+ ALPAKA_ASSERT (element.m2 () == matrix);
384+ }
385+ }
386+ };
387+
388+ class TestZeroMultiCollectionKernel3 {
389+ public:
390+ template <typename TAcc, typename = std::enable_if_t <alpaka::isAccelerator<TAcc>>>
391+ ALPAKA_FN_ACC void operator ()(TAcc const & acc, portabletest::TestDeviceMultiCollection3::ConstView<2 > view) const {
392+ const portabletest::Matrix matrix{{0 , 0 , 0 , 0 , 0 , 0 }, {0 , 0 , 0 , 0 , 0 , 0 }, {0 , 0 , 0 , 0 , 0 , 0 }};
393+
394+ // check this only once in the whole kernel grid
395+ if (once_per_grid (acc)) {
396+ ALPAKA_ASSERT (view.r3 () == 0 .);
397+ }
398+
399+ // make a strided loop over the kernel grid, covering up to "size" elements
400+ for (int32_t i : uniform_elements (acc, view.metadata ().size ())) {
401+ auto element = view[i];
402+ ALPAKA_ASSERT (element.x3 () == 0 .);
403+ ALPAKA_ASSERT (element.y3 () == 0 .);
404+ ALPAKA_ASSERT (element.z3 () == 0 .);
405+ ALPAKA_ASSERT (element.id3 () == 0 .);
406+ ALPAKA_ASSERT (element.m3 () == matrix);
407+ }
408+ }
409+ };
410+
411+ class TestZeroStructKernel {
412+ public:
413+ template <typename TAcc, typename = std::enable_if_t <alpaka::isAccelerator<TAcc>>>
414+ ALPAKA_FN_ACC void operator ()(TAcc const & acc, portabletest::TestDeviceObject::Product const * data) const {
415+ // check this only once in the whole kernel grid
416+ if (once_per_grid (acc)) {
417+ ALPAKA_ASSERT (data->x == 0 .);
418+ ALPAKA_ASSERT (data->y == 0 .);
419+ ALPAKA_ASSERT (data->z == 0 .);
420+ ALPAKA_ASSERT (data->id == 0 );
421+ }
422+ }
423+ };
424+
425+ // Check that the collection has been filled with zeroes.
426+ void TestAlgo::checkZero (Queue& queue, portabletest::TestDeviceCollection const & collection) const {
427+ // create a work division with a single block and
428+ // - 32 threads with a single element per thread on a GPU backend
429+ // - 32 elements within a single thread on a CPU backend
430+ auto workDiv = make_workdiv<Acc1D>(1 , 32 );
431+
432+ // the kernel will make a strided loop over the launch grid to cover all elements in the collection
433+ alpaka::exec<Acc1D>(queue, workDiv, TestZeroCollectionKernel{}, collection.const_view ());
434+ }
435+
436+ // Check that the collection has been filled with zeroes.
437+ void TestAlgo::checkZero (Queue& queue, portabletest::TestDeviceMultiCollection2 const & collection) const {
438+ // create a work division with a single block and
439+ // - 32 threads with a single element per thread on a GPU backend
440+ // - 32 elements within a single thread on a CPU backend
441+ auto workDiv = make_workdiv<Acc1D>(1 , 32 );
442+
443+ // the kernels will make a strided loop over the launch grid to cover all elements in the collection
444+ alpaka::exec<Acc1D>(queue, workDiv, TestZeroCollectionKernel{}, collection.const_view <portabletest::TestSoA>());
445+ alpaka::exec<Acc1D>(
446+ queue, workDiv, TestZeroMultiCollectionKernel2{}, collection.const_view <portabletest::TestSoA2>());
447+ }
448+
449+ // Check that the collection has been filled with zeroes.
450+ void TestAlgo::checkZero (Queue& queue, portabletest::TestDeviceMultiCollection3 const & collection) const {
451+ // create a work division with a single block and
452+ // - 32 threads with a single element per thread on a GPU backend
453+ // - 32 elements within a single thread on a CPU backend
454+ auto workDiv = make_workdiv<Acc1D>(1 , 32 );
455+
456+ // the kernels will make a strided loop over the launch grid to cover all elements in the collection
457+ alpaka::exec<Acc1D>(queue, workDiv, TestZeroCollectionKernel{}, collection.const_view <portabletest::TestSoA>());
458+ alpaka::exec<Acc1D>(
459+ queue, workDiv, TestZeroMultiCollectionKernel2{}, collection.const_view <portabletest::TestSoA2>());
460+ alpaka::exec<Acc1D>(
461+ queue, workDiv, TestZeroMultiCollectionKernel3{}, collection.const_view <portabletest::TestSoA3>());
462+ }
463+
464+ // Check that the object has been filled with zeroes.
465+ void TestAlgo::checkZero (Queue& queue, portabletest::TestDeviceObject const & object) const {
466+ // create a work division with a single block and
467+ // - 32 threads with a single element per thread on a GPU backend
468+ // - 32 elements within a single thread on a CPU backend
469+ auto workDiv = make_workdiv<Acc1D>(1 , 32 );
470+
471+ // the kernel will actually use a single thread
472+ alpaka::exec<Acc1D>(queue, workDiv, TestZeroStructKernel{}, object.data ());
473+ }
474+
345475} // namespace ALPAKA_ACCELERATOR_NAMESPACE
0 commit comments