Skip to content

Commit 82dc60c

Browse files
authored
Merge pull request #45861 from fwyzard/PortableCollection_zero_memory
Add a `zeroInitialise()` method to portable objects and collections
2 parents d1f3825 + 146b187 commit 82dc60c

File tree

8 files changed

+208
-12
lines changed

8 files changed

+208
-12
lines changed

DataFormats/Portable/interface/PortableDeviceCollection.h

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,12 @@ class PortableDeviceCollection {
7070
ConstBuffer buffer() const { return *buffer_; }
7171
ConstBuffer const_buffer() const { return *buffer_; }
7272

73+
// erases the data in the Buffer by writing zeros (bytes containing '\0') to it
74+
template <typename TQueue, typename = std::enable_if_t<alpaka::isQueue<TQueue>>>
75+
void zeroInitialise(TQueue&& queue) {
76+
alpaka::memset(std::forward<TQueue>(queue), *buffer_, 0x00);
77+
}
78+
7379
private:
7480
std::optional<Buffer> buffer_; //!
7581
Layout layout_; //
@@ -275,7 +281,13 @@ class PortableDeviceMultiCollection {
275281
ConstBuffer buffer() const { return *buffer_; }
276282
ConstBuffer const_buffer() const { return *buffer_; }
277283

278-
// Extract the sizes array
284+
// erases the data in the Buffer by writing zeros (bytes containing '\0') to it
285+
template <typename TQueue, typename = std::enable_if_t<alpaka::isQueue<TQueue>>>
286+
void zeroInitialise(TQueue&& queue) {
287+
alpaka::memset(std::forward<TQueue>(queue), *buffer_, 0x00);
288+
}
289+
290+
// extract the sizes array
279291
SizesArray sizes() const {
280292
SizesArray ret;
281293
portablecollection::constexpr_for<0, members_>([&](auto i) { ret[i] = get<i>().layout_.metadata().size(); });

DataFormats/Portable/interface/PortableDeviceObject.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,11 @@ class PortableDeviceObject {
5050
// access the product
5151
Product& value() { return *buffer_->data(); }
5252
Product const& value() const { return *buffer_->data(); }
53+
Product const& const_value() const { return *buffer_->data(); }
5354

5455
Product* data() { return buffer_->data(); }
5556
Product const* data() const { return buffer_->data(); }
57+
Product const* const_data() const { return buffer_->data(); }
5658

5759
Product& operator*() { return *buffer_->data(); }
5860
Product const& operator*() const { return *buffer_->data(); }
@@ -65,6 +67,12 @@ class PortableDeviceObject {
6567
ConstBuffer buffer() const { return *buffer_; }
6668
ConstBuffer const_buffer() const { return *buffer_; }
6769

70+
// erases the data in the Buffer by writing zeros (bytes containing '\0') to it
71+
template <typename TQueue, typename = std::enable_if_t<alpaka::isQueue<TQueue>>>
72+
void zeroInitialise(TQueue&& queue) {
73+
alpaka::memset(std::forward<TQueue>(queue), *buffer_, 0x00);
74+
}
75+
6876
private:
6977
std::optional<Buffer> buffer_;
7078
};

DataFormats/Portable/interface/PortableHostCollection.h

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,16 @@ class PortableHostCollection {
6969
ConstBuffer buffer() const { return *buffer_; }
7070
ConstBuffer const_buffer() const { return *buffer_; }
7171

72+
// erases the data in the Buffer by writing zeros (bytes containing '\0') to it
73+
void zeroInitialise() {
74+
std::memset(std::data(*buffer_), 0x00, alpaka::getExtentProduct(*buffer_) * sizeof(std::byte));
75+
}
76+
77+
template <typename TQueue, typename = std::enable_if_t<alpaka::isQueue<TQueue>>>
78+
void zeroInitialise(TQueue&& queue) {
79+
alpaka::memset(std::forward<TQueue>(queue), *buffer_, 0x00);
80+
}
81+
7282
// part of the ROOT read streamer
7383
static void ROOTReadStreamer(PortableHostCollection* newObj, Layout& layout) {
7484
// destroy the default-constructed collection
@@ -278,12 +288,23 @@ class PortableHostMultiCollection {
278288
ConstBuffer buffer() const { return *buffer_; }
279289
ConstBuffer const_buffer() const { return *buffer_; }
280290

281-
// Extract the sizes array
291+
// erases the data in the Buffer by writing zeros (bytes containing '\0') to it
292+
void zeroInitialise() {
293+
std::memset(std::data(*buffer_), 0x00, alpaka::getExtentProduct(*buffer_) * sizeof(std::byte));
294+
}
295+
296+
template <typename TQueue, typename = std::enable_if_t<alpaka::isQueue<TQueue>>>
297+
void zeroInitialise(TQueue&& queue) {
298+
alpaka::memset(std::forward<TQueue>(queue), *buffer_, 0x00);
299+
}
300+
301+
// extract the sizes array
282302
SizesArray sizes() const {
283303
SizesArray ret;
284304
portablecollection::constexpr_for<0, members_>([&](auto i) { ret[i] = get<i>().layout_.metadata().size(); });
285305
return ret;
286306
}
307+
287308
// part of the ROOT read streamer
288309
static void ROOTReadStreamer(PortableHostMultiCollection* newObj, Implementation& onfileImpl) {
289310
newObj->~PortableHostMultiCollection();

DataFormats/Portable/interface/PortableHostObject.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,11 @@ class PortableHostObject {
4848
// access the product
4949
Product& value() { return *product_; }
5050
Product const& value() const { return *product_; }
51+
Product const& const_value() const { return *product_; }
5152

5253
Product* data() { return product_; }
5354
Product const* data() const { return product_; }
55+
Product const* const_data() const { return product_; }
5456

5557
Product& operator*() { return *product_; }
5658
Product const& operator*() const { return *product_; }
@@ -63,6 +65,16 @@ class PortableHostObject {
6365
ConstBuffer buffer() const { return *buffer_; }
6466
ConstBuffer const_buffer() const { return *buffer_; }
6567

68+
// erases the data in the Buffer by writing zeros (bytes containing '\0') to it
69+
void zeroInitialise() {
70+
std::memset(std::data(*buffer_), 0x00, alpaka::getExtentProduct(*buffer_) * sizeof(std::byte));
71+
}
72+
73+
template <typename TQueue, typename = std::enable_if_t<alpaka::isQueue<TQueue>>>
74+
void zeroInitialise(TQueue&& queue) {
75+
alpaka::memset(std::forward<TQueue>(queue), *buffer_, 0x00);
76+
}
77+
6678
// part of the ROOT read streamer
6779
static void ROOTReadStreamer(PortableHostObject* newObj, Product& product) {
6880
// destroy the default-constructed object

DataFormats/PortableTestObjects/test/TestSoA.cc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// A minimal test to ensure that
22
// - portabletest::TestSoA can be compiled
33
// - portabletest::TestHostCollection can be allocated
4+
// - portabletest::TestHostCollection can be erased
45
// - view-based element access works
56

67
#include "DataFormats/PortableTestObjects/interface/TestHostCollection.h"
@@ -14,6 +15,8 @@ int main() {
1415
const portabletest::Matrix matrix{{1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}, {3, 6, 9, 12, 15, 18}};
1516
const portabletest::Array flags = {{6, 4, 2, 0}};
1617

18+
collection.zeroInitialise();
19+
1720
collection.view().r() = 1.;
1821

1922
for (int i = 0; i < size; ++i) {

HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlgo.dev.cc

Lines changed: 137 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
1919
public:
2020
template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
2121
ALPAKA_FN_ACC void operator()(TAcc const& acc, portabletest::TestDeviceCollection::View view, double xvalue) const {
22-
// global index of the thread within the grid
2322
const portabletest::Matrix matrix{{1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}, {3, 6, 9, 12, 15, 18}};
2423
const portabletest::Array flags = {{6, 4, 2, 0}};
2524

@@ -41,12 +40,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
4140
ALPAKA_FN_ACC void operator()(TAcc const& acc,
4241
portabletest::TestDeviceMultiCollection2::View<1> view,
4342
double xvalue) const {
44-
// global index of the thread within the grid
45-
const int32_t thread = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u];
4643
const portabletest::Matrix matrix{{1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}, {3, 6, 9, 12, 15, 18}};
4744

4845
// set this only once in the whole kernel grid
49-
if (thread == 0) {
46+
if (once_per_grid(acc)) {
5047
view.r2() = 2.;
5148
}
5249

@@ -63,12 +60,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
6360
ALPAKA_FN_ACC void operator()(TAcc const& acc,
6461
portabletest::TestDeviceMultiCollection3::View<2> view,
6562
double xvalue) const {
66-
// global index of the thread within the grid
67-
const int32_t thread = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0u];
6863
const portabletest::Matrix matrix{{1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}, {3, 6, 9, 12, 15, 18}};
6964

7065
// set this only once in the whole kernel grid
71-
if (thread == 0) {
66+
if (once_per_grid(acc)) {
7267
view.r3() = 3.;
7368
}
7469

@@ -342,4 +337,139 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
342337
return collection;
343338
}
344339

340+
class TestZeroCollectionKernel {
341+
public:
342+
template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
343+
ALPAKA_FN_ACC void operator()(TAcc const& acc, portabletest::TestDeviceCollection::ConstView view) const {
344+
const portabletest::Matrix matrix{{0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0}};
345+
const portabletest::Array flags = {{0, 0, 0, 0}};
346+
347+
// check this only once in the whole kernel grid
348+
if (once_per_grid(acc)) {
349+
ALPAKA_ASSERT(view.r() == 0.);
350+
}
351+
352+
// make a strided loop over the kernel grid, covering up to "size" elements
353+
for (int32_t i : uniform_elements(acc, view.metadata().size())) {
354+
auto element = view[i];
355+
ALPAKA_ASSERT(element.x() == 0.);
356+
ALPAKA_ASSERT(element.y() == 0.);
357+
ALPAKA_ASSERT(element.z() == 0.);
358+
ALPAKA_ASSERT(element.id() == 0.);
359+
ALPAKA_ASSERT(element.flags() == flags);
360+
ALPAKA_ASSERT(element.m() == matrix);
361+
}
362+
}
363+
};
364+
365+
class TestZeroMultiCollectionKernel2 {
366+
public:
367+
template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
368+
ALPAKA_FN_ACC void operator()(TAcc const& acc, portabletest::TestDeviceMultiCollection2::ConstView<1> view) const {
369+
const portabletest::Matrix matrix{{0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0}};
370+
371+
// check this only once in the whole kernel grid
372+
if (once_per_grid(acc)) {
373+
ALPAKA_ASSERT(view.r2() == 0.);
374+
}
375+
376+
// make a strided loop over the kernel grid, covering up to "size" elements
377+
for (int32_t i : uniform_elements(acc, view.metadata().size())) {
378+
auto element = view[i];
379+
ALPAKA_ASSERT(element.x2() == 0.);
380+
ALPAKA_ASSERT(element.y2() == 0.);
381+
ALPAKA_ASSERT(element.z2() == 0.);
382+
ALPAKA_ASSERT(element.id2() == 0.);
383+
ALPAKA_ASSERT(element.m2() == matrix);
384+
}
385+
}
386+
};
387+
388+
class TestZeroMultiCollectionKernel3 {
389+
public:
390+
template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
391+
ALPAKA_FN_ACC void operator()(TAcc const& acc, portabletest::TestDeviceMultiCollection3::ConstView<2> view) const {
392+
const portabletest::Matrix matrix{{0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0}};
393+
394+
// check this only once in the whole kernel grid
395+
if (once_per_grid(acc)) {
396+
ALPAKA_ASSERT(view.r3() == 0.);
397+
}
398+
399+
// make a strided loop over the kernel grid, covering up to "size" elements
400+
for (int32_t i : uniform_elements(acc, view.metadata().size())) {
401+
auto element = view[i];
402+
ALPAKA_ASSERT(element.x3() == 0.);
403+
ALPAKA_ASSERT(element.y3() == 0.);
404+
ALPAKA_ASSERT(element.z3() == 0.);
405+
ALPAKA_ASSERT(element.id3() == 0.);
406+
ALPAKA_ASSERT(element.m3() == matrix);
407+
}
408+
}
409+
};
410+
411+
class TestZeroStructKernel {
412+
public:
413+
template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
414+
ALPAKA_FN_ACC void operator()(TAcc const& acc, portabletest::TestDeviceObject::Product const* data) const {
415+
// check this only once in the whole kernel grid
416+
if (once_per_grid(acc)) {
417+
ALPAKA_ASSERT(data->x == 0.);
418+
ALPAKA_ASSERT(data->y == 0.);
419+
ALPAKA_ASSERT(data->z == 0.);
420+
ALPAKA_ASSERT(data->id == 0);
421+
}
422+
}
423+
};
424+
425+
// Check that the collection has been filled with zeroes.
426+
void TestAlgo::checkZero(Queue& queue, portabletest::TestDeviceCollection const& collection) const {
427+
// create a work division with a single block and
428+
// - 32 threads with a single element per thread on a GPU backend
429+
// - 32 elements within a single thread on a CPU backend
430+
auto workDiv = make_workdiv<Acc1D>(1, 32);
431+
432+
// the kernel will make a strided loop over the launch grid to cover all elements in the collection
433+
alpaka::exec<Acc1D>(queue, workDiv, TestZeroCollectionKernel{}, collection.const_view());
434+
}
435+
436+
// Check that the collection has been filled with zeroes.
437+
void TestAlgo::checkZero(Queue& queue, portabletest::TestDeviceMultiCollection2 const& collection) const {
438+
// create a work division with a single block and
439+
// - 32 threads with a single element per thread on a GPU backend
440+
// - 32 elements within a single thread on a CPU backend
441+
auto workDiv = make_workdiv<Acc1D>(1, 32);
442+
443+
// the kernels will make a strided loop over the launch grid to cover all elements in the collection
444+
alpaka::exec<Acc1D>(queue, workDiv, TestZeroCollectionKernel{}, collection.const_view<portabletest::TestSoA>());
445+
alpaka::exec<Acc1D>(
446+
queue, workDiv, TestZeroMultiCollectionKernel2{}, collection.const_view<portabletest::TestSoA2>());
447+
}
448+
449+
// Check that the collection has been filled with zeroes.
450+
void TestAlgo::checkZero(Queue& queue, portabletest::TestDeviceMultiCollection3 const& collection) const {
451+
// create a work division with a single block and
452+
// - 32 threads with a single element per thread on a GPU backend
453+
// - 32 elements within a single thread on a CPU backend
454+
auto workDiv = make_workdiv<Acc1D>(1, 32);
455+
456+
// the kernels will make a strided loop over the launch grid to cover all elements in the collection
457+
alpaka::exec<Acc1D>(queue, workDiv, TestZeroCollectionKernel{}, collection.const_view<portabletest::TestSoA>());
458+
alpaka::exec<Acc1D>(
459+
queue, workDiv, TestZeroMultiCollectionKernel2{}, collection.const_view<portabletest::TestSoA2>());
460+
alpaka::exec<Acc1D>(
461+
queue, workDiv, TestZeroMultiCollectionKernel3{}, collection.const_view<portabletest::TestSoA3>());
462+
}
463+
464+
// Check that the object has been filled with zeroes.
465+
void TestAlgo::checkZero(Queue& queue, portabletest::TestDeviceObject const& object) const {
466+
// create a work division with a single block and
467+
// - 32 threads with a single element per thread on a GPU backend
468+
// - 32 elements within a single thread on a CPU backend
469+
auto workDiv = make_workdiv<Acc1D>(1, 32);
470+
471+
// the kernel will actually use a single thread
472+
alpaka::exec<Acc1D>(queue, workDiv, TestZeroStructKernel{}, object.data());
473+
}
474+
345475
} // namespace ALPAKA_ACCELERATOR_NAMESPACE

HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlgo.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
2626

2727
void fillMulti2(Queue& queue, portabletest::TestDeviceMultiCollection2& collection, double xvalue = 0.) const;
2828
void fillMulti3(Queue& queue, portabletest::TestDeviceMultiCollection3& collection, double xvalue = 0.) const;
29+
30+
void checkZero(Queue& queue, portabletest::TestDeviceCollection const& collection) const;
31+
void checkZero(Queue& queue, portabletest::TestDeviceMultiCollection2 const& collection) const;
32+
void checkZero(Queue& queue, portabletest::TestDeviceMultiCollection3 const& collection) const;
33+
void checkZero(Queue& queue, portabletest::TestDeviceObject const& object) const;
2934
};
3035

3136
} // namespace ALPAKA_ACCELERATOR_NAMESPACE

HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaProducer.cc

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,18 +30,23 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
3030
void produce(edm::StreamID sid, device::Event& event, device::EventSetup const&) const override {
3131
// run the algorithm, potentially asynchronously
3232
portabletest::TestDeviceCollection deviceCollection{size_, event.queue()};
33+
deviceCollection.zeroInitialise(event.queue());
34+
algo_.checkZero(event.queue(), deviceCollection);
3335
algo_.fill(event.queue(), deviceCollection);
3436

3537
portabletest::TestDeviceObject deviceObject{event.queue()};
38+
deviceObject.zeroInitialise(event.queue());
39+
algo_.checkZero(event.queue(), deviceObject);
3640
algo_.fillObject(event.queue(), deviceObject, 5., 12., 13., 42);
3741

38-
portabletest::TestDeviceCollection deviceProduct{size_, event.queue()};
39-
algo_.fill(event.queue(), deviceProduct);
40-
4142
portabletest::TestDeviceMultiCollection2 deviceMultiProduct2{{{size_, size2_}}, event.queue()};
43+
deviceMultiProduct2.zeroInitialise(event.queue());
44+
algo_.checkZero(event.queue(), deviceMultiProduct2);
4245
algo_.fillMulti2(event.queue(), deviceMultiProduct2);
4346

4447
portabletest::TestDeviceMultiCollection3 deviceMultiProduct3{{{size_, size2_, size3_}}, event.queue()};
48+
deviceMultiProduct3.zeroInitialise(event.queue());
49+
algo_.checkZero(event.queue(), deviceMultiProduct3);
4550
algo_.fillMulti3(event.queue(), deviceMultiProduct3);
4651

4752
// put the asynchronous products into the event without waiting

0 commit comments

Comments
 (0)