Skip to content

Commit 83fdff9

Browse files
authored
Merge pull request #48629 from leobeltra/soa_blocks
Added SoABlocks feature
2 parents 9b48379 + e9bc89c commit 83fdff9

16 files changed

+1579
-23
lines changed

DataFormats/Portable/interface/PortableCollectionCommon.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,10 @@ namespace portablecollection {
9898
template <typename T, typename... Args>
9999
inline constexpr std::size_t typeIndex = TypeIndex<T, Args...>::value;
100100

101+
// concept to check if a Layout has a static member blocksNumber
102+
template <class L>
103+
concept hasBlocksNumber = requires { L::blocksNumber; };
104+
101105
} // namespace portablecollection
102106

103107
#endif // DataFormats_Portable_interface_PortableCollectionCommon_h

DataFormats/Portable/interface/PortableDeviceCollection.h

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#define DataFormats_Portable_interface_PortableDeviceCollection_h
33

44
#include <cassert>
5+
#include <concepts>
56
#include <optional>
67
#include <type_traits>
78

@@ -32,14 +33,16 @@ class PortableDeviceCollection {
3233
explicit PortableDeviceCollection(edm::Uninitialized) noexcept {}
3334

3435
PortableDeviceCollection(int32_t elements, TDev const& device)
36+
requires(!portablecollection::hasBlocksNumber<Layout>)
3537
: buffer_{cms::alpakatools::make_device_buffer<std::byte[]>(device, Layout::computeDataSize(elements))},
3638
layout_{buffer_->data(), elements},
3739
view_{layout_} {
3840
// Alpaka set to a default alignment of 128 bytes defining ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128
3941
assert(reinterpret_cast<uintptr_t>(buffer_->data()) % Layout::alignment == 0);
4042
}
4143

42-
template <typename TQueue, typename = std::enable_if_t<alpaka::isQueue<TQueue>>>
44+
template <typename TQueue>
45+
requires(alpaka::isQueue<TQueue> && (!portablecollection::hasBlocksNumber<Layout>))
4346
PortableDeviceCollection(int32_t elements, TQueue const& queue)
4447
: buffer_{cms::alpakatools::make_device_buffer<std::byte[]>(queue, Layout::computeDataSize(elements))},
4548
layout_{buffer_->data(), elements},
@@ -48,6 +51,44 @@ class PortableDeviceCollection {
4851
assert(reinterpret_cast<uintptr_t>(buffer_->data()) % Layout::alignment == 0);
4952
}
5053

54+
// constructor for SoA by blocks with a variadic of sizes
55+
template <std::integral... Ints>
56+
requires(portablecollection::hasBlocksNumber<Layout>)
57+
explicit PortableDeviceCollection(TDev const& device, Ints... sizes)
58+
requires(sizeof...(sizes) == Layout::blocksNumber)
59+
: PortableDeviceCollection(device, std::to_array({static_cast<int32_t>(sizes)...})) {}
60+
61+
// constructor for SoA by blocks with a variadic of sizes
62+
template <typename TQueue, std::integral... Ints>
63+
requires(alpaka::isQueue<TQueue> && portablecollection::hasBlocksNumber<Layout>)
64+
explicit PortableDeviceCollection(TQueue const& queue, Ints... sizes)
65+
requires(sizeof...(sizes) == Layout::blocksNumber)
66+
: PortableDeviceCollection(queue, std::to_array({static_cast<int32_t>(sizes)...})) {}
67+
68+
// constructor for SoA by blocks with an array of sizes
69+
template <std::size_t N>
70+
requires(portablecollection::hasBlocksNumber<Layout>)
71+
explicit PortableDeviceCollection(TDev const& device, std::array<int32_t, N> const& sizes)
72+
: buffer_{cms::alpakatools::make_device_buffer<std::byte[]>(device, Layout::computeDataSize(sizes))},
73+
layout_{buffer_->data(), sizes},
74+
view_{layout_} {
75+
static_assert(Layout::blocksNumber == N, "Number of sizes must match the number of blocks in the Layout");
76+
// Alpaka set to a default alignment of 128 bytes defining ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128
77+
assert(reinterpret_cast<uintptr_t>(buffer_->data()) % Layout::alignment == 0);
78+
}
79+
80+
// constructor for SoA by blocks with an array of sizes
81+
template <typename TQueue, std::size_t N>
82+
requires(alpaka::isQueue<TQueue> && portablecollection::hasBlocksNumber<Layout>)
83+
explicit PortableDeviceCollection(TQueue const& queue, std::array<int32_t, N> const& sizes)
84+
: buffer_{cms::alpakatools::make_device_buffer<std::byte[]>(queue, Layout::computeDataSize(sizes))},
85+
layout_{buffer_->data(), sizes},
86+
view_{layout_} {
87+
static_assert(Layout::blocksNumber == N, "Number of sizes must match the number of blocks in the Layout");
88+
// Alpaka set to a default alignment of 128 bytes defining ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128
89+
assert(reinterpret_cast<uintptr_t>(buffer_->data()) % Layout::alignment == 0);
90+
}
91+
5192
// non-copyable
5293
PortableDeviceCollection(PortableDeviceCollection const&) = delete;
5394
PortableDeviceCollection& operator=(PortableDeviceCollection const&) = delete;
@@ -76,13 +117,16 @@ class PortableDeviceCollection {
76117
ConstBuffer const_buffer() const { return *buffer_; }
77118

78119
// erases the data in the Buffer by writing zeros (bytes containing '\0') to it
79-
template <typename TQueue, typename = std::enable_if_t<alpaka::isQueue<TQueue>>>
120+
template <typename TQueue>
121+
requires(alpaka::isQueue<TQueue>)
80122
void zeroInitialise(TQueue&& queue) {
81123
alpaka::memset(std::forward<TQueue>(queue), *buffer_, 0x00);
82124
}
83125

84126
// Copy column by column heterogeneously for device to host/device data transfer.
127+
// TODO: implement heterogeneous deepCopy for SoA blocks
85128
template <typename TQueue>
129+
requires(alpaka::isQueue<TQueue> && (!portablecollection::hasBlocksNumber<Layout>))
86130
void deepCopy(ConstView const& view, TQueue& queue) {
87131
ConstDescriptor desc{view};
88132
Descriptor desc_{view_};

DataFormats/Portable/interface/PortableHostCollection.h

Lines changed: 51 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#define DataFormats_Portable_interface_PortableHostCollection_h
33

44
#include <cassert>
5+
#include <concepts>
56
#include <optional>
67

78
#include <alpaka/alpaka.hpp>
@@ -29,6 +30,7 @@ class PortableHostCollection {
2930
explicit PortableHostCollection(edm::Uninitialized) noexcept {};
3031

3132
PortableHostCollection(int32_t elements, alpaka_common::DevHost const& host)
33+
requires(!portablecollection::hasBlocksNumber<Layout>)
3234
// allocate pageable host memory
3335
: buffer_{cms::alpakatools::make_host_buffer<std::byte[]>(Layout::computeDataSize(elements))},
3436
layout_{buffer_->data(), elements},
@@ -37,7 +39,8 @@ class PortableHostCollection {
3739
assert(reinterpret_cast<uintptr_t>(buffer_->data()) % Layout::alignment == 0);
3840
}
3941

40-
template <typename TQueue, typename = std::enable_if_t<alpaka::isQueue<TQueue>>>
42+
template <typename TQueue>
43+
requires(alpaka::isQueue<TQueue> && (!portablecollection::hasBlocksNumber<Layout>))
4144
PortableHostCollection(int32_t elements, TQueue const& queue)
4245
// allocate pinned host memory associated to the given work queue, accessible by the queue's device
4346
: buffer_{cms::alpakatools::make_host_buffer<std::byte[]>(queue, Layout::computeDataSize(elements))},
@@ -49,6 +52,48 @@ class PortableHostCollection {
4952

5053
// constructor for code that does not use alpaka explicitly, using the global "host" object returned by cms::alpakatools::host()
5154
PortableHostCollection(int32_t elements) : PortableHostCollection(elements, cms::alpakatools::host()) {}
55+
// constructor for SoA by blocks with a variadic of sizes
56+
57+
template <std::integral... Ints>
58+
requires(portablecollection::hasBlocksNumber<Layout>)
59+
explicit PortableHostCollection(alpaka_common::DevHost const& host, Ints... sizes)
60+
requires(sizeof...(sizes) == Layout::blocksNumber)
61+
// allocate pageable host memory
62+
: PortableHostCollection(host, std::to_array({static_cast<int32_t>(sizes)...})) {}
63+
64+
// constructor for SoA by blocks with a variadic of sizes
65+
template <typename TQueue, std::integral... Ints>
66+
requires(alpaka::isQueue<TQueue> && portablecollection::hasBlocksNumber<Layout>)
67+
explicit PortableHostCollection(TQueue const& queue, Ints... sizes)
68+
requires(sizeof...(sizes) == Layout::blocksNumber)
69+
// allocate pinned host memory associated to the given work queue, accessible by the queue's device
70+
: PortableHostCollection(queue, std::to_array({static_cast<int32_t>(sizes)...})) {}
71+
72+
// constructor for SoA by blocks with an array of sizes
73+
template <std::size_t N>
74+
requires(portablecollection::hasBlocksNumber<Layout>)
75+
explicit PortableHostCollection(alpaka_common::DevHost const& host, std::array<int32_t, N> const& sizes)
76+
// allocate pageable host memory
77+
: buffer_{cms::alpakatools::make_host_buffer<std::byte[]>(Layout::computeDataSize(sizes))},
78+
layout_{buffer_->data(), sizes},
79+
view_{layout_} {
80+
static_assert(Layout::blocksNumber == N, "Number of sizes must match the number of blocks in the Layout");
81+
// Alpaka set to a default alignment of 128 bytes defining ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128
82+
assert(reinterpret_cast<uintptr_t>(buffer_->data()) % Layout::alignment == 0);
83+
}
84+
85+
// constructor for SoA by blocks with an array of sizes
86+
template <typename TQueue, std::size_t N>
87+
requires(alpaka::isQueue<TQueue> && portablecollection::hasBlocksNumber<Layout>)
88+
explicit PortableHostCollection(TQueue const& queue, std::array<int32_t, N> const& sizes)
89+
// allocate pinned host memory associated to the given work queue, accessible by the queue's device
90+
: buffer_{cms::alpakatools::make_host_buffer<std::byte[]>(queue, Layout::computeDataSize(sizes))},
91+
layout_{buffer_->data(), sizes},
92+
view_{layout_} {
93+
static_assert(Layout::blocksNumber == N, "Number of sizes must match the number of blocks in the Layout");
94+
// Alpaka set to a default alignment of 128 bytes defining ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128
95+
assert(reinterpret_cast<uintptr_t>(buffer_->data()) % Layout::alignment == 0);
96+
}
5297

5398
// non-copyable
5499
PortableHostCollection(PortableHostCollection const&) = delete;
@@ -82,7 +127,8 @@ class PortableHostCollection {
82127
std::memset(std::data(*buffer_), 0x00, alpaka::getExtentProduct(*buffer_) * sizeof(std::byte));
83128
}
84129

85-
template <typename TQueue, typename = std::enable_if_t<alpaka::isQueue<TQueue>>>
130+
template <typename TQueue>
131+
requires(alpaka::isQueue<TQueue>)
86132
void zeroInitialise(TQueue&& queue) {
87133
alpaka::memset(std::forward<TQueue>(queue), *buffer_, 0x00);
88134
}
@@ -99,12 +145,14 @@ class PortableHostCollection {
99145
layout.ROOTStreamerCleaner();
100146
}
101147

102-
// Copy column by column the content of the given view into this PortableHostCollection.
148+
// Copy column by column the content of the given ConstView into this PortableHostCollection.
103149
// The view must point to data in host memory.
104150
void deepCopy(ConstView const& view) { layout_.deepCopy(view); }
105151

106152
// Copy column by column heterogeneously for device to host data transfer.
153+
// TODO: implement heterogeneous deepCopy for SoA blocks
107154
template <typename TQueue>
155+
requires(alpaka::isQueue<TQueue> && (!portablecollection::hasBlocksNumber<Layout>))
108156
void deepCopy(ConstView const& view, TQueue& queue) {
109157
ConstDescriptor desc{view};
110158
Descriptor desc_{view_};
Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,24 @@
11
<bin name="TestDataFormatsPortableOnHost" file="test_catch2_*.cc">
2-
<use name="DataFormats/Portable"/>
3-
<use name="DataFormats/SoATemplate"/>
42
<use name="catch2"/>
53
<use name="eigen"/>
4+
<use name="DataFormats/Portable"/>
5+
<use name="DataFormats/SoATemplate"/>
66
</bin>
77

88
<bin name="TestDataFormatsPortableHeterogenous" file="alpaka/test_catch2_heterogeneousDeepCopy.dev.cc">
9+
<use name="catch2"/>
10+
<use name="eigen"/>
911
<use name="DataFormats/Portable"/>
1012
<use name="DataFormats/SoATemplate"/>
13+
<use name="HeterogeneousCore/AlpakaInterface"/>
14+
<flags ALPAKA_BACKENDS="1"/>
15+
</bin>
16+
17+
<bin name="TestDataFormatsPortableSoABlocks" file="alpaka/test_catch2_heterogeneousSoABlocks.dev.cc">
1118
<use name="catch2"/>
1219
<use name="eigen"/>
20+
<use name="DataFormats/Portable"/>
21+
<use name="DataFormats/SoATemplate"/>
1322
<use name="HeterogeneousCore/AlpakaInterface"/>
1423
<flags ALPAKA_BACKENDS="1"/>
1524
</bin>
Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
#include <Eigen/Core>
2+
#include <Eigen/Dense>
3+
4+
#include <alpaka/alpaka.hpp>
5+
6+
#define CATCH_CONFIG_MAIN
7+
#include <catch2/catch_all.hpp>
8+
9+
#include "DataFormats/SoATemplate/interface/SoABlocks.h"
10+
#include "DataFormats/Portable/interface/PortableCollection.h"
11+
#include "HeterogeneousCore/AlpakaInterface/interface/config.h"
12+
#include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
13+
#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
14+
15+
using namespace ALPAKA_ACCELERATOR_NAMESPACE;
16+
17+
// This test checks the correctness of using SoABlocks with PortableCollections.
18+
19+
GENERATE_SOA_LAYOUT(NodesT, SOA_COLUMN(int, id), SOA_SCALAR(int, count))
20+
21+
using Nodes = NodesT<>;
22+
23+
GENERATE_SOA_LAYOUT(EdgesT, SOA_COLUMN(int, src), SOA_COLUMN(int, dst), SOA_COLUMN(float, cost), SOA_SCALAR(int, count))
24+
25+
using Edges = EdgesT<>;
26+
27+
GENERATE_SOA_BLOCKS(GraphT, SOA_BLOCK(nodes, NodesT), SOA_BLOCK(edges, EdgesT))
28+
29+
using Graph = GraphT<>;
30+
using GraphView = Graph::View;
31+
using GraphConstView = Graph::ConstView;
32+
33+
// Fill SoAs
34+
struct FillSoAs {
35+
ALPAKA_FN_ACC void operator()(Acc1D const& acc, Nodes::View nodes, Edges::View edges) const {
36+
const int N = static_cast<int>(nodes.metadata().size());
37+
const int E = static_cast<int>(edges.metadata().size());
38+
39+
// Fill nodes with the indexes
40+
for (auto i : cms::alpakatools::uniform_elements(acc, nodes.metadata().size())) {
41+
nodes[i].id() = static_cast<int>(i);
42+
}
43+
if (cms::alpakatools::once_per_grid(acc)) {
44+
nodes.count() = N;
45+
}
46+
47+
// Fill edges with some arbitrary but deterministic values
48+
for (auto j : cms::alpakatools::uniform_elements(acc, edges.metadata().size())) {
49+
int src = static_cast<int>(j % N);
50+
int dst = static_cast<int>((j * 7 + 3) % N);
51+
edges[j].src() = src;
52+
edges[j].dst() = dst;
53+
edges[j].cost() = 0.5f * float(src + dst);
54+
}
55+
if (cms::alpakatools::once_per_grid(acc)) {
56+
edges.count() = E;
57+
}
58+
}
59+
};
60+
61+
// Fill SoABlocks
62+
struct FillBlocks {
63+
ALPAKA_FN_ACC void operator()(Acc1D const& acc, GraphView blocksView) const {
64+
const int N = static_cast<int>(blocksView.nodes().metadata().size());
65+
const int E = static_cast<int>(blocksView.edges().metadata().size());
66+
67+
// Fill nodes with the indexes
68+
for (auto i : cms::alpakatools::uniform_elements(acc, blocksView.nodes().metadata().size())) {
69+
blocksView.nodes()[i].id() = static_cast<int>(i);
70+
}
71+
if (cms::alpakatools::once_per_grid(acc)) {
72+
blocksView.nodes().count() = N;
73+
}
74+
75+
// Fill edges with some arbitrary but deterministic values
76+
for (auto j : cms::alpakatools::uniform_elements(acc, blocksView.edges().metadata().size())) {
77+
int src = static_cast<int>(j % N);
78+
int dst = static_cast<int>((j * 7 + 3) % N);
79+
blocksView.edges()[j].src() = src;
80+
blocksView.edges()[j].dst() = dst;
81+
blocksView.edges()[j].cost() = 0.5f * float(src + dst);
82+
}
83+
if (cms::alpakatools::once_per_grid(acc)) {
84+
blocksView.edges().count() = E;
85+
}
86+
}
87+
};
88+
89+
TEST_CASE("SoABlocks minimal graph in heterogeneous environment") {
90+
auto const& devices = cms::alpakatools::devices<Platform>();
91+
if (devices.empty()) {
92+
std::cout << "No devices available for the " << EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE)
93+
<< " backend, skipping.\n";
94+
return;
95+
}
96+
97+
for (auto const& device : devices) {
98+
std::cout << "Running on " << alpaka::getName(device) << std::endl;
99+
Queue queue(device);
100+
101+
// Number of elements
102+
const int N = 50;
103+
const int E = 120;
104+
105+
// Portable Collections for SoAs
106+
PortableCollection<Nodes, Device> nodesCollection(N, queue);
107+
PortableCollection<Edges, Device> edgesCollection(E, queue);
108+
Nodes::View& nodesCollectionView = nodesCollection.view();
109+
Edges::View& edgesCollectionView = edgesCollection.view();
110+
111+
// Portable Collection for SoABlocks
112+
PortableCollection<Graph, Device> graphCollection(queue, N, E);
113+
GraphView& graphCollectionView = graphCollection.view();
114+
115+
// Work division
116+
const std::size_t blockSize = 256;
117+
const std::size_t maxElems = std::max<std::size_t>(N, E);
118+
const std::size_t numberOfBlocks = cms::alpakatools::divide_up_by(maxElems, blockSize);
119+
const auto workDiv = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
120+
121+
// Fill: separate e blocks
122+
alpaka::exec<Acc1D>(queue, workDiv, FillSoAs{}, nodesCollectionView, edgesCollectionView);
123+
alpaka::exec<Acc1D>(queue, workDiv, FillBlocks{}, graphCollectionView);
124+
alpaka::wait(queue);
125+
126+
// Check results on host
127+
PortableHostCollection<Nodes> nodesHost(N, cms::alpakatools::host());
128+
PortableHostCollection<Edges> edgesHost(E, cms::alpakatools::host());
129+
PortableHostCollection<Graph> graphHost(cms::alpakatools::host(), N, E);
130+
131+
alpaka::memcpy(queue, nodesHost.buffer(), nodesCollection.buffer());
132+
alpaka::memcpy(queue, edgesHost.buffer(), edgesCollection.buffer());
133+
alpaka::memcpy(queue, graphHost.buffer(), graphCollection.buffer());
134+
alpaka::wait(queue);
135+
136+
const Nodes::ConstView nodesHostView = nodesHost.const_view();
137+
const Edges::ConstView edgesHostView = edgesHost.const_view();
138+
const GraphConstView graphHostView = graphHost.const_view();
139+
140+
// Nodes
141+
REQUIRE(graphHostView.nodes().count() == N);
142+
for (int i = 0; i < N; ++i) {
143+
REQUIRE(graphHostView.nodes()[i].id() == nodesHostView[i].id());
144+
REQUIRE(graphHostView.nodes()[i].id() == i);
145+
}
146+
147+
// Edges
148+
REQUIRE(graphHostView.edges().count() == E);
149+
for (int j = 0; j < E; ++j) {
150+
REQUIRE(graphHostView.edges()[j].src() == edgesHostView[j].src());
151+
REQUIRE(graphHostView.edges()[j].dst() == edgesHostView[j].dst());
152+
REQUIRE(graphHostView.edges()[j].cost() == edgesHostView[j].cost());
153+
154+
int src = j % N;
155+
int dst = (j * 7 + 3) % N;
156+
REQUIRE(graphHostView.edges()[j].src() == src);
157+
REQUIRE(graphHostView.edges()[j].dst() == dst);
158+
REQUIRE_THAT(graphHostView.edges()[j].cost(), Catch::Matchers::WithinAbs(0.5f * float(src + dst), 1e-6));
159+
}
160+
}
161+
}

0 commit comments

Comments
 (0)