Skip to content

Commit 79a3178

Browse files
authored
Merge pull request #5 from SCOREC/multi-slice-cab-factory
Multi slice cab factory
2 parents 04c3355 + d476e54 commit 79a3178

File tree

3 files changed

+281
-19
lines changed

3 files changed

+281
-19
lines changed

CMakeLists.txt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,4 +22,7 @@ target_link_libraries(SliceWrapper Omega_h::omega_h)
2222
target_link_libraries(SliceWrapper Cabana::cabanacore)
2323
target_compile_definitions(SliceWrapper PUBLIC ENABLE_CABANA)
2424

25-
add_test(sliceWrapper ./SliceWrapper)
25+
add_test(sliceWrapper10 ./SliceWrapper 10)
26+
add_test(sliceWrapper32 ./SliceWrapper 32)
27+
add_test(sliceWrapper33 ./SliceWrapper 33)
28+
add_test(sliceWrapper50 ./SliceWrapper 50)

src/SliceWrapper.hpp

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,22 @@ struct SliceWrapper {
1111
SliceWrapper(SliceType st) : st_(st) {}
1212

1313
KOKKOS_INLINE_FUNCTION
14-
T& access(const int s, const int a) const {
14+
T& access(int s, int a) const {
1515
return st_.access(s,a);
1616
}
17-
int arraySize(int s) {
18-
return st_.arraySize(s);
17+
KOKKOS_INLINE_FUNCTION
18+
auto& access(int s, int a, int i) const {
19+
return st_.access(s,a,i);
1920
}
20-
int numSoA() {
21-
return st_.numSoA();
21+
KOKKOS_INLINE_FUNCTION
22+
auto& access(int s, int a, int i, int j) const {
23+
return st_.access(s,a,i,j);
2224
}
25+
KOKKOS_INLINE_FUNCTION
26+
auto& access(int s, int a, int i, int j, int k) const {
27+
return st_.access(s,a,i,j,k);
28+
}
29+
2330
};
2431

2532
using namespace Cabana;
@@ -33,6 +40,13 @@ class CabSliceFactory {
3340
static constexpr int vecLen = Cabana::AoSoA<DataTypes, DeviceType>::vector_length;
3441
private:
3542
using soa_t = SoA<DataTypes, vecLen>;
43+
44+
template <std::size_t index>
45+
using member_data_t = typename Cabana::MemberTypeAtIndex<index, DataTypes>::type;
46+
47+
template <std::size_t index>
48+
using member_value_t =
49+
typename std::remove_all_extents<member_data_t<index>>::type;
3650

3751
template <class T, int stride>
3852
using member_slice_t =
@@ -49,12 +63,16 @@ class CabSliceFactory {
4963
template <std::size_t index>
5064
auto makeSliceCab() {
5165
using type = std::tuple_element_t<index, TypeTuple>;
52-
const int stride = sizeof(soa_t) / sizeof(type);
66+
const int stride = sizeof(soa_t) / sizeof(member_value_t<index>);
5367
auto slice = Cabana::slice<index>(aosoa);
5468
return wrapper_slice_t< type, stride >(std::move(slice));
5569
}
5670

57-
CabSliceFactory(int n) : aosoa("sliceAoSoA", n) {}
71+
CabSliceFactory(int n) : aosoa("sliceAoSoA", n) {
72+
if (sizeof...(Ts) == 0) {
73+
throw std::invalid_argument("Must provide at least one member type in template definition");
74+
}
75+
}
5876
};
5977

6078

test/SliceWrapper.cpp

Lines changed: 252 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,175 @@
11
#include "SliceWrapper.hpp"
2+
#include <stdio.h>
23

3-
int main(int argc, char* argv[]) {
4-
// AoSoA parameters
5-
int num_tuples = 10;
4+
int rank1_array_test(int num_tuples) {
5+
using ExecutionSpace = Kokkos::DefaultExecutionSpace;
6+
using MemorySpace = ExecutionSpace::memory_space;
7+
8+
const int width = 3;
69

7-
Kokkos::ScopeGuard scope_guard(argc, argv);
10+
// Slice Wrapper Factory
11+
CabSliceFactory<ExecutionSpace, MemorySpace,
12+
double[width]> cabSliceFactory(num_tuples);
13+
14+
auto slice_wrapper0 = cabSliceFactory.makeSliceCab<0>();
15+
16+
// simd_parallel_for setup
17+
Cabana::SimdPolicy<cabSliceFactory.vecLen, ExecutionSpace> simd_policy(0, num_tuples);
18+
19+
// kernel that reads and writes
20+
auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) {
21+
for (int i = 0; i < width; i++) {
22+
double x = 42/(s+a+i+1.3);
23+
slice_wrapper0.access(s,a,i) = x;
24+
assert(slice_wrapper0.access(s,a,i) == x);
25+
}
26+
};
827

28+
Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_rank1_array_test");
29+
return 0;
30+
31+
}
32+
33+
int rank2_array_test(int num_tuples) {
34+
using ExecutionSpace = Kokkos::DefaultExecutionSpace;
35+
using MemorySpace = ExecutionSpace::memory_space;
36+
37+
const int width = 3;
38+
const int height = 4;
39+
40+
// Slice Wrapper Factory
41+
CabSliceFactory<ExecutionSpace, MemorySpace,
42+
double[width][height]> cabSliceFactory(num_tuples);
43+
44+
auto slice_wrapper0 = cabSliceFactory.makeSliceCab<0>();
45+
46+
// simd_parallel_for setup
47+
Cabana::SimdPolicy<cabSliceFactory.vecLen, ExecutionSpace> simd_policy(0, num_tuples);
48+
49+
// kernel that reads and writes
50+
auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) {
51+
for (int i = 0; i < width; i++) {
52+
for (int j = 0; j < height; j++) {
53+
double x = 42/(s+a+i+j+1.3);
54+
slice_wrapper0.access(s,a,i,j) = x;
55+
assert(slice_wrapper0.access(s,a,i,j) == x);
56+
}
57+
}
58+
};
59+
60+
Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_rank2_array_test");
61+
return 0;
62+
}
63+
64+
int rank3_array_test(int num_tuples) {
65+
using ExecutionSpace = Kokkos::DefaultExecutionSpace;
66+
using MemorySpace = ExecutionSpace::memory_space;
67+
68+
const int width = 3;
69+
const int height = 4;
70+
const int depth = 2;
71+
72+
// Slice Wrapper Factory
73+
CabSliceFactory<ExecutionSpace, MemorySpace,
74+
double[width][height][depth]> cabSliceFactory(num_tuples);
75+
76+
auto slice_wrapper0 = cabSliceFactory.makeSliceCab<0>();
77+
78+
// simd_parallel_for setup
79+
Cabana::SimdPolicy<cabSliceFactory.vecLen, ExecutionSpace> simd_policy(0, num_tuples);
80+
81+
// kernel that reads and writes
82+
auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) {
83+
for (int i = 0; i < width; i++) {
84+
for (int j = 0; j < height; j++) {
85+
for (int k = 0; k < depth; k++) {
86+
double x = 42/(s+a+i+j+k+1.3);
87+
slice_wrapper0.access(s,a,i,j,k) = x;
88+
assert(slice_wrapper0.access(s,a,i,j,k) == x);
89+
}
90+
}
91+
}
92+
};
93+
94+
Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_rank3_array_test");
95+
return 0;
96+
}
97+
98+
int mix_arrays_test(int num_tuples) {
99+
using ExecutionSpace = Kokkos::DefaultExecutionSpace;
100+
using MemorySpace = ExecutionSpace::memory_space;
101+
102+
const int width = 3;
103+
const int height = 4;
104+
const int depth = 2;
105+
106+
// Slice Wrapper Factory
107+
CabSliceFactory<ExecutionSpace, MemorySpace,
108+
double[width], char, double[width][height][depth],
109+
float[width][height]> cabSliceFactory(num_tuples);
110+
111+
auto slice_wrapper0 = cabSliceFactory.makeSliceCab<0>();
112+
auto slice_wrapper1 = cabSliceFactory.makeSliceCab<1>();
113+
auto slice_wrapper2 = cabSliceFactory.makeSliceCab<2>();
114+
auto slice_wrapper3 = cabSliceFactory.makeSliceCab<3>();
115+
116+
// simd_parallel_for setup
117+
Cabana::SimdPolicy<cabSliceFactory.vecLen, ExecutionSpace> simd_policy(0, num_tuples);
118+
119+
// kernel that reads and writes
120+
auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) {
121+
char x0 = 'a'+(s+a);
122+
slice_wrapper1.access(s,a) = x0;
123+
assert(slice_wrapper1.access(s,a) == x0);
124+
125+
for (int i = 0; i < width; i++) {
126+
double x1 = 42/(s+a+i+1.3);
127+
slice_wrapper0.access(s,a,i) = x1;
128+
assert(slice_wrapper0.access(s,a,i) == x1);
129+
130+
for (int j = 0; j < height; j++) {
131+
double x2 = float(x1/(j+1.2));
132+
slice_wrapper3.access(s,a,i,j) = x2;
133+
assert(slice_wrapper3.access(s,a,i,j) == x2);
134+
135+
for (int k = 0; k < depth; k++) {
136+
double x3 = x2*x1+k;
137+
slice_wrapper2.access(s,a,i,j,k) = x3;
138+
assert(slice_wrapper2.access(s,a,i,j,k) == x3);
139+
}
140+
}
141+
}
142+
};
143+
144+
Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_mix_arrays_test");
145+
return 0;
146+
}
147+
148+
int single_type_test(int num_tuples) {
149+
using ExecutionSpace = Kokkos::DefaultExecutionSpace;
150+
using MemorySpace = ExecutionSpace::memory_space;
151+
152+
// Slice Wrapper Factory
153+
CabSliceFactory<ExecutionSpace, MemorySpace,
154+
double> cabSliceFactory(num_tuples);
155+
156+
auto slice_wrapper0 = cabSliceFactory.makeSliceCab<0>();
157+
158+
// simd_parallel_for setup
159+
Cabana::SimdPolicy<cabSliceFactory.vecLen, ExecutionSpace> simd_policy(0, num_tuples);
160+
161+
// kernel that reads and writes
162+
auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) {
163+
double x = 42/(s+a+1.3);
164+
slice_wrapper0.access(s,a) = x;
165+
assert(slice_wrapper0.access(s,a) == x);
166+
};
167+
168+
Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_single_type_test");
169+
return 0;
170+
}
171+
172+
int multi_type_test(int num_tuples) {
9173
using ExecutionSpace = Kokkos::DefaultExecutionSpace;
10174
using MemorySpace = ExecutionSpace::memory_space;
11175

@@ -23,20 +187,97 @@ int main(int argc, char* argv[]) {
23187

24188
// kernel that reads and writes
25189
auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) {
26-
printf("s: %d, a: %d\n", s,a);
27190
double x = 42/(s+a+1.3);
191+
char c = 'a'+s+a;
28192
slice_wrapper0.access(s,a) = x;
29193
slice_wrapper1.access(s,a) = s+a;
30194
slice_wrapper2.access(s,a) = float(x);
31-
slice_wrapper3.access(s,a) = 'a'+s+a;
32-
printf("SW0 value: %lf\n", slice_wrapper0.access(s,a));
33-
printf("SW1 value: %d\n", slice_wrapper1.access(s,a));
34-
printf("SW2 value: %f\n", slice_wrapper2.access(s,a));
35-
printf("SW3 value: %c\n", slice_wrapper3.access(s,a));
195+
slice_wrapper3.access(s,a) = c;
196+
197+
assert(slice_wrapper0.access(s,a) == x);
198+
assert(slice_wrapper1.access(s,a) == s+a);
199+
assert(slice_wrapper2.access(s,a) == float(x));
200+
assert(slice_wrapper3.access(s,a) == c);
201+
};
202+
203+
Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_multi_type_test");
204+
return 0;
205+
}
206+
207+
int many_type_test(int num_tuples) {
208+
using ExecutionSpace = Kokkos::DefaultExecutionSpace;
209+
using MemorySpace = ExecutionSpace::memory_space;
210+
211+
// Slice Wrapper Factory
212+
CabSliceFactory<ExecutionSpace, MemorySpace,
213+
long double, double, double,
214+
long unsigned int, float, int,
215+
short int, char, char> cabSliceFactory(num_tuples);
216+
217+
auto slice_wrapper0 = cabSliceFactory.makeSliceCab<0>();
218+
auto slice_wrapper1 = cabSliceFactory.makeSliceCab<1>();
219+
auto slice_wrapper2 = cabSliceFactory.makeSliceCab<2>();
220+
auto slice_wrapper3 = cabSliceFactory.makeSliceCab<3>();
221+
auto slice_wrapper4 = cabSliceFactory.makeSliceCab<4>();
222+
auto slice_wrapper5 = cabSliceFactory.makeSliceCab<5>();
223+
auto slice_wrapper6 = cabSliceFactory.makeSliceCab<6>();
224+
auto slice_wrapper7 = cabSliceFactory.makeSliceCab<7>();
225+
auto slice_wrapper8 = cabSliceFactory.makeSliceCab<8>();
226+
227+
// simd_parallel_for setup
228+
Cabana::SimdPolicy<cabSliceFactory.vecLen, ExecutionSpace> simd_policy(0, num_tuples);
229+
230+
// kernel that reads and writes
231+
auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) {
232+
double d0 = 42/(s+a+1.3);
233+
double d1 = d0*d0;
234+
double d2 = d1/123.456751;
235+
float f0 = 0;
236+
char c0 = 'a'+s+a;
237+
char c1 = 'a'+((s*a+a) % 26);
238+
int i0 = s+a;
239+
int i1 = s+a/int(d0);
240+
int i2 = i0+i1;
241+
slice_wrapper0.access(s,a) = d0;
242+
slice_wrapper1.access(s,a) = d1;
243+
slice_wrapper2.access(s,a) = d2;
244+
slice_wrapper3.access(s,a) = i0;
245+
slice_wrapper4.access(s,a) = f0;
246+
slice_wrapper5.access(s,a) = i1;
247+
slice_wrapper6.access(s,a) = i2;
248+
slice_wrapper7.access(s,a) = c0;
249+
slice_wrapper8.access(s,a) = c1;
250+
251+
assert(slice_wrapper0.access(s,a) == d0);
252+
assert(slice_wrapper1.access(s,a) == d1);
253+
assert(slice_wrapper2.access(s,a) == d2);
254+
assert(slice_wrapper3.access(s,a) == i0);
255+
assert(slice_wrapper4.access(s,a) == f0);
256+
assert(slice_wrapper5.access(s,a) == i1);
257+
assert(slice_wrapper6.access(s,a) == i2);
258+
assert(slice_wrapper7.access(s,a) == c0);
259+
assert(slice_wrapper8.access(s,a) == c1);
36260
};
37261

38-
Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_cabSliceFactory");
262+
Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_many_type_test");
263+
return 0;
264+
}
265+
266+
int main(int argc, char* argv[]) {
267+
// AoSoA parameters
268+
int num_tuples = atoi(argv[1]);
269+
270+
Kokkos::ScopeGuard scope_guard(argc, argv);
39271

272+
many_type_test(num_tuples);
273+
single_type_test(num_tuples);
274+
multi_type_test(num_tuples);
275+
276+
rank1_array_test(num_tuples);
277+
rank2_array_test(num_tuples);
278+
rank3_array_test(num_tuples);
279+
mix_arrays_test(num_tuples);
280+
40281
assert(cudaSuccess == cudaDeviceSynchronize());
41282
printf("done\n");
42283

0 commit comments

Comments
 (0)