11#include " SliceWrapper.hpp"
2+ #include < stdio.h>
23
3- int main (int argc, char * argv[]) {
4- // AoSoA parameters
5- int num_tuples = 10 ;
4+ int rank1_array_test (int num_tuples) {
5+ using ExecutionSpace = Kokkos::DefaultExecutionSpace;
6+ using MemorySpace = ExecutionSpace::memory_space;
7+
8+ const int width = 3 ;
69
7- Kokkos::ScopeGuard scope_guard (argc, argv);
10+ // Slice Wrapper Factory
11+ CabSliceFactory<ExecutionSpace, MemorySpace,
12+ double [width]> cabSliceFactory (num_tuples);
13+
14+ auto slice_wrapper0 = cabSliceFactory.makeSliceCab <0 >();
15+
16+ // simd_parallel_for setup
17+ Cabana::SimdPolicy<cabSliceFactory.vecLen , ExecutionSpace> simd_policy (0 , num_tuples);
18+
19+ // kernel that reads and writes
20+ auto vector_kernel = KOKKOS_LAMBDA (const int s, const int a) {
21+ for (int i = 0 ; i < width; i++) {
22+ double x = 42 /(s+a+i+1.3 );
23+ slice_wrapper0.access (s,a,i) = x;
24+ assert (slice_wrapper0.access (s,a,i) == x);
25+ }
26+ };
827
28+ Cabana::simd_parallel_for (simd_policy, vector_kernel, " parallel_for_rank1_array_test" );
29+ return 0 ;
30+
31+ }
32+
33+ int rank2_array_test (int num_tuples) {
34+ using ExecutionSpace = Kokkos::DefaultExecutionSpace;
35+ using MemorySpace = ExecutionSpace::memory_space;
36+
37+ const int width = 3 ;
38+ const int height = 4 ;
39+
40+ // Slice Wrapper Factory
41+ CabSliceFactory<ExecutionSpace, MemorySpace,
42+ double [width][height]> cabSliceFactory (num_tuples);
43+
44+ auto slice_wrapper0 = cabSliceFactory.makeSliceCab <0 >();
45+
46+ // simd_parallel_for setup
47+ Cabana::SimdPolicy<cabSliceFactory.vecLen , ExecutionSpace> simd_policy (0 , num_tuples);
48+
49+ // kernel that reads and writes
50+ auto vector_kernel = KOKKOS_LAMBDA (const int s, const int a) {
51+ for (int i = 0 ; i < width; i++) {
52+ for (int j = 0 ; j < height; j++) {
53+ double x = 42 /(s+a+i+j+1.3 );
54+ slice_wrapper0.access (s,a,i,j) = x;
55+ assert (slice_wrapper0.access (s,a,i,j) == x);
56+ }
57+ }
58+ };
59+
60+ Cabana::simd_parallel_for (simd_policy, vector_kernel, " parallel_for_rank2_array_test" );
61+ return 0 ;
62+ }
63+
64+ int rank3_array_test (int num_tuples) {
65+ using ExecutionSpace = Kokkos::DefaultExecutionSpace;
66+ using MemorySpace = ExecutionSpace::memory_space;
67+
68+ const int width = 3 ;
69+ const int height = 4 ;
70+ const int depth = 2 ;
71+
72+ // Slice Wrapper Factory
73+ CabSliceFactory<ExecutionSpace, MemorySpace,
74+ double [width][height][depth]> cabSliceFactory (num_tuples);
75+
76+ auto slice_wrapper0 = cabSliceFactory.makeSliceCab <0 >();
77+
78+ // simd_parallel_for setup
79+ Cabana::SimdPolicy<cabSliceFactory.vecLen , ExecutionSpace> simd_policy (0 , num_tuples);
80+
81+ // kernel that reads and writes
82+ auto vector_kernel = KOKKOS_LAMBDA (const int s, const int a) {
83+ for (int i = 0 ; i < width; i++) {
84+ for (int j = 0 ; j < height; j++) {
85+ for (int k = 0 ; k < depth; k++) {
86+ double x = 42 /(s+a+i+j+k+1.3 );
87+ slice_wrapper0.access (s,a,i,j,k) = x;
88+ assert (slice_wrapper0.access (s,a,i,j,k) == x);
89+ }
90+ }
91+ }
92+ };
93+
94+ Cabana::simd_parallel_for (simd_policy, vector_kernel, " parallel_for_rank3_array_test" );
95+ return 0 ;
96+ }
97+
98+ int mix_arrays_test (int num_tuples) {
99+ using ExecutionSpace = Kokkos::DefaultExecutionSpace;
100+ using MemorySpace = ExecutionSpace::memory_space;
101+
102+ const int width = 3 ;
103+ const int height = 4 ;
104+ const int depth = 2 ;
105+
106+ // Slice Wrapper Factory
107+ CabSliceFactory<ExecutionSpace, MemorySpace,
108+ double [width], char , double [width][height][depth],
109+ float [width][height]> cabSliceFactory (num_tuples);
110+
111+ auto slice_wrapper0 = cabSliceFactory.makeSliceCab <0 >();
112+ auto slice_wrapper1 = cabSliceFactory.makeSliceCab <1 >();
113+ auto slice_wrapper2 = cabSliceFactory.makeSliceCab <2 >();
114+ auto slice_wrapper3 = cabSliceFactory.makeSliceCab <3 >();
115+
116+ // simd_parallel_for setup
117+ Cabana::SimdPolicy<cabSliceFactory.vecLen , ExecutionSpace> simd_policy (0 , num_tuples);
118+
119+ // kernel that reads and writes
120+ auto vector_kernel = KOKKOS_LAMBDA (const int s, const int a) {
121+ char x0 = ' a' +(s+a);
122+ slice_wrapper1.access (s,a) = x0;
123+ assert (slice_wrapper1.access (s,a) == x0);
124+
125+ for (int i = 0 ; i < width; i++) {
126+ double x1 = 42 /(s+a+i+1.3 );
127+ slice_wrapper0.access (s,a,i) = x1;
128+ assert (slice_wrapper0.access (s,a,i) == x1);
129+
130+ for (int j = 0 ; j < height; j++) {
131+ double x2 = float (x1/(j+1.2 ));
132+ slice_wrapper3.access (s,a,i,j) = x2;
133+ assert (slice_wrapper3.access (s,a,i,j) == x2);
134+
135+ for (int k = 0 ; k < depth; k++) {
136+ double x3 = x2*x1+k;
137+ slice_wrapper2.access (s,a,i,j,k) = x3;
138+ assert (slice_wrapper2.access (s,a,i,j,k) == x3);
139+ }
140+ }
141+ }
142+ };
143+
144+ Cabana::simd_parallel_for (simd_policy, vector_kernel, " parallel_for_mix_arrays_test" );
145+ return 0 ;
146+ }
147+
148+ int single_type_test (int num_tuples) {
149+ using ExecutionSpace = Kokkos::DefaultExecutionSpace;
150+ using MemorySpace = ExecutionSpace::memory_space;
151+
152+ // Slice Wrapper Factory
153+ CabSliceFactory<ExecutionSpace, MemorySpace,
154+ double > cabSliceFactory (num_tuples);
155+
156+ auto slice_wrapper0 = cabSliceFactory.makeSliceCab <0 >();
157+
158+ // simd_parallel_for setup
159+ Cabana::SimdPolicy<cabSliceFactory.vecLen , ExecutionSpace> simd_policy (0 , num_tuples);
160+
161+ // kernel that reads and writes
162+ auto vector_kernel = KOKKOS_LAMBDA (const int s, const int a) {
163+ double x = 42 /(s+a+1.3 );
164+ slice_wrapper0.access (s,a) = x;
165+ assert (slice_wrapper0.access (s,a) == x);
166+ };
167+
168+ Cabana::simd_parallel_for (simd_policy, vector_kernel, " parallel_for_single_type_test" );
169+ return 0 ;
170+ }
171+
172+ int multi_type_test (int num_tuples) {
9173 using ExecutionSpace = Kokkos::DefaultExecutionSpace;
10174 using MemorySpace = ExecutionSpace::memory_space;
11175
@@ -23,20 +187,97 @@ int main(int argc, char* argv[]) {
23187
24188 // kernel that reads and writes
25189 auto vector_kernel = KOKKOS_LAMBDA (const int s, const int a) {
26- printf (" s: %d, a: %d\n " , s,a);
27190 double x = 42 /(s+a+1.3 );
191+ char c = ' a' +s+a;
28192 slice_wrapper0.access (s,a) = x;
29193 slice_wrapper1.access (s,a) = s+a;
30194 slice_wrapper2.access (s,a) = float (x);
31- slice_wrapper3.access (s,a) = ' a' +s+a;
32- printf (" SW0 value: %lf\n " , slice_wrapper0.access (s,a));
33- printf (" SW1 value: %d\n " , slice_wrapper1.access (s,a));
34- printf (" SW2 value: %f\n " , slice_wrapper2.access (s,a));
35- printf (" SW3 value: %c\n " , slice_wrapper3.access (s,a));
195+ slice_wrapper3.access (s,a) = c;
196+
197+ assert (slice_wrapper0.access (s,a) == x);
198+ assert (slice_wrapper1.access (s,a) == s+a);
199+ assert (slice_wrapper2.access (s,a) == float (x));
200+ assert (slice_wrapper3.access (s,a) == c);
201+ };
202+
203+ Cabana::simd_parallel_for (simd_policy, vector_kernel, " parallel_for_multi_type_test" );
204+ return 0 ;
205+ }
206+
207+ int many_type_test (int num_tuples) {
208+ using ExecutionSpace = Kokkos::DefaultExecutionSpace;
209+ using MemorySpace = ExecutionSpace::memory_space;
210+
211+ // Slice Wrapper Factory
212+ CabSliceFactory<ExecutionSpace, MemorySpace,
213+ long double , double , double ,
214+ long unsigned int , float , int ,
215+ short int , char , char > cabSliceFactory (num_tuples);
216+
217+ auto slice_wrapper0 = cabSliceFactory.makeSliceCab <0 >();
218+ auto slice_wrapper1 = cabSliceFactory.makeSliceCab <1 >();
219+ auto slice_wrapper2 = cabSliceFactory.makeSliceCab <2 >();
220+ auto slice_wrapper3 = cabSliceFactory.makeSliceCab <3 >();
221+ auto slice_wrapper4 = cabSliceFactory.makeSliceCab <4 >();
222+ auto slice_wrapper5 = cabSliceFactory.makeSliceCab <5 >();
223+ auto slice_wrapper6 = cabSliceFactory.makeSliceCab <6 >();
224+ auto slice_wrapper7 = cabSliceFactory.makeSliceCab <7 >();
225+ auto slice_wrapper8 = cabSliceFactory.makeSliceCab <8 >();
226+
227+ // simd_parallel_for setup
228+ Cabana::SimdPolicy<cabSliceFactory.vecLen , ExecutionSpace> simd_policy (0 , num_tuples);
229+
230+ // kernel that reads and writes
231+ auto vector_kernel = KOKKOS_LAMBDA (const int s, const int a) {
232+ double d0 = 42 /(s+a+1.3 );
233+ double d1 = d0*d0;
234+ double d2 = d1/123.456751 ;
235+ float f0 = 0 ;
236+ char c0 = ' a' +s+a;
237+ char c1 = ' a' +((s*a+a) % 26 );
238+ int i0 = s+a;
239+ int i1 = s+a/int (d0);
240+ int i2 = i0+i1;
241+ slice_wrapper0.access (s,a) = d0;
242+ slice_wrapper1.access (s,a) = d1;
243+ slice_wrapper2.access (s,a) = d2;
244+ slice_wrapper3.access (s,a) = i0;
245+ slice_wrapper4.access (s,a) = f0;
246+ slice_wrapper5.access (s,a) = i1;
247+ slice_wrapper6.access (s,a) = i2;
248+ slice_wrapper7.access (s,a) = c0;
249+ slice_wrapper8.access (s,a) = c1;
250+
251+ assert (slice_wrapper0.access (s,a) == d0);
252+ assert (slice_wrapper1.access (s,a) == d1);
253+ assert (slice_wrapper2.access (s,a) == d2);
254+ assert (slice_wrapper3.access (s,a) == i0);
255+ assert (slice_wrapper4.access (s,a) == f0);
256+ assert (slice_wrapper5.access (s,a) == i1);
257+ assert (slice_wrapper6.access (s,a) == i2);
258+ assert (slice_wrapper7.access (s,a) == c0);
259+ assert (slice_wrapper8.access (s,a) == c1);
36260 };
37261
38- Cabana::simd_parallel_for (simd_policy, vector_kernel, " parallel_for_cabSliceFactory" );
262+ Cabana::simd_parallel_for (simd_policy, vector_kernel, " parallel_for_many_type_test" );
263+ return 0 ;
264+ }
265+
266+ int main (int argc, char * argv[]) {
267+ // AoSoA parameters
268+ int num_tuples = atoi (argv[1 ]);
269+
270+ Kokkos::ScopeGuard scope_guard (argc, argv);
39271
272+ many_type_test (num_tuples);
273+ single_type_test (num_tuples);
274+ multi_type_test (num_tuples);
275+
276+ rank1_array_test (num_tuples);
277+ rank2_array_test (num_tuples);
278+ rank3_array_test (num_tuples);
279+ mix_arrays_test (num_tuples);
280+
40281 assert (cudaSuccess == cudaDeviceSynchronize ());
41282 printf (" done\n " );
42283
0 commit comments