diff --git a/CMakeLists.txt b/CMakeLists.txt index 4abce126..7d0a7fd0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,4 +22,7 @@ target_link_libraries(SliceWrapper Omega_h::omega_h) target_link_libraries(SliceWrapper Cabana::cabanacore) target_compile_definitions(SliceWrapper PUBLIC ENABLE_CABANA) -add_test(sliceWrapper ./SliceWrapper) +add_test(sliceWrapper10 ./SliceWrapper 10) +add_test(sliceWrapper32 ./SliceWrapper 32) +add_test(sliceWrapper33 ./SliceWrapper 33) +add_test(sliceWrapper50 ./SliceWrapper 50) diff --git a/src/SliceWrapper.hpp b/src/SliceWrapper.hpp index 8e9db0b1..0c852610 100644 --- a/src/SliceWrapper.hpp +++ b/src/SliceWrapper.hpp @@ -11,15 +11,22 @@ struct SliceWrapper { SliceWrapper(SliceType st) : st_(st) {} KOKKOS_INLINE_FUNCTION - T& access(const int s, const int a) const { + T& access(int s, int a) const { return st_.access(s,a); } - int arraySize(int s) { - return st_.arraySize(s); + KOKKOS_INLINE_FUNCTION + auto& access(int s, int a, int i) const { + return st_.access(s,a,i); } - int numSoA() { - return st_.numSoA(); + KOKKOS_INLINE_FUNCTION + auto& access(int s, int a, int i, int j) const { + return st_.access(s,a,i,j); } + KOKKOS_INLINE_FUNCTION + auto& access(int s, int a, int i, int j, int k) const { + return st_.access(s,a,i,j,k); + } + }; using namespace Cabana; @@ -33,6 +40,13 @@ class CabSliceFactory { static constexpr int vecLen = Cabana::AoSoA::vector_length; private: using soa_t = SoA; + + template + using member_data_t = typename Cabana::MemberTypeAtIndex::type; + + template + using member_value_t = + typename std::remove_all_extents>::type; template using member_slice_t = @@ -49,12 +63,16 @@ class CabSliceFactory { template auto makeSliceCab() { using type = std::tuple_element_t; - const int stride = sizeof(soa_t) / sizeof(type); + const int stride = sizeof(soa_t) / sizeof(member_value_t); auto slice = Cabana::slice(aosoa); return wrapper_slice_t< type, stride >(std::move(slice)); } - CabSliceFactory(int n) : aosoa("sliceAoSoA", n) {} + CabSliceFactory(int n) : aosoa("sliceAoSoA", n) { + if (sizeof...(Ts) == 0) { + throw std::invalid_argument("Must provide at least one member type in template definition"); + } + } }; diff --git a/test/SliceWrapper.cpp b/test/SliceWrapper.cpp index cbd26dce..f151a72c 100644 --- a/test/SliceWrapper.cpp +++ b/test/SliceWrapper.cpp @@ -1,11 +1,175 @@ #include "SliceWrapper.hpp" +#include -int main(int argc, char* argv[]) { - // AoSoA parameters - int num_tuples = 10; +int rank1_array_test(int num_tuples) { + using ExecutionSpace = Kokkos::DefaultExecutionSpace; + using MemorySpace = ExecutionSpace::memory_space; + + const int width = 3; - Kokkos::ScopeGuard scope_guard(argc, argv); + // Slice Wrapper Factory + CabSliceFactory cabSliceFactory(num_tuples); + + auto slice_wrapper0 = cabSliceFactory.makeSliceCab<0>(); + + // simd_parallel_for setup + Cabana::SimdPolicy simd_policy(0, num_tuples); + + // kernel that reads and writes + auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) { + for (int i = 0; i < width; i++) { + double x = 42/(s+a+i+1.3); + slice_wrapper0.access(s,a,i) = x; + assert(slice_wrapper0.access(s,a,i) == x); + } + }; + Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_rank1_array_test"); + return 0; + +} + +int rank2_array_test(int num_tuples) { + using ExecutionSpace = Kokkos::DefaultExecutionSpace; + using MemorySpace = ExecutionSpace::memory_space; + + const int width = 3; + const int height = 4; + + // Slice Wrapper Factory + CabSliceFactory cabSliceFactory(num_tuples); + + auto slice_wrapper0 = cabSliceFactory.makeSliceCab<0>(); + + // simd_parallel_for setup + Cabana::SimdPolicy simd_policy(0, num_tuples); + + // kernel that reads and writes + auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) { + for (int i = 0; i < width; i++) { + for (int j = 0; j < height; j++) { + double x = 42/(s+a+i+j+1.3); + slice_wrapper0.access(s,a,i,j) = x; + assert(slice_wrapper0.access(s,a,i,j) == x); + } + } + }; + + Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_rank2_array_test"); + return 0; +} + +int rank3_array_test(int num_tuples) { + using ExecutionSpace = Kokkos::DefaultExecutionSpace; + using MemorySpace = ExecutionSpace::memory_space; + + const int width = 3; + const int height = 4; + const int depth = 2; + + // Slice Wrapper Factory + CabSliceFactory cabSliceFactory(num_tuples); + + auto slice_wrapper0 = cabSliceFactory.makeSliceCab<0>(); + + // simd_parallel_for setup + Cabana::SimdPolicy simd_policy(0, num_tuples); + + // kernel that reads and writes + auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) { + for (int i = 0; i < width; i++) { + for (int j = 0; j < height; j++) { + for (int k = 0; k < depth; k++) { + double x = 42/(s+a+i+j+k+1.3); + slice_wrapper0.access(s,a,i,j,k) = x; + assert(slice_wrapper0.access(s,a,i,j,k) == x); + } + } + } + }; + + Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_rank3_array_test"); + return 0; +} + +int mix_arrays_test(int num_tuples) { + using ExecutionSpace = Kokkos::DefaultExecutionSpace; + using MemorySpace = ExecutionSpace::memory_space; + + const int width = 3; + const int height = 4; + const int depth = 2; + + // Slice Wrapper Factory + CabSliceFactory cabSliceFactory(num_tuples); + + auto slice_wrapper0 = cabSliceFactory.makeSliceCab<0>(); + auto slice_wrapper1 = cabSliceFactory.makeSliceCab<1>(); + auto slice_wrapper2 = cabSliceFactory.makeSliceCab<2>(); + auto slice_wrapper3 = cabSliceFactory.makeSliceCab<3>(); + + // simd_parallel_for setup + Cabana::SimdPolicy simd_policy(0, num_tuples); + + // kernel that reads and writes + auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) { + char x0 = 'a'+(s+a); + slice_wrapper1.access(s,a) = x0; + assert(slice_wrapper1.access(s,a) == x0); + + for (int i = 0; i < width; i++) { + double x1 = 42/(s+a+i+1.3); + slice_wrapper0.access(s,a,i) = x1; + assert(slice_wrapper0.access(s,a,i) == x1); + + for (int j = 0; j < height; j++) { + double x2 = float(x1/(j+1.2)); + slice_wrapper3.access(s,a,i,j) = x2; + assert(slice_wrapper3.access(s,a,i,j) == x2); + + for (int k = 0; k < depth; k++) { + double x3 = x2*x1+k; + slice_wrapper2.access(s,a,i,j,k) = x3; + assert(slice_wrapper2.access(s,a,i,j,k) == x3); + } + } + } + }; + + Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_mix_arrays_test"); + return 0; +} + +int single_type_test(int num_tuples) { + using ExecutionSpace = Kokkos::DefaultExecutionSpace; + using MemorySpace = ExecutionSpace::memory_space; + + // Slice Wrapper Factory + CabSliceFactory cabSliceFactory(num_tuples); + + auto slice_wrapper0 = cabSliceFactory.makeSliceCab<0>(); + + // simd_parallel_for setup + Cabana::SimdPolicy simd_policy(0, num_tuples); + + // kernel that reads and writes + auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) { + double x = 42/(s+a+1.3); + slice_wrapper0.access(s,a) = x; + assert(slice_wrapper0.access(s,a) == x); + }; + + Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_single_type_test"); + return 0; +} + +int multi_type_test(int num_tuples) { using ExecutionSpace = Kokkos::DefaultExecutionSpace; using MemorySpace = ExecutionSpace::memory_space; @@ -23,20 +187,97 @@ int main(int argc, char* argv[]) { // kernel that reads and writes auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) { - printf("s: %d, a: %d\n", s,a); double x = 42/(s+a+1.3); + char c = 'a'+s+a; slice_wrapper0.access(s,a) = x; slice_wrapper1.access(s,a) = s+a; slice_wrapper2.access(s,a) = float(x); - slice_wrapper3.access(s,a) = 'a'+s+a; - printf("SW0 value: %lf\n", slice_wrapper0.access(s,a)); - printf("SW1 value: %d\n", slice_wrapper1.access(s,a)); - printf("SW2 value: %f\n", slice_wrapper2.access(s,a)); - printf("SW3 value: %c\n", slice_wrapper3.access(s,a)); + slice_wrapper3.access(s,a) = c; + + assert(slice_wrapper0.access(s,a) == x); + assert(slice_wrapper1.access(s,a) == s+a); + assert(slice_wrapper2.access(s,a) == float(x)); + assert(slice_wrapper3.access(s,a) == c); + }; + + Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_multi_type_test"); + return 0; +} + +int many_type_test(int num_tuples) { + using ExecutionSpace = Kokkos::DefaultExecutionSpace; + using MemorySpace = ExecutionSpace::memory_space; + + // Slice Wrapper Factory + CabSliceFactory cabSliceFactory(num_tuples); + + auto slice_wrapper0 = cabSliceFactory.makeSliceCab<0>(); + auto slice_wrapper1 = cabSliceFactory.makeSliceCab<1>(); + auto slice_wrapper2 = cabSliceFactory.makeSliceCab<2>(); + auto slice_wrapper3 = cabSliceFactory.makeSliceCab<3>(); + auto slice_wrapper4 = cabSliceFactory.makeSliceCab<4>(); + auto slice_wrapper5 = cabSliceFactory.makeSliceCab<5>(); + auto slice_wrapper6 = cabSliceFactory.makeSliceCab<6>(); + auto slice_wrapper7 = cabSliceFactory.makeSliceCab<7>(); + auto slice_wrapper8 = cabSliceFactory.makeSliceCab<8>(); + + // simd_parallel_for setup + Cabana::SimdPolicy simd_policy(0, num_tuples); + + // kernel that reads and writes + auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) { + double d0 = 42/(s+a+1.3); + double d1 = d0*d0; + double d2 = d1/123.456751; + float f0 = 0; + char c0 = 'a'+s+a; + char c1 = 'a'+((s*a+a) % 26); + int i0 = s+a; + int i1 = s+a/int(d0); + int i2 = i0+i1; + slice_wrapper0.access(s,a) = d0; + slice_wrapper1.access(s,a) = d1; + slice_wrapper2.access(s,a) = d2; + slice_wrapper3.access(s,a) = i0; + slice_wrapper4.access(s,a) = f0; + slice_wrapper5.access(s,a) = i1; + slice_wrapper6.access(s,a) = i2; + slice_wrapper7.access(s,a) = c0; + slice_wrapper8.access(s,a) = c1; + + assert(slice_wrapper0.access(s,a) == d0); + assert(slice_wrapper1.access(s,a) == d1); + assert(slice_wrapper2.access(s,a) == d2); + assert(slice_wrapper3.access(s,a) == i0); + assert(slice_wrapper4.access(s,a) == f0); + assert(slice_wrapper5.access(s,a) == i1); + assert(slice_wrapper6.access(s,a) == i2); + assert(slice_wrapper7.access(s,a) == c0); + assert(slice_wrapper8.access(s,a) == c1); }; - Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_cabSliceFactory"); + Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_many_type_test"); + return 0; +} + +int main(int argc, char* argv[]) { + // AoSoA parameters + int num_tuples = atoi(argv[1]); + + Kokkos::ScopeGuard scope_guard(argc, argv); + many_type_test(num_tuples); + single_type_test(num_tuples); + multi_type_test(num_tuples); + + rank1_array_test(num_tuples); + rank2_array_test(num_tuples); + rank3_array_test(num_tuples); + mix_arrays_test(num_tuples); + assert(cudaSuccess == cudaDeviceSynchronize()); printf("done\n");