From b8b2a15620811d6508713b7a44535a58d03dab4c Mon Sep 17 00:00:00 2001 From: Henry Cullom Date: Thu, 13 Oct 2022 14:01:12 -0400 Subject: [PATCH 01/10] removed unnecessary functions --- src/SliceWrapper.hpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/SliceWrapper.hpp b/src/SliceWrapper.hpp index 8e9db0b1..18dec7ff 100644 --- a/src/SliceWrapper.hpp +++ b/src/SliceWrapper.hpp @@ -14,12 +14,6 @@ struct SliceWrapper { T& access(const int s, const int a) const { return st_.access(s,a); } - int arraySize(int s) { - return st_.arraySize(s); - } - int numSoA() { - return st_.numSoA(); - } }; using namespace Cabana; From 997669eb259757ac2ddf3e5c759248365f4ee870 Mon Sep 17 00:00:00 2001 From: Henry Cullom Date: Thu, 13 Oct 2022 14:01:53 -0400 Subject: [PATCH 02/10] added more tests --- test/SliceWrapper.cpp | 91 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 87 insertions(+), 4 deletions(-) diff --git a/test/SliceWrapper.cpp b/test/SliceWrapper.cpp index cbd26dce..19dba062 100644 --- a/test/SliceWrapper.cpp +++ b/test/SliceWrapper.cpp @@ -1,11 +1,31 @@ #include "SliceWrapper.hpp" +#include -int main(int argc, char* argv[]) { - // AoSoA parameters - int num_tuples = 10; +int single_type_test(int num_tuples) { + using ExecutionSpace = Kokkos::DefaultExecutionSpace; + using MemorySpace = ExecutionSpace::memory_space; - Kokkos::ScopeGuard scope_guard(argc, argv); + // Slice Wrapper Factory + CabSliceFactory cabSliceFactory(num_tuples); + + auto slice_wrapper0 = cabSliceFactory.makeSliceCab<0>(); + + // simd_parallel_for setup + Cabana::SimdPolicy simd_policy(0, num_tuples); + + // kernel that reads and writes + auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) { + double x = 42/(s+a+1.3); + slice_wrapper0.access(s,a) = x; + printf("SW0 value: %lf\n", slice_wrapper0.access(s,a)); + }; + + Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_cabSliceFactory"); + return 0; +} +int multi_type_test(int num_tuples) { using ExecutionSpace = Kokkos::DefaultExecutionSpace; using MemorySpace = ExecutionSpace::memory_space; @@ -36,7 +56,70 @@ int main(int argc, char* argv[]) { }; Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_cabSliceFactory"); + return 0; +} +int many_type_test(int num_tuples) { + using ExecutionSpace = Kokkos::DefaultExecutionSpace; + using MemorySpace = ExecutionSpace::memory_space; + + // Slice Wrapper Factory + CabSliceFactory cabSliceFactory(num_tuples); + + auto slice_wrapper0 = cabSliceFactory.makeSliceCab<0>(); + auto slice_wrapper1 = cabSliceFactory.makeSliceCab<1>(); + auto slice_wrapper2 = cabSliceFactory.makeSliceCab<2>(); + auto slice_wrapper3 = cabSliceFactory.makeSliceCab<3>(); + auto slice_wrapper4 = cabSliceFactory.makeSliceCab<4>(); + auto slice_wrapper5 = cabSliceFactory.makeSliceCab<5>(); + auto slice_wrapper6 = cabSliceFactory.makeSliceCab<6>(); + auto slice_wrapper7 = cabSliceFactory.makeSliceCab<7>(); + auto slice_wrapper8 = cabSliceFactory.makeSliceCab<8>(); + + // simd_parallel_for setup + Cabana::SimdPolicy simd_policy(0, num_tuples); + + // kernel that reads and writes + auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) { + double x = 42/(s+a+1.3); + slice_wrapper0.access(s,a) = x; + slice_wrapper1.access(s,a) = s+a; + slice_wrapper2.access(s,a) = float(x); + slice_wrapper3.access(s,a) = 'a'+s+a; + slice_wrapper4.access(s,a) = int(s+a/x); + slice_wrapper5.access(s,a) = 'a'+((s*a+a) % 26); + slice_wrapper6.access(s,a) = (s+a+a+s*s)*x; + slice_wrapper7.access(s,a) = (s+a)*num_tuples/(s+2); + slice_wrapper8.access(s,a) = (x+s+a)/(x*x); + printf("SW0 value: %lf\n", slice_wrapper0.access(s,a)); + printf("SW1 value: %d\n", slice_wrapper1.access(s,a)); + printf("SW2 value: %f\n", slice_wrapper2.access(s,a)); + printf("SW3 value: %c\n", slice_wrapper3.access(s,a)); + printf("SW4 value: %d\n", slice_wrapper4.access(s,a)); + printf("SW5 value: %c\n", slice_wrapper5.access(s,a)); + printf("SW6 value: %lf\n", slice_wrapper6.access(s,a)); + printf("SW7 value: %lu\n", slice_wrapper7.access(s,a)); + printf("SW8 value: %Lf\n", slice_wrapper8.access(s,a)); + }; + + Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_cabSliceFactory"); + return 0; +} + + +int main(int argc, char* argv[]) { + // AoSoA parameters + int num_tuples = 50; + + Kokkos::ScopeGuard scope_guard(argc, argv); + + single_type_test(num_tuples); + multi_type_test(num_tuples); + many_type_test(num_tuples); + assert(cudaSuccess == cudaDeviceSynchronize()); printf("done\n"); From fa268d54e569c85053ba745d4a7f8553c97b75f5 Mon Sep 17 00:00:00 2001 From: Henry Cullom Date: Fri, 14 Oct 2022 14:02:46 -0400 Subject: [PATCH 03/10] added new test cases for array types --- test/SliceWrapper.cpp | 43 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/test/SliceWrapper.cpp b/test/SliceWrapper.cpp index 19dba062..3e57789c 100644 --- a/test/SliceWrapper.cpp +++ b/test/SliceWrapper.cpp @@ -1,6 +1,36 @@ #include "SliceWrapper.hpp" #include +int array_type_test(int num_tuples) { + using ExecutionSpace = Kokkos::DefaultExecutionSpace; + using MemorySpace = ExecutionSpace::memory_space; + + const int width = 3; + + // Slice Wrapper Factory + CabSliceFactory cabSliceFactory(num_tuples); + + auto slice_wrapper0 = cabSliceFactory.makeSliceCab<0>(); + + // simd_parallel_for setup + Cabana::SimdPolicy simd_policy(0, num_tuples); + + // kernel that reads and writes + auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) { + for (int i = 0; i < width; i++) { + double x = 42/(s+a+i+1.3); + slice_wrapper0.access(s,a,i) = x; + assert(slice_wrapper0.access(s,a,i) == x); + printf("SW0 value: %lf\n", slice_wrapper0.access(s,a,i)); + } + }; + + Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_array_type_test"); + return 0; + +} + int single_type_test(int num_tuples) { using ExecutionSpace = Kokkos::DefaultExecutionSpace; using MemorySpace = ExecutionSpace::memory_space; @@ -18,10 +48,11 @@ int single_type_test(int num_tuples) { auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) { double x = 42/(s+a+1.3); slice_wrapper0.access(s,a) = x; + printf("SW0 value: %lf\n", slice_wrapper0.access(s,a)); }; - Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_cabSliceFactory"); + Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_single_type_test"); return 0; } @@ -55,7 +86,7 @@ int multi_type_test(int num_tuples) { printf("SW3 value: %c\n", slice_wrapper3.access(s,a)); }; - Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_cabSliceFactory"); + Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_multi_type_test"); return 0; } @@ -105,7 +136,7 @@ int many_type_test(int num_tuples) { printf("SW8 value: %Lf\n", slice_wrapper8.access(s,a)); }; - Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_cabSliceFactory"); + Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_many_type_test"); return 0; } @@ -116,9 +147,15 @@ int main(int argc, char* argv[]) { Kokkos::ScopeGuard scope_guard(argc, argv); + no_type_test(num_tuples); single_type_test(num_tuples); multi_type_test(num_tuples); many_type_test(num_tuples); + 1d_array_test(num_tuples); + 2d_array_test(num_tuples); + 3d_array_test(num_tuples); + mix_arrays_test(num_tuples); + assert(cudaSuccess == cudaDeviceSynchronize()); printf("done\n"); From e9ac9d3a353bc9de604c4a722bc6eda8268292b2 Mon Sep 17 00:00:00 2001 From: Henry Cullom Date: Fri, 14 Oct 2022 14:03:38 -0400 Subject: [PATCH 04/10] added support for array types --- src/SliceWrapper.hpp | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/SliceWrapper.hpp b/src/SliceWrapper.hpp index 18dec7ff..fc53964d 100644 --- a/src/SliceWrapper.hpp +++ b/src/SliceWrapper.hpp @@ -11,9 +11,14 @@ struct SliceWrapper { SliceWrapper(SliceType st) : st_(st) {} KOKKOS_INLINE_FUNCTION - T& access(const int s, const int a) const { + T& access(int s, int a) const { return st_.access(s,a); } + KOKKOS_INLINE_FUNCTION + auto& access(int s, int a, int i) const { + return st_.access(s,a,i); + } + }; using namespace Cabana; @@ -27,6 +32,13 @@ class CabSliceFactory { static constexpr int vecLen = Cabana::AoSoA::vector_length; private: using soa_t = SoA; + + template + using member_data_t = typename Cabana::MemberTypeAtIndex::type; + + template + using member_value_t = + typename std::remove_all_extents>::type; template using member_slice_t = @@ -43,7 +55,7 @@ class CabSliceFactory { template auto makeSliceCab() { using type = std::tuple_element_t; - const int stride = sizeof(soa_t) / sizeof(type); + const int stride = sizeof(soa_t) / sizeof(member_value_t); auto slice = Cabana::slice(aosoa); return wrapper_slice_t< type, stride >(std::move(slice)); } From 7165bb10abc6e2434035af24fabd8cac75d21c54 Mon Sep 17 00:00:00 2001 From: Henry Cullom Date: Fri, 14 Oct 2022 16:50:16 -0400 Subject: [PATCH 05/10] now supports multi-dimensional arrays, new test cases --- CMakeLists.txt | 4 +- src/SliceWrapper.hpp | 14 ++- test/SliceWrapper.cpp | 193 +++++++++++++++++++++++++++++++++++------- 3 files changed, 179 insertions(+), 32 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4abce126..fb96d0b5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,4 +22,6 @@ target_link_libraries(SliceWrapper Omega_h::omega_h) target_link_libraries(SliceWrapper Cabana::cabanacore) target_compile_definitions(SliceWrapper PUBLIC ENABLE_CABANA) -add_test(sliceWrapper ./SliceWrapper) +add_test(sliceWrapper10 ./SliceWrapper 10) +add_test(sliceWrapper50 ./SliceWrapper 50) +add_test(sliceWrapper100 ./SliceWrapper 100) diff --git a/src/SliceWrapper.hpp b/src/SliceWrapper.hpp index fc53964d..0c852610 100644 --- a/src/SliceWrapper.hpp +++ b/src/SliceWrapper.hpp @@ -18,6 +18,14 @@ struct SliceWrapper { auto& access(int s, int a, int i) const { return st_.access(s,a,i); } + KOKKOS_INLINE_FUNCTION + auto& access(int s, int a, int i, int j) const { + return st_.access(s,a,i,j); + } + KOKKOS_INLINE_FUNCTION + auto& access(int s, int a, int i, int j, int k) const { + return st_.access(s,a,i,j,k); + } }; @@ -60,7 +68,11 @@ class CabSliceFactory { return wrapper_slice_t< type, stride >(std::move(slice)); } - CabSliceFactory(int n) : aosoa("sliceAoSoA", n) {} + CabSliceFactory(int n) : aosoa("sliceAoSoA", n) { + if (sizeof...(Ts) == 0) { + throw std::invalid_argument("Must provide at least one member type in template definition"); + } + } }; diff --git a/test/SliceWrapper.cpp b/test/SliceWrapper.cpp index 3e57789c..fb5fc813 100644 --- a/test/SliceWrapper.cpp +++ b/test/SliceWrapper.cpp @@ -1,7 +1,7 @@ #include "SliceWrapper.hpp" #include -int array_type_test(int num_tuples) { +int rank1_array_test(int num_tuples) { using ExecutionSpace = Kokkos::DefaultExecutionSpace; using MemorySpace = ExecutionSpace::memory_space; @@ -22,15 +22,145 @@ int array_type_test(int num_tuples) { double x = 42/(s+a+i+1.3); slice_wrapper0.access(s,a,i) = x; assert(slice_wrapper0.access(s,a,i) == x); - printf("SW0 value: %lf\n", slice_wrapper0.access(s,a,i)); } }; - Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_array_type_test"); + Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_rank1_array_test"); return 0; } +int rank2_array_test(int num_tuples) { + using ExecutionSpace = Kokkos::DefaultExecutionSpace; + using MemorySpace = ExecutionSpace::memory_space; + + const int width = 3; + const int height = 4; + + // Slice Wrapper Factory + CabSliceFactory cabSliceFactory(num_tuples); + + auto slice_wrapper0 = cabSliceFactory.makeSliceCab<0>(); + + // simd_parallel_for setup + Cabana::SimdPolicy simd_policy(0, num_tuples); + + // kernel that reads and writes + auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) { + for (int i = 0; i < width; i++) { + for (int j = 0; j < height; j++) { + double x = 42/(s+a+i+j+1.3); + slice_wrapper0.access(s,a,i,j) = x; + assert(slice_wrapper0.access(s,a,i,j) == x); + } + } + }; + + Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_rank2_array_test"); + return 0; +} + +int rank3_array_test(int num_tuples) { + using ExecutionSpace = Kokkos::DefaultExecutionSpace; + using MemorySpace = ExecutionSpace::memory_space; + + const int width = 3; + const int height = 4; + const int depth = 2; + + // Slice Wrapper Factory + CabSliceFactory cabSliceFactory(num_tuples); + + auto slice_wrapper0 = cabSliceFactory.makeSliceCab<0>(); + + // simd_parallel_for setup + Cabana::SimdPolicy simd_policy(0, num_tuples); + + // kernel that reads and writes + auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) { + for (int i = 0; i < width; i++) { + for (int j = 0; j < height; j++) { + for (int k = 0; k < depth; k++) { + double x = 42/(s+a+i+j+k+1.3); + slice_wrapper0.access(s,a,i,j,k) = x; + assert(slice_wrapper0.access(s,a,i,j,k) == x); + } + } + } + }; + + Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_rank3_array_test"); + return 0; +} + +int mix_arrays_test(int num_tuples) { + using ExecutionSpace = Kokkos::DefaultExecutionSpace; + using MemorySpace = ExecutionSpace::memory_space; + + const int width = 3; + const int height = 4; + const int depth = 2; + + // Slice Wrapper Factory + CabSliceFactory cabSliceFactory(num_tuples); + + auto slice_wrapper0 = cabSliceFactory.makeSliceCab<0>(); + auto slice_wrapper1 = cabSliceFactory.makeSliceCab<1>(); + auto slice_wrapper2 = cabSliceFactory.makeSliceCab<2>(); + auto slice_wrapper3 = cabSliceFactory.makeSliceCab<3>(); + + // simd_parallel_for setup + Cabana::SimdPolicy simd_policy(0, num_tuples); + + // kernel that reads and writes + auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) { + char x0 = 'a'+(s+a); + slice_wrapper1.access(s,a) = x0; + assert(slice_wrapper1.access(s,a) == x0); + + for (int i = 0; i < width; i++) { + double x1 = 42/(s+a+i+1.3); + slice_wrapper0.access(s,a,i) = x1; + assert(slice_wrapper0.access(s,a,i) == x1); + + for (int j = 0; j < height; j++) { + double x2 = float(x1/(j+1.2)); + slice_wrapper3.access(s,a,i,j) = x2; + assert(slice_wrapper3.access(s,a,i,j) == x2); + + for (int k = 0; k < depth; k++) { + double x3 = x2*x1+k; + slice_wrapper2.access(s,a,i,j,k) = x3; + assert(slice_wrapper2.access(s,a,i,j,k) == x3); + } + } + } + }; + + Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_mix_arrays_test"); + return 0; +} + +int no_type_test(int num_tuples) { + /* + using ExecutionSpace = Kokkos::DefaultExecutionSpace; + using MemorySpace = ExecutionSpace::memory_space; + bool exception = false; + try { + CabSliceFactory cabSliceFactory(num_tuples); + } + catch (std::invalid_argument &e) { + exception = true; + } + assert(exception); + */ + return 0; +} + int single_type_test(int num_tuples) { using ExecutionSpace = Kokkos::DefaultExecutionSpace; using MemorySpace = ExecutionSpace::memory_space; @@ -48,8 +178,7 @@ int single_type_test(int num_tuples) { auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) { double x = 42/(s+a+1.3); slice_wrapper0.access(s,a) = x; - - printf("SW0 value: %lf\n", slice_wrapper0.access(s,a)); + assert(slice_wrapper0.access(s,a) == x); }; Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_single_type_test"); @@ -74,16 +203,17 @@ int multi_type_test(int num_tuples) { // kernel that reads and writes auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) { - printf("s: %d, a: %d\n", s,a); double x = 42/(s+a+1.3); + char c = 'a'+s+a; slice_wrapper0.access(s,a) = x; slice_wrapper1.access(s,a) = s+a; slice_wrapper2.access(s,a) = float(x); - slice_wrapper3.access(s,a) = 'a'+s+a; - printf("SW0 value: %lf\n", slice_wrapper0.access(s,a)); - printf("SW1 value: %d\n", slice_wrapper1.access(s,a)); - printf("SW2 value: %f\n", slice_wrapper2.access(s,a)); - printf("SW3 value: %c\n", slice_wrapper3.access(s,a)); + slice_wrapper3.access(s,a) = c; + + assert(slice_wrapper0.access(s,a) == x); + assert(slice_wrapper1.access(s,a) == s+a); + assert(slice_wrapper2.access(s,a) == float(x)); + assert(slice_wrapper3.access(s,a) == c); }; Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_multi_type_test"); @@ -116,31 +246,35 @@ int many_type_test(int num_tuples) { // kernel that reads and writes auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) { double x = 42/(s+a+1.3); + float fx = 0; + char c0 = 'a'+s+a; + char c1 = 'a'+((s*a+a) % 26); slice_wrapper0.access(s,a) = x; slice_wrapper1.access(s,a) = s+a; - slice_wrapper2.access(s,a) = float(x); - slice_wrapper3.access(s,a) = 'a'+s+a; + slice_wrapper2.access(s,a) = fx; + slice_wrapper3.access(s,a) = c0; slice_wrapper4.access(s,a) = int(s+a/x); - slice_wrapper5.access(s,a) = 'a'+((s*a+a) % 26); + slice_wrapper5.access(s,a) = c1; slice_wrapper6.access(s,a) = (s+a+a+s*s)*x; slice_wrapper7.access(s,a) = (s+a)*num_tuples/(s+2); slice_wrapper8.access(s,a) = (x+s+a)/(x*x); - printf("SW0 value: %lf\n", slice_wrapper0.access(s,a)); - printf("SW1 value: %d\n", slice_wrapper1.access(s,a)); - printf("SW2 value: %f\n", slice_wrapper2.access(s,a)); - printf("SW3 value: %c\n", slice_wrapper3.access(s,a)); - printf("SW4 value: %d\n", slice_wrapper4.access(s,a)); - printf("SW5 value: %c\n", slice_wrapper5.access(s,a)); - printf("SW6 value: %lf\n", slice_wrapper6.access(s,a)); - printf("SW7 value: %lu\n", slice_wrapper7.access(s,a)); - printf("SW8 value: %Lf\n", slice_wrapper8.access(s,a)); + + assert(slice_wrapper0.access(s,a) == x); + assert(slice_wrapper1.access(s,a) == s+a); + assert(slice_wrapper2.access(s,a) == fx); + printf("int?: %d\n", slice_wrapper2.access(s,a)); + assert(slice_wrapper3.access(s,a) == c0); + assert(slice_wrapper4.access(s,a) == int(s+a/x)); + assert(slice_wrapper5.access(s,a) == c1); + assert(slice_wrapper6.access(s,a) == (s+a+a+s*s)*x); + assert(slice_wrapper7.access(s,a) == (s+a)*num_tuples/(s+2)); + assert(slice_wrapper8.access(s,a) == (x+s+a)/(x*x)); }; Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_many_type_test"); return 0; } - - + int main(int argc, char* argv[]) { // AoSoA parameters int num_tuples = 50; @@ -151,11 +285,10 @@ int main(int argc, char* argv[]) { single_type_test(num_tuples); multi_type_test(num_tuples); many_type_test(num_tuples); - 1d_array_test(num_tuples); - 2d_array_test(num_tuples); - 3d_array_test(num_tuples); - mix_arrays_test(num_tuples); - + rank1_array_test(num_tuples); + rank2_array_test(num_tuples); + rank3_array_test(num_tuples); + mix_arrays_test(num_tuples); assert(cudaSuccess == cudaDeviceSynchronize()); printf("done\n"); From 2e0f712882b511542cfb8fbc0791470e60909016 Mon Sep 17 00:00:00 2001 From: Henry Cullom Date: Mon, 17 Oct 2022 12:44:36 -0400 Subject: [PATCH 06/10] issue arises with many types when num_tuples goes above 32 --- CMakeLists.txt | 2 ++ test/SliceWrapper.cpp | 41 +++++++++++++++++++++++------------------ 2 files changed, 25 insertions(+), 18 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fb96d0b5..3d1088cd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,5 +23,7 @@ target_link_libraries(SliceWrapper Cabana::cabanacore) target_compile_definitions(SliceWrapper PUBLIC ENABLE_CABANA) add_test(sliceWrapper10 ./SliceWrapper 10) +add_test(sliceWrapper32 ./SliceWrapper 32) +add_test(sliceWrapper33 ./SliceWrapper 33) add_test(sliceWrapper50 ./SliceWrapper 50) add_test(sliceWrapper100 ./SliceWrapper 100) diff --git a/test/SliceWrapper.cpp b/test/SliceWrapper.cpp index fb5fc813..b3b204d0 100644 --- a/test/SliceWrapper.cpp +++ b/test/SliceWrapper.cpp @@ -245,30 +245,35 @@ int many_type_test(int num_tuples) { // kernel that reads and writes auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) { - double x = 42/(s+a+1.3); - float fx = 0; + double d0 = 42/(s+a+1.3); + double d1 = d0*d0; + double d2 = d1/123.456751; + float f0 = 0; char c0 = 'a'+s+a; char c1 = 'a'+((s*a+a) % 26); - slice_wrapper0.access(s,a) = x; - slice_wrapper1.access(s,a) = s+a; - slice_wrapper2.access(s,a) = fx; + int i0 = s+a; + int i1 = s+a/int(d0); + int i2 = i0+i1; + slice_wrapper0.access(s,a) = d0; + slice_wrapper1.access(s,a) = i0; + slice_wrapper2.access(s,a) = f0; slice_wrapper3.access(s,a) = c0; - slice_wrapper4.access(s,a) = int(s+a/x); + slice_wrapper4.access(s,a) = i1; slice_wrapper5.access(s,a) = c1; - slice_wrapper6.access(s,a) = (s+a+a+s*s)*x; - slice_wrapper7.access(s,a) = (s+a)*num_tuples/(s+2); - slice_wrapper8.access(s,a) = (x+s+a)/(x*x); + slice_wrapper6.access(s,a) = d1; + slice_wrapper7.access(s,a) = i2; + slice_wrapper8.access(s,a) = d2; - assert(slice_wrapper0.access(s,a) == x); - assert(slice_wrapper1.access(s,a) == s+a); - assert(slice_wrapper2.access(s,a) == fx); - printf("int?: %d\n", slice_wrapper2.access(s,a)); + assert(slice_wrapper0.access(s,a) == d0); + assert(slice_wrapper1.access(s,a) == i0); + assert(slice_wrapper2.access(s,a) == f0); + printf("float in hex: %x\ni0: %x\n", slice_wrapper2.access(s,a), i0); assert(slice_wrapper3.access(s,a) == c0); - assert(slice_wrapper4.access(s,a) == int(s+a/x)); + assert(slice_wrapper4.access(s,a) == i1); assert(slice_wrapper5.access(s,a) == c1); - assert(slice_wrapper6.access(s,a) == (s+a+a+s*s)*x); - assert(slice_wrapper7.access(s,a) == (s+a)*num_tuples/(s+2)); - assert(slice_wrapper8.access(s,a) == (x+s+a)/(x*x)); + assert(slice_wrapper6.access(s,a) == d1); + assert(slice_wrapper7.access(s,a) == i2); + assert(slice_wrapper8.access(s,a) == d2); }; Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_many_type_test"); @@ -277,7 +282,7 @@ int many_type_test(int num_tuples) { int main(int argc, char* argv[]) { // AoSoA parameters - int num_tuples = 50; + int num_tuples = atoi(argv[1]); Kokkos::ScopeGuard scope_guard(argc, argv); From 1c35d7c1af1718fbb57a11ebb5ebaa7593c61951 Mon Sep 17 00:00:00 2001 From: Henry Cullom Date: Tue, 18 Oct 2022 11:01:36 -0400 Subject: [PATCH 07/10] descending size ordered many type test works --- test/SliceWrapper.cpp | 65 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 63 insertions(+), 2 deletions(-) diff --git a/test/SliceWrapper.cpp b/test/SliceWrapper.cpp index b3b204d0..f3263ac0 100644 --- a/test/SliceWrapper.cpp +++ b/test/SliceWrapper.cpp @@ -267,7 +267,7 @@ int many_type_test(int num_tuples) { assert(slice_wrapper0.access(s,a) == d0); assert(slice_wrapper1.access(s,a) == i0); assert(slice_wrapper2.access(s,a) == f0); - printf("float in hex: %x\ni0: %x\n", slice_wrapper2.access(s,a), i0); + printf("float: %f\n", slice_wrapper2.access(s,a)); assert(slice_wrapper3.access(s,a) == c0); assert(slice_wrapper4.access(s,a) == i1); assert(slice_wrapper5.access(s,a) == c1); @@ -279,17 +279,78 @@ int many_type_test(int num_tuples) { Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_many_type_test"); return 0; } + +int ordered_many_type_test(int num_tuples) { + using ExecutionSpace = Kokkos::DefaultExecutionSpace; + using MemorySpace = ExecutionSpace::memory_space; + + // Slice Wrapper Factory + CabSliceFactory cabSliceFactory(num_tuples); + + auto slice_wrapper0 = cabSliceFactory.makeSliceCab<0>(); + auto slice_wrapper1 = cabSliceFactory.makeSliceCab<1>(); + auto slice_wrapper2 = cabSliceFactory.makeSliceCab<2>(); + auto slice_wrapper3 = cabSliceFactory.makeSliceCab<3>(); + auto slice_wrapper4 = cabSliceFactory.makeSliceCab<4>(); + auto slice_wrapper5 = cabSliceFactory.makeSliceCab<5>(); + auto slice_wrapper6 = cabSliceFactory.makeSliceCab<6>(); + auto slice_wrapper7 = cabSliceFactory.makeSliceCab<7>(); + auto slice_wrapper8 = cabSliceFactory.makeSliceCab<8>(); + // simd_parallel_for setup + Cabana::SimdPolicy simd_policy(0, num_tuples); + + // kernel that reads and writes + auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) { + double d0 = 42/(s+a+1.3); + double d1 = d0*d0; + double d2 = d1/123.456751; + float f0 = 0; + char c0 = 'a'+s+a; + char c1 = 'a'+((s*a+a) % 26); + int i0 = s+a; + int i1 = s+a/int(d0); + int i2 = i0+i1; + slice_wrapper0.access(s,a) = d0; + slice_wrapper1.access(s,a) = d1; + slice_wrapper2.access(s,a) = d2; + slice_wrapper3.access(s,a) = f0; + slice_wrapper4.access(s,a) = i0; + slice_wrapper5.access(s,a) = i1; + slice_wrapper6.access(s,a) = i2; + slice_wrapper7.access(s,a) = c0; + slice_wrapper8.access(s,a) = c1; + + assert(slice_wrapper0.access(s,a) == d0); + assert(slice_wrapper1.access(s,a) == d1); + assert(slice_wrapper2.access(s,a) == d2); + assert(slice_wrapper3.access(s,a) == f0); + assert(slice_wrapper4.access(s,a) == i0); + assert(slice_wrapper5.access(s,a) == i1); + assert(slice_wrapper6.access(s,a) == i2); + assert(slice_wrapper7.access(s,a) == c0); + assert(slice_wrapper8.access(s,a) == c1); + }; + + Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_many_type_test"); + return 0; +} + int main(int argc, char* argv[]) { // AoSoA parameters int num_tuples = atoi(argv[1]); Kokkos::ScopeGuard scope_guard(argc, argv); + //many_type_test(num_tuples); // fails due to types not being in descending order + ordered_many_type_test(num_tuples); no_type_test(num_tuples); single_type_test(num_tuples); multi_type_test(num_tuples); - many_type_test(num_tuples); + rank1_array_test(num_tuples); rank2_array_test(num_tuples); rank3_array_test(num_tuples); From 79e90db64693e607ff0a2a98a1ea2f7d1c7f81a6 Mon Sep 17 00:00:00 2001 From: Henry Cullom Date: Tue, 18 Oct 2022 12:57:03 -0400 Subject: [PATCH 08/10] removed broken test case --- test/SliceWrapper.cpp | 71 +++---------------------------------------- 1 file changed, 5 insertions(+), 66 deletions(-) diff --git a/test/SliceWrapper.cpp b/test/SliceWrapper.cpp index f3263ac0..9cfa9381 100644 --- a/test/SliceWrapper.cpp +++ b/test/SliceWrapper.cpp @@ -224,66 +224,6 @@ int many_type_test(int num_tuples) { using ExecutionSpace = Kokkos::DefaultExecutionSpace; using MemorySpace = ExecutionSpace::memory_space; - // Slice Wrapper Factory - CabSliceFactory cabSliceFactory(num_tuples); - - auto slice_wrapper0 = cabSliceFactory.makeSliceCab<0>(); - auto slice_wrapper1 = cabSliceFactory.makeSliceCab<1>(); - auto slice_wrapper2 = cabSliceFactory.makeSliceCab<2>(); - auto slice_wrapper3 = cabSliceFactory.makeSliceCab<3>(); - auto slice_wrapper4 = cabSliceFactory.makeSliceCab<4>(); - auto slice_wrapper5 = cabSliceFactory.makeSliceCab<5>(); - auto slice_wrapper6 = cabSliceFactory.makeSliceCab<6>(); - auto slice_wrapper7 = cabSliceFactory.makeSliceCab<7>(); - auto slice_wrapper8 = cabSliceFactory.makeSliceCab<8>(); - - // simd_parallel_for setup - Cabana::SimdPolicy simd_policy(0, num_tuples); - - // kernel that reads and writes - auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) { - double d0 = 42/(s+a+1.3); - double d1 = d0*d0; - double d2 = d1/123.456751; - float f0 = 0; - char c0 = 'a'+s+a; - char c1 = 'a'+((s*a+a) % 26); - int i0 = s+a; - int i1 = s+a/int(d0); - int i2 = i0+i1; - slice_wrapper0.access(s,a) = d0; - slice_wrapper1.access(s,a) = i0; - slice_wrapper2.access(s,a) = f0; - slice_wrapper3.access(s,a) = c0; - slice_wrapper4.access(s,a) = i1; - slice_wrapper5.access(s,a) = c1; - slice_wrapper6.access(s,a) = d1; - slice_wrapper7.access(s,a) = i2; - slice_wrapper8.access(s,a) = d2; - - assert(slice_wrapper0.access(s,a) == d0); - assert(slice_wrapper1.access(s,a) == i0); - assert(slice_wrapper2.access(s,a) == f0); - printf("float: %f\n", slice_wrapper2.access(s,a)); - assert(slice_wrapper3.access(s,a) == c0); - assert(slice_wrapper4.access(s,a) == i1); - assert(slice_wrapper5.access(s,a) == c1); - assert(slice_wrapper6.access(s,a) == d1); - assert(slice_wrapper7.access(s,a) == i2); - assert(slice_wrapper8.access(s,a) == d2); - }; - - Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_many_type_test"); - return 0; -} - -int ordered_many_type_test(int num_tuples) { - using ExecutionSpace = Kokkos::DefaultExecutionSpace; - using MemorySpace = ExecutionSpace::memory_space; - // Slice Wrapper Factory CabSliceFactory Date: Tue, 18 Oct 2022 16:48:29 -0400 Subject: [PATCH 09/10] removed no type test as Cabana::MemberTypes does not support zero types --- CMakeLists.txt | 11 ++++++++++- test/SliceWrapper.cpp | 17 ----------------- 2 files changed, 10 insertions(+), 18 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3d1088cd..b09d105c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,8 +22,17 @@ target_link_libraries(SliceWrapper Omega_h::omega_h) target_link_libraries(SliceWrapper Cabana::cabanacore) target_compile_definitions(SliceWrapper PUBLIC ENABLE_CABANA) +#add_executable(AoSoA_Ordering test/aosoa_ordering_issue.cpp) +#target_link_libraries(AoSoA_Ordering Omega_h::omega_h) +#target_link_libraries(AoSoA_Ordering Cabana::cabanacore) +#target_compile_definitions(AoSoA_Ordering PUBLIC ENABLE_CABANA) + add_test(sliceWrapper10 ./SliceWrapper 10) add_test(sliceWrapper32 ./SliceWrapper 32) add_test(sliceWrapper33 ./SliceWrapper 33) add_test(sliceWrapper50 ./SliceWrapper 50) -add_test(sliceWrapper100 ./SliceWrapper 100) + +#add_test(AoSoAOrderingTest10 ./AoSoA_Ordering 10) +#add_test(AoSoAOrderingTest32 ./AoSoA_Ordering 32) +#add_test(AoSoAOrderingTest33 ./AoSoA_Ordering 33) +#add_test(AoSoAOrderingTest50 ./AoSoA_Ordering 50) diff --git a/test/SliceWrapper.cpp b/test/SliceWrapper.cpp index 9cfa9381..f151a72c 100644 --- a/test/SliceWrapper.cpp +++ b/test/SliceWrapper.cpp @@ -145,22 +145,6 @@ int mix_arrays_test(int num_tuples) { return 0; } -int no_type_test(int num_tuples) { - /* - using ExecutionSpace = Kokkos::DefaultExecutionSpace; - using MemorySpace = ExecutionSpace::memory_space; - bool exception = false; - try { - CabSliceFactory cabSliceFactory(num_tuples); - } - catch (std::invalid_argument &e) { - exception = true; - } - assert(exception); - */ - return 0; -} - int single_type_test(int num_tuples) { using ExecutionSpace = Kokkos::DefaultExecutionSpace; using MemorySpace = ExecutionSpace::memory_space; @@ -286,7 +270,6 @@ int main(int argc, char* argv[]) { Kokkos::ScopeGuard scope_guard(argc, argv); many_type_test(num_tuples); - no_type_test(num_tuples); single_type_test(num_tuples); multi_type_test(num_tuples); From d476e54dae497dabac1c9a42ed2ad144c48759bf Mon Sep 17 00:00:00 2001 From: Henry Cullom Date: Thu, 20 Oct 2022 09:58:37 -0400 Subject: [PATCH 10/10] removed unnecessary test build instructions --- CMakeLists.txt | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b09d105c..7d0a7fd0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,17 +22,7 @@ target_link_libraries(SliceWrapper Omega_h::omega_h) target_link_libraries(SliceWrapper Cabana::cabanacore) target_compile_definitions(SliceWrapper PUBLIC ENABLE_CABANA) -#add_executable(AoSoA_Ordering test/aosoa_ordering_issue.cpp) -#target_link_libraries(AoSoA_Ordering Omega_h::omega_h) -#target_link_libraries(AoSoA_Ordering Cabana::cabanacore) -#target_compile_definitions(AoSoA_Ordering PUBLIC ENABLE_CABANA) - add_test(sliceWrapper10 ./SliceWrapper 10) add_test(sliceWrapper32 ./SliceWrapper 32) add_test(sliceWrapper33 ./SliceWrapper 33) add_test(sliceWrapper50 ./SliceWrapper 50) - -#add_test(AoSoAOrderingTest10 ./AoSoA_Ordering 10) -#add_test(AoSoAOrderingTest32 ./AoSoA_Ordering 32) -#add_test(AoSoAOrderingTest33 ./AoSoA_Ordering 33) -#add_test(AoSoAOrderingTest50 ./AoSoA_Ordering 50)