diff --git a/CMakeLists.txt b/CMakeLists.txt index 7d0a7fd0..29c28296 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,13 +16,17 @@ include(CTest) option(IS_TESTING "Build for CTest" OFF) message(STATUS "IS_TESTING: ${IS_TESTING}") -add_executable(SliceWrapper test/SliceWrapper.cpp) +add_executable(SliceWrapper test/testSliceWrapper.cpp) target_include_directories(SliceWrapper PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/src) #hack - this should be a header-only library target_link_libraries(SliceWrapper Omega_h::omega_h) target_link_libraries(SliceWrapper Cabana::cabanacore) target_compile_definitions(SliceWrapper PUBLIC ENABLE_CABANA) -add_test(sliceWrapper10 ./SliceWrapper 10) -add_test(sliceWrapper32 ./SliceWrapper 32) -add_test(sliceWrapper33 ./SliceWrapper 33) +add_executable(MeshField test/testMeshField.cpp) +target_include_directories(MeshField PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/src) #hack - this should be a header-only library +target_link_libraries(MeshField Omega_h::omega_h) +target_link_libraries(MeshField Cabana::cabanacore) +target_compile_definitions(MeshField PUBLIC ENABLE_CABANA) + add_test(sliceWrapper50 ./SliceWrapper 50) +add_test(meshField ./MeshField) diff --git a/src/MeshField.hpp b/src/MeshField.hpp new file mode 100644 index 00000000..671749c3 --- /dev/null +++ b/src/MeshField.hpp @@ -0,0 +1,61 @@ +#ifndef meshfield_hpp +#define meshfield_hpp + +#include "SliceWrapper.hpp" + +namespace MeshField { + +template +class Field { + + Slice slice; + +public: + Field(Slice s) : slice(s) {} + + KOKKOS_INLINE_FUNCTION + auto& operator()(int s, int a) const { + return slice.access(s,a); + } + + KOKKOS_INLINE_FUNCTION + auto& operator()(int s, int a, int i) const { + return slice.access(s,a,i); + } + + KOKKOS_INLINE_FUNCTION + auto& operator()(int s, int a, int i, int j) const { + return slice.access(s,a,i,j); + } + + KOKKOS_INLINE_FUNCTION + auto& operator()(int s, int a, int i, int j, int k) const { + return slice.access(s,a,i,j,k); + } +}; + +template +class MeshField { + + Controller sliceController; + +public: + MeshField(Controller sc) : sliceController(std::move(sc)) {} + + template + auto makeField() { + auto slice = sliceController.template makeSlice(); + return Field(std::move(slice)); + } + + template + void parallel_for(int lower_bound, int upper_bound, + FunctorType vector_kernel, + std::string tag) { + sliceController.parallel_for(lower_bound, upper_bound, vector_kernel, tag); + } + +}; + +} +#endif diff --git a/src/SliceWrapper.hpp b/src/SliceWrapper.hpp index 0c852610..8fb2c938 100644 --- a/src/SliceWrapper.hpp +++ b/src/SliceWrapper.hpp @@ -9,6 +9,8 @@ struct SliceWrapper { SliceType st_; //store the underlying instance SliceWrapper(SliceType st) : st_(st) {} + + SliceWrapper() {} KOKKOS_INLINE_FUNCTION T& access(int s, int a) const { @@ -25,14 +27,13 @@ struct SliceWrapper { KOKKOS_INLINE_FUNCTION auto& access(int s, int a, int i, int j, int k) const { return st_.access(s,a,i,j,k); - } - + } }; using namespace Cabana; template -class CabSliceFactory { +class CabSliceController { using TypeTuple = std::tuple; using DeviceType = Kokkos::Device; using DataTypes = Cabana::MemberTypes; @@ -60,15 +61,23 @@ class CabSliceFactory { Cabana::AoSoA aosoa; public: + template + void parallel_for(int lower_bound, int upper_bound, FunctorType vectorKernel, std::string tag) { + Cabana::SimdPolicy simd_policy(lower_bound, upper_bound); + Cabana::simd_parallel_for(simd_policy, vectorKernel, tag); + } + template - auto makeSliceCab() { + auto makeSlice() { using type = std::tuple_element_t; const int stride = sizeof(soa_t) / sizeof(member_value_t); auto slice = Cabana::slice(aosoa); return wrapper_slice_t< type, stride >(std::move(slice)); } + + CabSliceController() {} - CabSliceFactory(int n) : aosoa("sliceAoSoA", n) { + CabSliceController(int n) : aosoa("sliceAoSoA", n) { if (sizeof...(Ts) == 0) { throw std::invalid_argument("Must provide at least one member type in template definition"); } diff --git a/test/testMeshField.cpp b/test/testMeshField.cpp new file mode 100644 index 00000000..8de4bbdc --- /dev/null +++ b/test/testMeshField.cpp @@ -0,0 +1,262 @@ +#include "MeshField.hpp" +#include "SliceWrapper.hpp" + +#include + +using ExecutionSpace = Kokkos::DefaultExecutionSpace; +using MemorySpace = ExecutionSpace::memory_space; + + +void single_type(int num_tuples) { + using Controller = CabSliceController; + + // Slice Wrapper Controller + Controller c(num_tuples); + MeshField::MeshField cabMeshField(c); + + auto field0 = cabMeshField.makeField<0>(); + + auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) + { + double d0 = 10; + field0(s,a) = d0; + assert(field0(s,a) == d0); + }; + + cabMeshField.parallel_for(0,num_tuples,vector_kernel,"single_type_pfor"); + +} + +void multi_type(int num_tuples) { + using Controller = CabSliceController; + + // Slice Wrapper Controller + Controller c(num_tuples); + MeshField::MeshField cabMeshField(c); + + auto field0 = cabMeshField.makeField<0>(); + auto field1 = cabMeshField.makeField<1>(); + auto field2 = cabMeshField.makeField<2>(); + auto field3 = cabMeshField.makeField<3>(); + auto field4 = cabMeshField.makeField<4>(); + + auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) + { + double d0 = 10.456; + field0(s,a) = d0; + double d1 = 43.973234567; + field1(s,a) = d1; + float f0 = 123.45; + field2(s,a) = f0; + int i0 = 22; + field3(s,a) = i0; + char c0 = 'a'; + field4(s,a) = c0; + + assert(field0(s,a) == d0); + assert(field1(s,a) == d1); + assert(field2(s,a) == f0); + assert(field3(s,a) == i0); + assert(field4(s,a) == c0); + }; + + cabMeshField.parallel_for(0,num_tuples,vector_kernel,"multi_type_pfor"); +} + +void many_type(int num_tuples) { + + using Controller = CabSliceController; + + // Slice Wrapper Controller + Controller c(num_tuples); + MeshField::MeshField cabMeshField(c); + + auto field0 = cabMeshField.makeField<0>(); + auto field1 = cabMeshField.makeField<1>(); + auto field2 = cabMeshField.makeField<2>(); + auto field3 = cabMeshField.makeField<3>(); + auto field4 = cabMeshField.makeField<4>(); + auto field5 = cabMeshField.makeField<5>(); + auto field6 = cabMeshField.makeField<6>(); + + auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) + { + double d0 = 10.456; + field0(s,a) = d0; + double d1 = 43.973234567; + field1(s,a) = d1; + float f0 = 123.45; + field2(s,a) = f0; + float f1 = 543.21; + field3(s,a) = f1; + int i0 = 222; + field4(s,a) = i0; + short int i1 = 50; + field5(s,a) = i1; + char c0 = 'h'; + field6(s,a) = c0; + + assert(field0(s,a) == d0); + assert(field1(s,a) == d1); + assert(field2(s,a) == f0); + assert(field3(s,a) == f1); + assert(field4(s,a) == i0); + assert(field5(s,a) == i1); + assert(field6(s,a) == c0); + }; + + cabMeshField.parallel_for(0,num_tuples,vector_kernel,"many_type_pfor"); +} + +void rank1_arr(int num_tuples) { + const int width = 3; + using Controller = CabSliceController; + + // Slice Wrapper Controller + Controller c(num_tuples); + MeshField::MeshField cabMeshField(c); + + auto field0 = cabMeshField.makeField<0>(); + + + auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) + { + for (int i = 0; i < width; i++) + { + double d0 = 10+i; + field0(s,a,i) = d0; + assert(field0(s,a,i) == d0); + } + }; + + cabMeshField.parallel_for(0,num_tuples,vector_kernel,"rank1_arr_pfor"); +} + +void rank2_arr(int num_tuples) { + const int width = 3; + const int height = 4; + using Controller = CabSliceController; + + // Slice Wrapper Controller + Controller c(num_tuples); + MeshField::MeshField cabMeshField(c); + + auto field0 = cabMeshField.makeField<0>(); + + auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) + { + for (int i = 0; i < width; i++) + { + for (int j = 0; j < height; j++) + { + double d0 = (10+i)/(j+1); + field0(s,a,i,j) = d0; + assert(field0(s,a,i,j) == d0); + } + } + }; + + cabMeshField.parallel_for(0,num_tuples,vector_kernel,"rank2_arr_pfor"); +} + +void rank3_arr(int num_tuples) { + const int width = 3; + const int height = 4; + const int depth = 2; + using Controller = CabSliceController; + + // Slice Wrapper Controller + Controller c(num_tuples); + MeshField::MeshField cabMeshField(c); + + auto field0 = cabMeshField.makeField<0>(); + + auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) + { + for (int i = 0; i < width; i++) + { + for (int j = 0; j < height; j++) + { + for (int k = 0; k < depth; k++) + { + double d0 = ((10+i)*(k+1))/(j+1); + field0(s,a,i,j,k) = d0; + assert(field0(s,a,i,j,k) == d0); + } + } + } + }; + + cabMeshField.parallel_for(0,num_tuples,vector_kernel,"rank3_arr_pfor"); + +} + +void mix_arr(int num_tuples) { + const int width = 3; + const int height = 4; + const int depth = 2; + using Controller = CabSliceController; + + // Slice Wrapper Controller + Controller c(num_tuples); + MeshField::MeshField cabMeshField(c); + + auto field0 = cabMeshField.makeField<0>(); + auto field1 = cabMeshField.makeField<1>(); + auto field2 = cabMeshField.makeField<2>(); + auto field3 = cabMeshField.makeField<3>(); + + auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) + { + float f0; + int i0; + char c0 = 's'; + field3(s,a) = c0; + + for (int i = 0; i < width; i++) + { + i0 = i+s+a; + field2(s,a,i) = i0; + for (int j = 0; j < height; j++) + { + f0 = i0 / (i+j+1.123); + field1(s,a,i,j) = f0; + for (int k = 0; k < depth; k++) + { + double d0 = ((10+i)*(k+1))/(j+1); + field0(s,a,i,j,k) = d0; + assert(field0(s,a,i,j,k) == d0); + } + assert(field1(s,a,i,j) == f0); + } + assert(field2(s,a,i) == i0); + } + assert(field3(s,a) == c0); + }; + + cabMeshField.parallel_for(0,num_tuples,vector_kernel,"mix_arr_pfor"); + +} + +int main(int argc, char* argv[]) { + int num_tuples = (argc < 2) ? (1000) : (atoi(argv[1])); + Kokkos::ScopeGuard scope_guard(argc, argv); + + single_type(num_tuples); + multi_type(num_tuples); + many_type(num_tuples); + rank1_arr(num_tuples); + rank2_arr(num_tuples); + rank3_arr(num_tuples); + mix_arr(num_tuples); + + return 0; +} diff --git a/test/SliceWrapper.cpp b/test/testSliceWrapper.cpp similarity index 59% rename from test/SliceWrapper.cpp rename to test/testSliceWrapper.cpp index f151a72c..42d81b77 100644 --- a/test/SliceWrapper.cpp +++ b/test/testSliceWrapper.cpp @@ -7,15 +7,12 @@ int rank1_array_test(int num_tuples) { const int width = 3; - // Slice Wrapper Factory - CabSliceFactory cabSliceFactory(num_tuples); + // Slice Wrapper Controller + CabSliceController cabSliceController(num_tuples); - auto slice_wrapper0 = cabSliceFactory.makeSliceCab<0>(); + auto slice_wrapper0 = cabSliceController.makeSlice<0>(); - // simd_parallel_for setup - Cabana::SimdPolicy simd_policy(0, num_tuples); - // kernel that reads and writes auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) { for (int i = 0; i < width; i++) { @@ -24,10 +21,8 @@ int rank1_array_test(int num_tuples) { assert(slice_wrapper0.access(s,a,i) == x); } }; - - Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_rank1_array_test"); - return 0; - + cabSliceController.parallel_for(0, num_tuples, vector_kernel, "parallel_for_rank1_array"); + return 0; } int rank2_array_test(int num_tuples) { @@ -37,15 +32,12 @@ int rank2_array_test(int num_tuples) { const int width = 3; const int height = 4; - // Slice Wrapper Factory - CabSliceFactory cabSliceFactory(num_tuples); + // Slice Wrapper Controller + CabSliceController cabSliceController(num_tuples); - auto slice_wrapper0 = cabSliceFactory.makeSliceCab<0>(); + auto slice_wrapper0 = cabSliceController.makeSlice<0>(); - // simd_parallel_for setup - Cabana::SimdPolicy simd_policy(0, num_tuples); - // kernel that reads and writes auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) { for (int i = 0; i < width; i++) { @@ -56,8 +48,7 @@ int rank2_array_test(int num_tuples) { } } }; - - Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_rank2_array_test"); + cabSliceController.parallel_for(0, num_tuples, vector_kernel, "parallel_for_rank2_array"); return 0; } @@ -69,15 +60,12 @@ int rank3_array_test(int num_tuples) { const int height = 4; const int depth = 2; - // Slice Wrapper Factory - CabSliceFactory cabSliceFactory(num_tuples); + // Slice Wrapper Controller + CabSliceController cabSliceController(num_tuples); - auto slice_wrapper0 = cabSliceFactory.makeSliceCab<0>(); + auto slice_wrapper0 = cabSliceController.makeSlice<0>(); - // simd_parallel_for setup - Cabana::SimdPolicy simd_policy(0, num_tuples); - // kernel that reads and writes auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) { for (int i = 0; i < width; i++) { @@ -90,8 +78,8 @@ int rank3_array_test(int num_tuples) { } } }; + cabSliceController.parallel_for(0, num_tuples, vector_kernel, "parallel_for_rank3_array"); - Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_rank3_array_test"); return 0; } @@ -103,19 +91,16 @@ int mix_arrays_test(int num_tuples) { const int height = 4; const int depth = 2; - // Slice Wrapper Factory - CabSliceFactory cabSliceFactory(num_tuples); + float[width][height]> cabSliceController(num_tuples); + + auto slice_wrapper0 = cabSliceController.makeSlice<0>(); + auto slice_wrapper1 = cabSliceController.makeSlice<1>(); + auto slice_wrapper2 = cabSliceController.makeSlice<2>(); + auto slice_wrapper3 = cabSliceController.makeSlice<3>(); - auto slice_wrapper0 = cabSliceFactory.makeSliceCab<0>(); - auto slice_wrapper1 = cabSliceFactory.makeSliceCab<1>(); - auto slice_wrapper2 = cabSliceFactory.makeSliceCab<2>(); - auto slice_wrapper3 = cabSliceFactory.makeSliceCab<3>(); - - // simd_parallel_for setup - Cabana::SimdPolicy simd_policy(0, num_tuples); - // kernel that reads and writes auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) { char x0 = 'a'+(s+a); @@ -140,8 +125,7 @@ int mix_arrays_test(int num_tuples) { } } }; - - Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_mix_arrays_test"); + cabSliceController.parallel_for(0, num_tuples, vector_kernel, "parallel_for_mix_array"); return 0; } @@ -149,14 +133,11 @@ int single_type_test(int num_tuples) { using ExecutionSpace = Kokkos::DefaultExecutionSpace; using MemorySpace = ExecutionSpace::memory_space; - // Slice Wrapper Factory - CabSliceFactory cabSliceFactory(num_tuples); - - auto slice_wrapper0 = cabSliceFactory.makeSliceCab<0>(); + // Slice Wrapper Controller + CabSliceController cabSliceController(num_tuples); - // simd_parallel_for setup - Cabana::SimdPolicy simd_policy(0, num_tuples); + auto slice_wrapper0 = cabSliceController.makeSlice<0>(); // kernel that reads and writes auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) { @@ -164,8 +145,8 @@ int single_type_test(int num_tuples) { slice_wrapper0.access(s,a) = x; assert(slice_wrapper0.access(s,a) == x); }; + cabSliceController.parallel_for(0, num_tuples, vector_kernel, "parallel_for_single_type"); - Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_single_type_test"); return 0; } @@ -173,17 +154,14 @@ int multi_type_test(int num_tuples) { using ExecutionSpace = Kokkos::DefaultExecutionSpace; using MemorySpace = ExecutionSpace::memory_space; - // Slice Wrapper Factory - CabSliceFactory cabSliceFactory(num_tuples); - - auto slice_wrapper0 = cabSliceFactory.makeSliceCab<0>(); - auto slice_wrapper1 = cabSliceFactory.makeSliceCab<1>(); - auto slice_wrapper2 = cabSliceFactory.makeSliceCab<2>(); - auto slice_wrapper3 = cabSliceFactory.makeSliceCab<3>(); + // Slice Wrapper Controller + CabSliceController cabSliceController(num_tuples); - // simd_parallel_for setup - Cabana::SimdPolicy simd_policy(0, num_tuples); + auto slice_wrapper0 = cabSliceController.makeSlice<0>(); + auto slice_wrapper1 = cabSliceController.makeSlice<1>(); + auto slice_wrapper2 = cabSliceController.makeSlice<2>(); + auto slice_wrapper3 = cabSliceController.makeSlice<3>(); // kernel that reads and writes auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) { @@ -199,8 +177,8 @@ int multi_type_test(int num_tuples) { assert(slice_wrapper2.access(s,a) == float(x)); assert(slice_wrapper3.access(s,a) == c); }; + cabSliceController.parallel_for(0, num_tuples, vector_kernel, "parallel_for_multi_type"); - Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_multi_type_test"); return 0; } @@ -208,24 +186,21 @@ int many_type_test(int num_tuples) { using ExecutionSpace = Kokkos::DefaultExecutionSpace; using MemorySpace = ExecutionSpace::memory_space; - // Slice Wrapper Factory - CabSliceFactory cabSliceFactory(num_tuples); + short int, char, char> cabSliceController(num_tuples); - auto slice_wrapper0 = cabSliceFactory.makeSliceCab<0>(); - auto slice_wrapper1 = cabSliceFactory.makeSliceCab<1>(); - auto slice_wrapper2 = cabSliceFactory.makeSliceCab<2>(); - auto slice_wrapper3 = cabSliceFactory.makeSliceCab<3>(); - auto slice_wrapper4 = cabSliceFactory.makeSliceCab<4>(); - auto slice_wrapper5 = cabSliceFactory.makeSliceCab<5>(); - auto slice_wrapper6 = cabSliceFactory.makeSliceCab<6>(); - auto slice_wrapper7 = cabSliceFactory.makeSliceCab<7>(); - auto slice_wrapper8 = cabSliceFactory.makeSliceCab<8>(); - - // simd_parallel_for setup - Cabana::SimdPolicy simd_policy(0, num_tuples); + auto slice_wrapper0 = cabSliceController.makeSlice<0>(); + auto slice_wrapper1 = cabSliceController.makeSlice<1>(); + auto slice_wrapper2 = cabSliceController.makeSlice<2>(); + auto slice_wrapper3 = cabSliceController.makeSlice<3>(); + auto slice_wrapper4 = cabSliceController.makeSlice<4>(); + auto slice_wrapper5 = cabSliceController.makeSlice<5>(); + auto slice_wrapper6 = cabSliceController.makeSlice<6>(); + auto slice_wrapper7 = cabSliceController.makeSlice<7>(); + auto slice_wrapper8 = cabSliceController.makeSlice<8>(); // kernel that reads and writes auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) { @@ -258,8 +233,7 @@ int many_type_test(int num_tuples) { assert(slice_wrapper7.access(s,a) == c0); assert(slice_wrapper8.access(s,a) == c1); }; - - Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_many_type_test"); + cabSliceController.parallel_for(0, num_tuples, vector_kernel, "parallel_for_many_type"); return 0; } @@ -268,17 +242,16 @@ int main(int argc, char* argv[]) { int num_tuples = atoi(argv[1]); Kokkos::ScopeGuard scope_guard(argc, argv); - - many_type_test(num_tuples); + single_type_test(num_tuples); multi_type_test(num_tuples); + many_type_test(num_tuples); rank1_array_test(num_tuples); rank2_array_test(num_tuples); rank3_array_test(num_tuples); mix_arrays_test(num_tuples); - - assert(cudaSuccess == cudaDeviceSynchronize()); + printf("done\n"); return 0;