diff --git a/src/SliceWrapper.hpp b/src/SliceWrapper.hpp index de20b8f5..7777944c 100644 --- a/src/SliceWrapper.hpp +++ b/src/SliceWrapper.hpp @@ -11,8 +11,8 @@ struct SliceWrapper { SliceWrapper(SliceType st) : st_(st) {} KOKKOS_INLINE_FUNCTION - T& access(const int s, const int a, int i) const { - return st_.access(s,a,i); + T& access(const int s, const int a) const { + return st_.access(s,a); } int arraySize(int s) { return st_.arraySize(s); @@ -24,23 +24,34 @@ struct SliceWrapper { using namespace Cabana; -template +template class CabSliceFactory { + static constexpr int vecLen = Impl::PerformanceTraits::vector_length/8; + using TypeTuple = std::tuple; using DeviceType = Kokkos::Device; - using DataTypes = Cabana::MemberTypes; + using DataTypes = Cabana::MemberTypes; + using soa_t = SoA; + + template using member_slice_t = - Cabana::Slice; - using wrapper_slice_t = SliceWrapper; + vecLen, stride>; + + template + using wrapper_slice_t = SliceWrapper, T>; Cabana::AoSoA aosoa; public: - wrapper_slice_t makeSliceCab() { - auto slice0 = Cabana::slice<0>(aosoa); - return wrapper_slice_t(std::move(slice0)); + template + auto makeSliceCab() { + using type = std::tuple_element_t; + const int stride = (vecLen * sizeof(soa_t)) / (4 * sizeof(type)); + auto slice = Cabana::slice(aosoa); + return wrapper_slice_t< type, stride >(std::move(slice)); } + CabSliceFactory(int n) : aosoa("sliceAoSoA", n) {} }; diff --git a/test/SliceWrapper.cpp b/test/SliceWrapper.cpp index 2baa7399..d82a5fa4 100644 --- a/test/SliceWrapper.cpp +++ b/test/SliceWrapper.cpp @@ -3,34 +3,37 @@ int main(int argc, char* argv[]) { // AoSoA parameters const int vecLen = 4; - const int width = 1; int num_tuples = 10; Kokkos::ScopeGuard scope_guard(argc, argv); - using member_type = double; - using DataTypes = Cabana::MemberTypes; - using ExecutionSpace = Kokkos::Cuda; - using MemorySpace = Kokkos::CudaSpace; - + using ExecutionSpace = Kokkos::DefaultExecutionSpace; + using MemorySpace = ExecutionSpace::memory_space; // Slice Wrapper Factory CabSliceFactory cabSliceFactory(num_tuples); + double, int, float, char> cabSliceFactory(num_tuples); - auto slice_wrapper = cabSliceFactory.makeSliceCab(); + auto slice_wrapper0 = cabSliceFactory.makeSliceCab<0>(); + auto slice_wrapper1 = cabSliceFactory.makeSliceCab<1>(); + auto slice_wrapper2 = cabSliceFactory.makeSliceCab<2>(); + auto slice_wrapper3 = cabSliceFactory.makeSliceCab<3>(); // simd_parallel_for setup Cabana::SimdPolicy simd_policy(0, num_tuples); // kernel that reads and writes auto vector_kernel = KOKKOS_LAMBDA(const int s, const int a) { - for (int i = 0; i < width; i++) { - printf("s: %d, a: %d, i: %d\n", s,a,i); - double x = 42/(s+a+1.3); - slice_wrapper.access(s,a,i) = x; - printf("value: %lf\n", slice_wrapper.access(s,a,i)); - } + printf("s: %d, a: %d\n", s,a); + double x = 42/(s+a+1.3); + slice_wrapper0.access(s,a) = x; + slice_wrapper1.access(s,a) = s+a; + slice_wrapper2.access(s,a) = float(x); + slice_wrapper3.access(s,a) = 'a'+s+a; + printf("SW0 value: %lf\n", slice_wrapper0.access(s,a)); + printf("SW1 value: %d\n", slice_wrapper1.access(s,a)); + printf("SW2 value: %f\n", slice_wrapper2.access(s,a)); + printf("SW3 value: %c\n", slice_wrapper3.access(s,a)); }; Cabana::simd_parallel_for(simd_policy, vector_kernel, "parallel_for_cabSliceFactory");