1212#include " HeterogeneousCore/AlpakaInterface/interface/memory.h"
1313#include " HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
1414#include " HeterogeneousCore/AlpakaInterface/interface/prefixScan.h"
15+ #include " HeterogeneousCore/AlpakaInterface/interface/warpsize.h"
1516
1617using namespace cms ::alpakatools;
1718using namespace ALPAKA_ACCELERATOR_NAMESPACE ;
@@ -33,17 +34,8 @@ struct format_traits<float> {
3334template <typename T>
3435struct testPrefixScan {
3536 ALPAKA_FN_ACC void operator ()(Acc1D const & acc, unsigned int size) const {
36- // alpaka::warp::getSize(acc) is runtime, but we need a compile-time or constexpr value
37- #if defined(__CUDA_ARCH__)
38- // CUDA always has a warp size of 32
39- auto & ws = alpaka::declareSharedVar<T[32 ], __COUNTER__>(acc);
40- #elif defined(__HIP_DEVICE_COMPILE__)
41- // HIP/ROCm defines warpSize as a constant expression with value 32 or 64 depending on the target device
42- auto & ws = alpaka::declareSharedVar<T[warpSize], __COUNTER__>(acc);
43- #else
44- // CPU back-ends always have a warp size of 1
45- auto & ws = alpaka::declareSharedVar<T[1 ], __COUNTER__>(acc);
46- #endif
37+ // alpaka::warp::getSize(acc) is runtime, but we need a compile-time or constexpr value, so we use cms::alpakatools::warpSize
38+ auto & ws = alpaka::declareSharedVar<T[cms::alpakatools::warpSize], __COUNTER__>(acc);
4739 auto & c = alpaka::declareSharedVar<T[1024 ], __COUNTER__>(acc);
4840 auto & co = alpaka::declareSharedVar<T[1024 ], __COUNTER__>(acc);
4941
0 commit comments