File tree Expand file tree Collapse file tree 1 file changed +16
-0
lines changed
Expand file tree Collapse file tree 1 file changed +16
-0
lines changed Original file line number Diff line number Diff line change @@ -51,6 +51,22 @@ __forceinline__ __device__ T shuffledown(T value, int offset) {
5151template <typename T, typename OperationT>
5252__device__ __forceinline__ T warpReduce (T value, OperationT operation) {
5353
54+ #if (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800) || defined(__HIPCC__)
55+ // C++17 compile-time check: the hardware warp-reduce intrinsics below are only used when T is int or unsigned int
56+ if constexpr (std::is_same_v<T, int > || std::is_same_v<T, unsigned int >) {
57+
58+ unsigned int mask = 0xFFFFFFFF ; // member mask naming all 32 lanes of the warp as participants (not __activemask())
59+
60+ if constexpr (std::is_same_v<operation, device::Sum<T>>) {
61+ return __reduce_add_sync (mask, value);
62+ } else if constexpr (std::is_same_v<operation, device::Min<T>>) {
63+ return __reduce_min_sync (mask, value);
64+ } else if constexpr (std::is_same_v<operation, device::Max<T>>) {
65+ return __reduce_max_sync (mask, value);
66+ }
67+ }
68+ #endif
69+
5470 for (int offset = warpSize / 2 ; offset > 0 ; offset /= 2 ) {
5571 value = operation (value, shuffledown (value, offset));
5672 }
You can’t perform that action at this time.
0 commit comments