Skip to content

Commit 398d6b7

Browse files
committed
vendor warp reduce functions
Format and fix
1 parent 18a86c0 commit 398d6b7

File tree

1 file changed

+16
-0
lines changed

1 file changed

+16
-0
lines changed

algorithms/cudahip/Reduction.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,22 @@ __forceinline__ __device__ T shuffledown(T value, int offset) {
5151
template <typename T, typename OperationT>
5252
__device__ __forceinline__ T warpReduce(T value, OperationT operation) {
5353

54+
#if (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800) || defined(__HIPCC__)
55+
// C++17 compile-time check: take the intrinsic path only when T is int or unsigned int
56+
if constexpr (std::is_same_v<T, int> || std::is_same_v<T, unsigned int>) {
57+
58+
unsigned int mask = 0xFFFFFFFF; // participant mask with all 32 bits set (fixed constant, not a queried active mask)
59+
60+
if constexpr (std::is_same_v<operation, device::Sum<T>>) {
61+
return __reduce_add_sync(mask, value);
62+
} else if constexpr (std::is_same_v<operation, device::Min<T>>) {
63+
return __reduce_min_sync(mask, value);
64+
} else if constexpr (std::is_same_v<operation, device::Max<T>>) {
65+
return __reduce_max_sync(mask, value);
66+
}
67+
}
68+
#endif
69+
5470
for (int offset = warpSize / 2; offset > 0; offset /= 2) {
5571
value = operation(value, shuffledown(value, offset));
5672
}

0 commit comments

Comments
 (0)