@@ -86,7 +86,8 @@ using IsReduOptForFastAtomicFetch =
8686#ifdef SYCL_REDUCTION_DETERMINISTIC
8787 bool_constant<false >;
8888#else
89- bool_constant<sycl::detail::is_sgeninteger<T>::value &&
89+ bool_constant<((sycl::detail::is_sgenfloat<T>::value && sizeof (T) == 4 ) ||
90+ sycl::detail::is_sgeninteger<T>::value) &&
9091 sycl::detail::IsValidAtomicType<T>::value &&
9192 (sycl::detail::IsPlus<T, BinaryOperation>::value ||
9293 sycl::detail::IsMinimum<T, BinaryOperation>::value ||
@@ -104,18 +105,15 @@ using IsReduOptForFastAtomicFetch =
104105// IsReduOptForFastReduce. The macro SYCL_REDUCTION_DETERMINISTIC prohibits
105106// using the reduce_over_group() algorithm to produce stable results across same
106107// type devices.
107- // TODO 32 bit floating point atomics are eventually expected to be supported by
108- // the has_fast_atomics specialization. Once the reducer class is updated to
109- // replace the deprecated atomic class with atomic_ref, the (sizeof(T) == 4)
110- // case should be removed here and replaced in IsReduOptForFastAtomicFetch.
111108template <typename T, class BinaryOperation >
112- using IsReduOptForAtomic64Add =
109+ using IsReduOptForAtomic64Op =
113110#ifdef SYCL_REDUCTION_DETERMINISTIC
114111 bool_constant<false >;
115112#else
116- bool_constant<sycl::detail::IsPlus<T, BinaryOperation>::value &&
117- sycl::detail::is_sgenfloat<T>::value &&
118- (sizeof (T) == 4 || sizeof (T) == 8 )>;
113+ bool_constant<(sycl::detail::IsPlus<T, BinaryOperation>::value ||
114+ sycl::detail::IsMinimum<T, BinaryOperation>::value ||
115+ sycl::detail::IsMaximum<T, BinaryOperation>::value) &&
116+ sycl::detail::is_sgenfloat<T>::value && sizeof (T) == 8 >;
119117#endif
120118
121119// This type trait is used to detect if the group algorithm reduce() used with
@@ -278,7 +276,7 @@ template <class Reducer> class combiner {
278276 typename _T = T, class _BinaryOperation = BinaryOperation>
279277 enable_if_t <BasicCheck<_T, Space, _BinaryOperation> &&
280278 (IsReduOptForFastAtomicFetch<T, _BinaryOperation>::value ||
281- IsReduOptForAtomic64Add <T, _BinaryOperation>::value) &&
279+ IsReduOptForAtomic64Op <T, _BinaryOperation>::value) &&
282280 sycl::detail::IsPlus<T, _BinaryOperation>::value>
283281 atomic_combine (_T *ReduVarPtr) const {
284282 atomic_combine_impl<Space>(
@@ -324,7 +322,8 @@ template <class Reducer> class combiner {
324322 template <access::address_space Space = access::address_space::global_space,
325323 typename _T = T, class _BinaryOperation = BinaryOperation>
326324 enable_if_t <BasicCheck<_T, Space, _BinaryOperation> &&
327- IsReduOptForFastAtomicFetch<T, _BinaryOperation>::value &&
325+ (IsReduOptForFastAtomicFetch<T, _BinaryOperation>::value ||
326+ IsReduOptForAtomic64Op<T, _BinaryOperation>::value) &&
328327 sycl::detail::IsMinimum<T, _BinaryOperation>::value>
329328 atomic_combine (_T *ReduVarPtr) const {
330329 atomic_combine_impl<Space>(
@@ -335,7 +334,8 @@ template <class Reducer> class combiner {
335334 template <access::address_space Space = access::address_space::global_space,
336335 typename _T = T, class _BinaryOperation = BinaryOperation>
337336 enable_if_t <BasicCheck<_T, Space, _BinaryOperation> &&
338- IsReduOptForFastAtomicFetch<T, _BinaryOperation>::value &&
337+ (IsReduOptForFastAtomicFetch<T, _BinaryOperation>::value ||
338+ IsReduOptForAtomic64Op<T, _BinaryOperation>::value) &&
339339 sycl::detail::IsMaximum<T, _BinaryOperation>::value>
340340 atomic_combine (_T *ReduVarPtr) const {
341341 atomic_combine_impl<Space>(
@@ -591,8 +591,8 @@ class reduction_impl_algo : public reduction_impl_common<T, BinaryOperation> {
591591 using rw_accessor_type = accessor<T, accessor_dim, access::mode::read_write,
592592 access::target::device, is_placeholder,
593593 ext::oneapi::accessor_property_list<>>;
594- static constexpr bool has_atomic_add_float64 =
595- IsReduOptForAtomic64Add <T, BinaryOperation>::value;
594+ static constexpr bool has_float64_atomics =
595+ IsReduOptForAtomic64Op <T, BinaryOperation>::value;
596596 static constexpr bool has_fast_atomics =
597597 IsReduOptForFastAtomicFetch<T, BinaryOperation>::value;
598598 static constexpr bool has_fast_reduce =
@@ -678,7 +678,7 @@ class reduction_impl_algo : public reduction_impl_common<T, BinaryOperation> {
678678 // / require initialization with identity value, then return user's read-write
679679 // / accessor. Otherwise, create global buffer with 'num_elements' initialized
680680 // / with identity value and return an accessor to that buffer.
681- template <bool HasFastAtomics = (has_fast_atomics || has_atomic_add_float64 )>
681+ template <bool HasFastAtomics = (has_fast_atomics || has_float64_atomics )>
682682 std::enable_if_t <HasFastAtomics, rw_accessor_type>
683683 getReadWriteAccessorToInitializedMem (handler &CGH) {
684684 if constexpr (is_rw_acc) {
@@ -2093,18 +2093,15 @@ template <class KernelName> struct NDRangeAtomic64;
20932093} // namespace main_krn
20942094} // namespace reduction
20952095
2096- // Specialization for devices with the atomic64 aspect, which guarantees 64 (and
2097- // temporarily 32) bit floating point support for atomic add.
2098- // TODO 32 bit floating point atomics are eventually expected to be supported by
2099- // the has_fast_atomics specialization. Corresponding changes to
2100- // IsReduOptForAtomic64Add, as prescribed in its documentation, should then also
2101- // be made.
2096+ // Specialization for devices with the atomic64 aspect, which guarantees 64 bit
2097+ // floating point support for atomic reduction operation.
21022098template <typename KernelName, typename KernelType, int Dims, class Reduction >
21032099void reduCGFuncAtomic64 (handler &CGH, KernelType KernelFunc,
21042100 const nd_range<Dims> &Range, Reduction &Redu) {
21052101 auto Out = Redu.getReadWriteAccessorToInitializedMem (CGH);
2106- static_assert (Reduction::has_atomic_add_float64,
2107- " Only suitable for reductions that have FP64 atomic add." );
2102+ static_assert (
2103+ Reduction::has_float64_atomics,
2104+ " Only suitable for reductions that have FP64 atomic operations." );
21082105 constexpr size_t NElements = Reduction::num_elements;
21092106 using Name =
21102107 __sycl_reduction_kernel<reduction::main_krn::NDRangeAtomic64, KernelName>;
0 commit comments