@@ -1932,45 +1932,6 @@ void reduCGFuncMulti(handler &CGH, KernelType KernelFunc,
19321932 Rest (createReduOutAccs<false >(NWorkGroups, CGH, ReduTuple, ReduIndices));
19331933}
19341934
1935- namespace reduction {
1936- namespace main_krn {
1937- template <class KernelName > struct NDRangeAtomic64 ;
1938- } // namespace main_krn
1939- } // namespace reduction
1940-
1941- // Specialization for devices with the atomic64 aspect, which guarantees 64 bit
1942- // floating point support for atomic reduction operation.
1943- template <typename KernelName, typename KernelType, int Dims,
1944- typename PropertiesT, class Reduction >
1945- void reduCGFuncAtomic64 (handler &CGH, KernelType KernelFunc,
1946- const nd_range<Dims> &Range, PropertiesT Properties,
1947- Reduction &Redu) {
1948- auto Out = Redu.getReadWriteAccessorToInitializedMem (CGH);
1949- static_assert (
1950- Reduction::has_float64_atomics,
1951- " Only suitable for reductions that have FP64 atomic operations." );
1952- size_t NElements = Reduction::num_elements;
1953- using Name =
1954- __sycl_reduction_kernel<reduction::main_krn::NDRangeAtomic64, KernelName>;
1955- CGH.parallel_for <Name>(Range, Properties, [=](nd_item<Dims> NDIt) {
1956- // Call user's function. Reducer.MValue gets initialized there.
1957- typename Reduction::reducer_type Reducer;
1958- KernelFunc (NDIt, Reducer);
1959-
1960- // If there are multiple values, reduce each separately
1961- // reduce_over_group is only defined for each T, not for span<T, ...>
1962- for (int E = 0 ; E < NElements; ++E) {
1963- typename Reduction::binary_operation BOp;
1964- Reducer.getElement (E) =
1965- reduce_over_group (NDIt.get_group (), Reducer.getElement (E), BOp);
1966- }
1967-
1968- if (NDIt.get_local_linear_id () == 0 ) {
1969- Reducer.atomic_combine (&Out[0 ]);
1970- }
1971- });
1972- }
1973-
19741935template <typename ... Reductions, size_t ... Is>
19751936void associateReduAccsWithHandler (handler &CGH,
19761937 std::tuple<Reductions...> &ReduTuple,
@@ -2386,8 +2347,8 @@ void reduction_parallel_for(handler &CGH,
23862347 device D = detail::getDeviceFromHandler (CGH);
23872348
23882349 if (D.has (aspect::atomic64)) {
2389- reduCGFuncAtomic64 <KernelName>(CGH, KernelFunc, Range, Properties,
2390- Redu);
2350+ reduCGFuncForNDRangeBothFastReduceAndAtomics <KernelName>(
2351+ CGH, KernelFunc, Range, Properties, Redu);
23912352 } else {
23922353 // Resort to basic implementation as well.
23932354 reduction_parallel_for_basic_impl<KernelName>(
0 commit comments