@@ -1759,34 +1759,9 @@ class __SYCL_EXPORT handler {
17591759 // TODO: this variant is currently enabled for 2+ reductions only as the
17601760 // versions handling 1 reduction variable are more efficient right now.
17611761 //
1762- // Algorithm:
1763- // 1) discard_write accessor (DWAcc), InitializeToIdentity = true:
1764- // a) Create uninitialized buffer and read_write accessor (RWAcc).
1765- // b) discard-write partial sums to RWAcc.
1766- // c) Repeat the steps (a) and (b) to get one final sum.
1767- // d) Copy RWAcc to DWAcc.
1768- // 2) read_write accessor (RWAcc), InitializeToIdentity = false:
1769- // a) Create new uninitialized buffer (if #work-groups > 1) and RWAcc or
1770- // re-use user's RWAcc (if #work-groups is 1).
1771- // b) discard-write to RWAcc (#WG > 1), or update-write (#WG == 1).
1772- // c) Repeat the steps (a) and (b) to get one final sum.
1773- // 3) read_write accessor (RWAcc), InitializeToIdentity = true:
1774- // a) Create new uninitialized buffer (if #work-groups > 1) and RWAcc or
1775- // re-use user's RWAcc (if #work-groups is 1).
1776- // b) discard-write to RWAcc.
1777- // c) Repeat the steps (a) and (b) to get one final sum.
1778- // 4) USM pointer, InitializeToIdentity = false:
1779- // a) Create new uninitialized buffer (if #work-groups > 1) and RWAcc or
1780- // re-use user's USM pointer (if #work-groups is 1).
1781- // b) discard-write to RWAcc (#WG > 1) or
1782- // update-write to USM pointer (#WG == 1).
1783- // c) Repeat the steps (a) and (b) to get one final sum.
1784- // 5) USM pointer, InitializeToIdentity = true:
1785- // a) Create new uninitialized buffer (if #work-groups > 1) and RWAcc or
1786- // re-use user's USM pointer (if #work-groups is 1).
1787- // b) discard-write to RWAcc (#WG > 1) or
1788- // discard-write to USM pointer (#WG == 1).
1789- // c) Repeat the steps (a) and (b) to get one final sum.
1762+ // This is basically a tree reduction in which the last iteration
1763+ // (#WG == 1) re-uses the user's reduction variable instead of creating
1764+ // temporary storage.
17901765 template <typename KernelName = detail::auto_name, int Dims,
17911766 typename ... RestT>
17921767 std::enable_if_t <(sizeof ...(RestT) >= 3 &&
@@ -1823,11 +1798,6 @@ class __SYCL_EXPORT handler {
18231798 AuxHandler, NWorkItems, MaxWGSize, ReduTuple, ReduIndices);
18241799 });
18251800 } // end while (NWorkItems > 1)
1826-
1827- auto CopyEvent = detail::reduSaveFinalResultToUserMem (
1828- QueueCopy, MIsHost, ReduTuple, ReduIndices);
1829- if (CopyEvent)
1830- MLastEvent = *CopyEvent;
18311801 }
18321802#endif // __cplusplus >= 201703L
18331803
0 commit comments