55# it does double the number of operations compared to accumulate,
66# though for cheap operations like + this does not have much impact (20%)
77function _accumulate_pairwise! (op:: Op , c:: AbstractVector{T} , v:: AbstractVector , s, i1, n):: T where {T,Op}
8- @inbounds if n < 128
9- s_ = v[i1]
10- c[i1] = op (s, s_)
8+ if n < 128
9+ @inbounds s_ = v[i1]
10+ ci1 = op (s, s_)
11+ @inbounds c[i1] = ci1
1112 for i = i1+ 1 : i1+ n- 1
12- s_ = op (s_, v[i])
13- c[i] = op (s, s_)
13+ s_ = op (s_, @inbounds (v[i]))
14+ ci = op (s, s_)
15+ @inbounds c[i] = ci
1416 end
1517 else
1618 n2 = n >> 1
@@ -26,7 +28,8 @@ function accumulate_pairwise!(op::Op, result::AbstractVector, v::AbstractVector)
2628 n = length (li)
2729 n == 0 && return result
2830 i1 = first (li)
29- @inbounds result[i1] = v1 = reduce_first (op,v[i1])
31+ v1 = reduce_first (op, @inbounds (v[i1]))
32+ @inbounds result[i1] = v1
3033 n == 1 && return result
3134 _accumulate_pairwise! (op, result, v, v1, i1+ 1 , n- 1 )
3235 return result
@@ -379,16 +382,16 @@ function _accumulate!(op, B, A, dims::Integer, init::Union{Nothing, Some})
379382 # We can accumulate to a temporary variable, which allows
380383 # register usage and will be slightly faster
381384 ind1 = inds_t[1 ]
382- @inbounds for I in CartesianIndices (tail (inds_t))
385+ for I in CartesianIndices (tail (inds_t))
383386 if init === nothing
384- tmp = reduce_first (op, A[first (ind1), I])
387+ tmp = reduce_first (op, @inbounds ( A[first (ind1), I]) )
385388 else
386- tmp = op (something (init), A[first (ind1), I])
389+ tmp = op (something (init), @inbounds ( A[first (ind1), I]) )
387390 end
388- B[first (ind1), I] = tmp
391+ @inbounds B[first (ind1), I] = tmp
389392 for i_1 = first (ind1)+ 1 : last (ind1)
390- tmp = op (tmp, A[i_1, I])
391- B[i_1, I] = tmp
393+ tmp = op (tmp, @inbounds ( A[i_1, I]) )
394+ @inbounds B[i_1, I] = tmp
392395 end
393396 end
394397 else
@@ -402,25 +405,31 @@ end
402405@noinline function _accumulaten! (op, B, A, R1, ind, R2, init:: Nothing )
403406 # Copy the initial element in each 1d vector along dimension `dim`
404407 ii = first (ind)
405- @inbounds for J in R2, I in R1
406- B[I, ii, J] = reduce_first (op, A[I, ii, J])
408+ for J in R2, I in R1
409+ tmp = reduce_first (op, @inbounds (A[I, ii, J]))
410+ @inbounds B[I, ii, J] = tmp
407411 end
408412 # Accumulate
409- @inbounds for J in R2, i in first (ind)+ 1 : last (ind), I in R1
410- B[I, i, J] = op (B[I, i- 1 , J], A[I, i, J])
413+ for J in R2, i in first (ind)+ 1 : last (ind), I in R1
414+ @inbounds Bv, Av = B[I, i- 1 , J], A[I, i, J]
415+ tmp = op (Bv, Av)
416+ @inbounds B[I, i, J] = tmp
411417 end
412418 B
413419end
414420
415421@noinline function _accumulaten! (op, B, A, R1, ind, R2, init:: Some )
416422 # Copy the initial element in each 1d vector along dimension `dim`
417423 ii = first (ind)
418- @inbounds for J in R2, I in R1
419- B[I, ii, J] = op (something (init), A[I, ii, J])
424+ for J in R2, I in R1
425+ tmp = op (something (init), @inbounds (A[I, ii, J]))
426+ @inbounds B[I, ii, J] = tmp
420427 end
421428 # Accumulate
422- @inbounds for J in R2, i in first (ind)+ 1 : last (ind), I in R1
423- B[I, i, J] = op (B[I, i- 1 , J], A[I, i, J])
429+ for J in R2, i in first (ind)+ 1 : last (ind), I in R1
430+ @inbounds Bv, Av = B[I, i- 1 , J], A[I, i, J]
431+ tmp = op (Bv, Av)
432+ @inbounds B[I, i, J] = tmp
424433 end
425434 B
426435end
@@ -434,10 +443,10 @@ function _accumulate1!(op, B, v1, A::AbstractVector, dim::Integer)
434443 cur_val = v1
435444 B[i1] = cur_val
436445 next = iterate (inds, state)
437- @inbounds while next != = nothing
446+ while next != = nothing
438447 (i, state) = next
439- cur_val = op (cur_val, A[i])
440- B[i] = cur_val
448+ cur_val = op (cur_val, @inbounds ( A[i]) )
449+ @inbounds B[i] = cur_val
441450 next = iterate (inds, state)
442451 end
443452 return B
0 commit comments