Skip to content

Commit ab24678

Browse files
authored
Create vsum (#496)
* Create `vsum` * Add docs and unittests for `vsum` * Export `vsum`
1 parent c8a1414 commit ab24678

File tree

4 files changed

+34
-0
lines changed

4 files changed

+34
-0
lines changed

docs/src/vectorized_convenience_functions.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,4 +132,22 @@ julia> @btime mapreduce(hypot, +, $x, $y)
132132
96.75538300513509
133133
```
134134

135+
## vsum
136+
137+
Vectorized version of `sum`. `vsum(f, a)` applies `f` to each element `a[i]` for `i in eachindex(a)`, then sums the results; `vsum(a)` sums the elements of `a` directly.
138+
139+
```julia
140+
julia> using LoopVectorization, BenchmarkTools
141+
142+
julia> x = rand(127);
143+
144+
julia> @btime vsum(hypot, $x)
145+
12.095 ns (0 allocations: 0 bytes)
146+
66.65246070098374
147+
148+
julia> @btime sum(hypot, $x)
149+
16.992 ns (0 allocations: 0 bytes)
150+
66.65246070098372
151+
```
152+
135153

src/LoopVectorization.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,7 @@ export LowDimArray,
196196
vfilter,
197197
vfilter!,
198198
vmapreduce,
199+
vsum,
199200
vreduce,
200201
vcount
201202

@@ -245,6 +246,7 @@ loop-reordering so as to improve performance:
245246
- [`@turbo`](@ref): transform `for`-loops and broadcasting
246247
- [`vmapreduce`](@ref): vectorized version of `mapreduce`
247248
- [`vreduce`](@ref): vectorized version of `reduce`
249+
- [`vsum`](@ref): vectorized version of `sum`
248250
- [`vmap`](@ref) and `vmap!`: vectorized version of `map` and `map!`
249251
- [`vmapnt`](@ref) and `vmapnt!`: non-temporal variants of `vmap` and `vmap!`
250252
- [`vmapntt`](@ref) and `vmapntt!`: threaded variants of `vmapnt` and `vmapnt!`

src/simdfunctionals/mapreduce.jl

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import VectorizationBase: vsum
12

23
@inline function vreduce(::typeof(+), v::VectorizationBase.AbstractSIMDVector)
    # Reducing a SIMD vector with `+` forwards to `vsum`.
    return vsum(v)
end
34
@inline function vreduce(::typeof(*), v::VectorizationBase.AbstractSIMDVector)
    # Reducing a SIMD vector with `*` forwards to `vprod`.
    return vprod(v)
end
@@ -107,6 +108,16 @@ end
107108
end
108109
@inline function vmapreduce(f, op, args...)
    # Untyped catch-all method: hand every argument straight to `mapreduce`.
    return mapreduce(f, op, args...)
end
109110

111+
"""
112+
vsum(A::DenseArray)
113+
vsum(f, A::DenseArray)
114+
115+
Vectorized version of `sum`. Providing a function as the first argument
116+
will apply the function to each element of `A` before summing.
117+
"""
118+
@inline function vsum(f::F, A::AbstractArray{T}) where {F,T<:NativeTypes}
    # Map `f` over `A` and reduce with `+` by delegating to `vmapreduce`.
    return vmapreduce(f, +, A)
end
119+
@inline function vsum(A::AbstractArray{T}) where {T<:NativeTypes}
    # Plain sum: reuse the two-argument method with `identity` as the mapped function.
    return vsum(identity, A)
end
120+
110121
function length_one_axis(::Base.OneTo)
    # For a `Base.OneTo` axis, answer with the one-element `Base.OneTo` axis.
    return Base.OneTo(1)
end
111122
function length_one_axis(::Any)
    # Any argument type handled by this method maps to the range `1:1`.
    return 1:1
end
112123

test/mapreduce.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,9 @@
6060
end
6161
@test vmapreduce(log, +, x) ≈ sum(log, x)
6262
@test vmapreduce(abs2, +, x) ≈ sum(abs2, x)
63+
@test vsum(log, x) ≈ sum(log, x)
64+
@test vsum(abs2, x) ≈ sum(abs2, x)
65+
@test vsum(x) ≈ sum(x)
6366
@test maximum(x) == vreduce(max, x) == maximum_avx(x)
6467
@test minimum(x) == vreduce(min, x) == minimum_avx(x)
6568

0 commit comments

Comments
 (0)