Skip to content

Commit 9fed32f

Browse files
committed
Add vmap support for ranges
1 parent eb50656 commit 9fed32f

File tree

3 files changed

+64
-61
lines changed

3 files changed

+64
-61
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <[email protected]>"]
4-
version = "0.12.52"
4+
version = "0.12.53"
55

66
[deps]
77
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"

src/simdfunctionals/map.jl

Lines changed: 33 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -54,14 +54,14 @@ function vmap_singlethread!(
5454
::Val{NonTemporal},
5555
args::Vararg{AbstractArray,A}
5656
) where {F,T <: Base.HWReal, A, NonTemporal}
57-
ptry, ptrargs, N = setup_vmap!(f, y, Val{NonTemporal}(), args...)
58-
vmap_singlethread!(f, ptry, Zero(), N, Val{NonTemporal}(), ptrargs)
59-
nothing
57+
ptry, ptrargs, N = setup_vmap!(f, y, Val{NonTemporal}(), args...)
58+
_vmap_singlethread!(f, ptry, Zero(), N, Val{NonTemporal}(), ptrargs)
59+
nothing
6060
end
61-
function vmap_singlethread!(
62-
f::F, ptry::AbstractStridedPointer{T},
63-
start, N, ::Val{NonTemporal},
64-
ptrargs::Tuple{Vararg{AbstractStridedPointer,A}}
61+
function _vmap_singlethread!(
62+
f::F, ptry::AbstractStridedPointer{T},
63+
start, N, ::Val{NonTemporal},
64+
ptrargs::Tuple{Vararg{Any,A}}
6565
) where {F, T, NonTemporal, A}
6666
i = convert(Int, start)
6767
V = VectorizationBase.pick_vector_width(promote_type(T, reduce(promote_type, map(eltype, ptrargs))))
@@ -111,25 +111,25 @@ end
111111

112112
abstract type AbstractVmapClosure{NonTemporal,F,D,N,A<:Tuple{Vararg{StridedPointer,N}}} <: Function end
113113
struct VmapClosure{NonTemporal,F,D,N,A} <: AbstractVmapClosure{NonTemporal,F,D,N,A}
114-
f::F
115-
function VmapClosure{NonTemporal}(f::F, ::D, ::A) where {NonTemporal,F,D,N,A<:Tuple{Vararg{StridedPointer,N}}}
116-
new{NonTemporal,F,D,N,A}(f)
117-
end
114+
f::F
115+
function VmapClosure{NonTemporal}(f::F, ::D, ::A) where {NonTemporal,F,D,N,A<:Tuple{Vararg{StridedPointer,N}}}
116+
new{NonTemporal,F,D,N,A}(f)
117+
end
118118
end
119119
# struct VmapKnownClosure{NonTemporal,F,D,N,A} <: AbstractVmapClosure{NonTemporal,F,D,N,A} end
120120

121121
# @generated function (::VmapKnownClosure{NonTemporal,F,D,N,A})(p::Ptr{UInt}) where {NonTemporal,F,D,N,A}
122122
# :(_vmap_thread_call!($(F.instance), p, $D, $A, Val{$NonTemporal}()))
123123
# end
124124
function (m::VmapClosure{NonTemporal,F,D,N,A})(p::Ptr{UInt}) where {NonTemporal,F,D,N,A}
125-
(offset, dest) = ThreadingUtilities.load(p, D, 2*sizeof(UInt))
126-
(offset, args) = ThreadingUtilities.load(p, A, offset)
127-
128-
(offset, start) = ThreadingUtilities.load(p, Int, offset)
129-
(offset, stop ) = ThreadingUtilities.load(p, Int, offset)
130-
131-
vmap_singlethread!(m.f, dest, start, stop, Val{NonTemporal}(), args)
132-
nothing
125+
(offset, dest) = ThreadingUtilities.load(p, D, 2*sizeof(UInt))
126+
(offset, args) = ThreadingUtilities.load(p, A, offset)
127+
128+
(offset, start) = ThreadingUtilities.load(p, Int, offset)
129+
(offset, stop ) = ThreadingUtilities.load(p, Int, offset)
130+
131+
_vmap_singlethread!(m.f, dest, start, stop, Val{NonTemporal}(), args)
132+
nothing
133133
end
134134

135135
@inline function _get_fptr(cfunc::Base.CFunction)
@@ -146,23 +146,23 @@ end
146146
@inline function setup_thread_vmap!(
147147
p, cfunc, ptry, ptrargs, start, stop
148148
)
149-
fptr = _get_fptr(cfunc)
150-
offset = ThreadingUtilities.store!(p, fptr, sizeof(UInt))
151-
offset = ThreadingUtilities.store!(p, ptry, offset)
152-
offset = ThreadingUtilities.store!(p, ptrargs, offset)
153-
offset = ThreadingUtilities.store!(p, start, offset)
154-
offset = ThreadingUtilities.store!(p, stop, offset)
155-
nothing
149+
fptr = _get_fptr(cfunc)
150+
offset = ThreadingUtilities.store!(p, fptr, sizeof(UInt))
151+
offset = ThreadingUtilities.store!(p, ptry, offset)
152+
offset = ThreadingUtilities.store!(p, ptrargs, offset)
153+
offset = ThreadingUtilities.store!(p, start, offset)
154+
offset = ThreadingUtilities.store!(p, stop, offset)
155+
nothing
156156
end
157157
@inline function launch_thread_vmap!(tid, cfunc, ptry, ptrargs, start, stop)
158-
ThreadingUtilities.launch(tid, cfunc, ptry, ptrargs, start, stop) do p, cfunc, ptry, ptrargs, start, stop
159-
setup_thread_vmap!(p, cfunc, ptry, ptrargs, start, stop)
160-
end
158+
ThreadingUtilities.launch(tid, cfunc, ptry, ptrargs, start, stop) do p, cfunc, ptry, ptrargs, start, stop
159+
setup_thread_vmap!(p, cfunc, ptry, ptrargs, start, stop)
160+
end
161161
end
162162

163163
@inline function vmap_closure(f::F, ptry::D, ptrargs::A, ::Val{NonTemporal}) where {F,D<:StridedPointer,N,A<:Tuple{Vararg{StridedPointer,N}},NonTemporal}
164-
vmc = VmapClosure{NonTemporal}(f, ptry, ptrargs)
165-
@cfunction($vmc, Cvoid, (Ptr{UInt},))
164+
vmc = VmapClosure{NonTemporal}(f, ptry, ptrargs)
165+
@cfunction($vmc, Cvoid, (Ptr{UInt},))
166166
end
167167
# @inline function _cfunc_closure(f, ptry, ptrargs, ::Val{NonTemporal}) where {NonTemporal}
168168
# vmc = VmapClosure{NonTemporal}(f, ptry, ptrargs)
@@ -197,7 +197,7 @@ function vmap_multithread!(
197197

198198
# if !((nt > 1) && iszero(ccall(:jl_in_threaded_region, Cint, ())))
199199
if nt < 2
200-
vmap_singlethread!(f, ptry, Zero(), N, Val{NonTemporal}(), ptrargs)
200+
_vmap_singlethread!(f, ptry, Zero(), N, Val{NonTemporal}(), ptrargs)
201201
return
202202
end
203203

@@ -214,7 +214,7 @@ function vmap_multithread!(
214214
launch_thread_vmap!(tid, cfunc, ptry, ptrargs, start, stop)
215215
start = stop
216216
end
217-
vmap_singlethread!(f, ptry, start, N, Val{NonTemporal}(), ptrargs)
217+
_vmap_singlethread!(f, ptry, start, N, Val{NonTemporal}(), ptrargs)
218218
for tid ∈ 1:nt-1
219219
ThreadingUtilities.wait(tid)
220220
end

test/map.jl

Lines changed: 30 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,33 @@
11
@testset "map" begin
2-
@inline foo(x, y) = exp(x) - sin(y)
3-
for T ∈ (Float32,Float64)
4-
@show T, @__LINE__
5-
for N ∈ [ 3, 371 ]
6-
a = rand(T, N); b = rand(T, N);
7-
c0 = vmapntt(foo, a, b);
8-
c3 = similar(c0) # not aligned
9-
fill!(c3, NaN); @views vmapntt!(foo, c3[2:end], a[2:end], b[2:end]);
10-
c1 = map(foo, a, b);
11-
c2 = vmap(foo, a, b);
12-
@test c1 ≈ c2
13-
c2 = vmapt(foo, a, b);
14-
@test c1 ≈ c2
15-
c2 = vmapnt(foo, a, b);
16-
@test c1 ≈ c2
17-
fill!(c2, NaN); @views vmapnt!(foo, c2[2:end], a[2:end], b[2:end]);
18-
@test @views c1[2:end] ≈ c2[2:end]
19-
sleep(1e-3) # non-temporal stores won't be automatically synced/coherant, so need to wait!
20-
@test c0 ≈ c1
21-
@test isnan(c3[begin])
22-
@test @views c1[2:end] ≈ c3[2:end]
23-
end
24-
25-
c = rand(T,100); x = rand(T,10^4); y1 = similar(x); y2 = similar(x);
26-
map!(xᵢ -> clenshaw(xᵢ, c), y1, x)
27-
vmap!(xᵢ -> clenshaw(xᵢ, c), y2, x)
28-
@test y1 ≈ y2
2+
@inline foo(x, y) = exp(x) - sin(y)
3+
for T ∈ (Float32,Float64)
4+
@show T, @__LINE__
5+
for N ∈ [ 3, 371 ]
6+
a = rand(T, N); b = rand(T, N);
7+
c0 = vmapntt(foo, a, b);
8+
c3 = similar(c0) # not aligned
9+
fill!(c3, NaN); @views vmapntt!(foo, c3[2:end], a[2:end], b[2:end]);
10+
c1 = map(foo, a, b);
11+
c2 = vmap(foo, a, b);
12+
@test c1 ≈ c2
13+
c2 = vmapt(foo, a, b);
14+
@test c1 ≈ c2
15+
c2 = vmapnt(foo, a, b);
16+
@test c1 ≈ c2
17+
fill!(c2, NaN); @views vmapnt!(foo, c2[2:end], a[2:end], b[2:end]);
18+
@test @views c1[2:end] ≈ c2[2:end]
19+
sleep(1e-3) # non-temporal stores won't be automatically synced/coherant, so need to wait!
20+
@test c0 ≈ c1
21+
@test isnan(c3[begin])
22+
@test @views c1[2:end] ≈ c3[2:end]
2923
end
24+
25+
c = rand(T,100); x = rand(T,10^4); y1 = similar(x); y2 = similar(x);
26+
map!(xᵢ -> clenshaw(xᵢ, c), y1, x)
27+
vmap!(xᵢ -> clenshaw(xᵢ, c), y2, x)
28+
@test y1 ≈ y2
29+
end
30+
@test vmap(abs2, 1:100) == map(abs2, 1:100)
31+
@test vmap(abs2, 1:3:1000) == map(abs2, 1:3:1000)
32+
@test vmap(abs2, 1.0:3.0:1000.0) ≈ map(abs2, 1.0:3.0:1000.0)
3033
end

0 commit comments

Comments
 (0)