1
1
2
# Alias for dense arrays whose element type is a subtype of `NativeTypes`.
# It is the element type of the `Vararg` argument lists taken by the `vmap!` family.
const DenseNativeArray = DenseArray{<:NativeTypes}
3
+
2
4
"""
3
5
`vstorent!` (non-temporal store) requires data to be aligned.
4
6
`alignstores!` will align `y` in preparation for the non-temporal maps.
5
7
"""
6
8
function alignstores! (
7
9
f:: F , y:: DenseArray{T} ,
8
- args:: Vararg{<:DenseArray{<:Base.HWReal} ,A}
10
+ args:: Vararg{DenseNativeArray ,A}
9
11
) where {F, T <: Base.HWReal , A}
10
12
N = length (y)
11
13
ptry = VectorizationBase. zero_offsets (stridedpointer (y))
32
34
function vmap_singlethread! (
33
35
f:: F , y:: DenseArray{T} ,
34
36
:: Val{NonTemporal} ,
35
- args:: Vararg{<:DenseArray{<:Base.HWReal} ,A}
37
+ args:: Vararg{DenseNativeArray ,A}
36
38
) where {F,T <: Base.HWReal , A, NonTemporal}
37
39
if NonTemporal # if stores into `y` aren't aligned, we'll get a crash
38
40
ptry, ptrargs, N = alignstores! (f, y, args... )
@@ -80,7 +82,7 @@ function vmap_multithreaded!(
80
82
f:: F ,
81
83
y:: DenseArray{T} ,
82
84
:: Val{true} ,
83
- args:: Vararg{<:DenseArray{<:Base.HWReal} ,A}
85
+ args:: Vararg{DenseNativeArray ,A}
84
86
) where {F,T,A}
85
87
ptry, ptrargs, N = alignstores! (f, y, args... )
86
88
N > 0 || return y
@@ -107,7 +109,7 @@ function vmap_multithreaded!(
107
109
f:: F ,
108
110
y:: DenseArray{T} ,
109
111
:: Val{false} ,
110
- args:: Vararg{<:DenseArray{<:Base.HWReal} ,A}
112
+ args:: Vararg{DenseNativeArray ,A}
111
113
) where {F,T,A}
112
114
N = length (y)
113
115
ptry = VectorizationBase. zero_offsets (stridedpointer (y))
@@ -142,7 +144,7 @@ Vectorized-`map!`, applying `f` to each element of `a` (or paired elements of `a
142
144
and storing the result in `destination`.
143
145
"""
144
146
function vmap!(
    f::F, y::DenseArray{T}, args::Vararg{DenseNativeArray,A}
) where {F,T<:Base.HWReal,A}
    # `Val{false}()` binds `NonTemporal = false` in `vmap_singlethread!`, so the
    # store-alignment step guarded by `if NonTemporal` is not taken.
    return vmap_singlethread!(f, y, Val{false}(), args...)
end
154
156
Like `vmap!` (see `vmap!`), but uses `Threads.@threads` for parallel execution.
155
157
"""
156
158
function vmapt!(
    f::F, y::DenseArray{T}, args::Vararg{DenseNativeArray,A}
) where {F,T<:Base.HWReal,A}
    # `Val{false}()` selects the `::Val{false}` method of `vmap_multithreaded!`.
    return vmap_multithreaded!(f, y, Val{false}(), args...)
end
@@ -216,7 +218,7 @@ BenchmarkTools.Trial:
216
218
```
217
219
"""
218
220
function vmapnt!(
    f::F, y::DenseArray{T}, args::Vararg{DenseNativeArray,A}
) where {F,T<:Base.HWReal,A}
    # `Val{true}()` binds `NonTemporal = true` in `vmap_singlethread!`, which then
    # calls `alignstores!` first because unaligned non-temporal stores would crash.
    return vmap_singlethread!(f, y, Val{true}(), args...)
end
227
229
Like `vmapnt!` (see `vmapnt!`), but uses `Threads.@threads` for parallel execution.
228
230
"""
229
231
function vmapntt!(
    f::F, y::DenseArray{T}, args::Vararg{DenseNativeArray,A}
) where {F,T<:Base.HWReal,A}
    # `Val{true}()` selects the `::Val{true}` method of `vmap_multithreaded!`,
    # which begins by calling `alignstores!` on `y` and the inputs.
    return vmap_multithreaded!(f, y, Val{true}(), args...)
end
238
240
# Generic method: hands every argument straight to `map!` and returns its result.
@inline function vmapnt!(f, args...)
    return map!(f, args...)
end
239
241
# Generic method: hands every argument straight to `map!` and returns its result.
@inline function vmapntt!(f, args...)
    return map!(f, args...)
end
240
242
241
- function vmap_call (f:: F , vm!:: V , args:: Vararg{<: Any,N} ) where {V,F,N}
243
+ function vmap_call (f:: F , vm!:: V , args:: Vararg{Any,N} ) where {V,F,N}
242
244
T = Base. _return_type (f, Base. Broadcast. eltypes (args))
243
245
dest = similar (first (args), T)
244
246
vm! (f, dest, args... )
@@ -251,15 +253,15 @@ end
251
253
SIMD-vectorized `map`, applying `f` to each element of `a` (or paired elements of `a`, `b`, ...)
252
254
and returning a new array.
253
255
"""
254
# `Vararg{Any,N}` rather than `Vararg{<:Any,N}`: the element bound is already `Any`.
vmap(f::F, args::Vararg{Any,N}) where {F,N} = vmap_call(f, vmap!, args...)
255
257
256
258
"""
257
259
vmapt(f, a::AbstractArray)
258
260
vmapt(f, a::AbstractArray, b::AbstractArray, ...)
259
261
260
262
A threaded variant of [`vmap`](@ref).
261
263
"""
262
# `Vararg{Any,N}` rather than `Vararg{<:Any,N}`: the element bound is already `Any`.
vmapt(f::F, args::Vararg{Any,N}) where {F,N} = vmap_call(f, vmapt!, args...)
263
265
264
266
"""
265
267
vmapnt(f, a::AbstractArray)
@@ -268,15 +270,15 @@ vmapt(f::F, args::Vararg{<:Any,N}) where {F,N} = vmap_call(f, vmapt!, args...)
268
270
A "non-temporal" variant of [`vmap`](@ref). This can improve performance in cases where
269
271
`destination` will not be needed soon.
270
272
"""
271
# `Vararg{Any,N}` rather than `Vararg{<:Any,N}`: the element bound is already `Any`.
vmapnt(f::F, args::Vararg{Any,N}) where {F,N} = vmap_call(f, vmapnt!, args...)
272
274
273
275
"""
274
276
vmapntt(f, a::AbstractArray)
275
277
vmapntt(f, a::AbstractArray, b::AbstractArray, ...)
276
278
277
279
A threaded variant of [`vmapnt`](@ref).
278
280
"""
279
# `Vararg{Any,N}` rather than `Vararg{<:Any,N}`: the element bound is already `Any`.
vmapntt(f::F, args::Vararg{Any,N}) where {F,N} = vmap_call(f, vmapntt!, args...)
280
282
281
283
282
284
# @inline vmap!(f, y, x...) = @avx y .= f.(x...)
0 commit comments