Skip to content

Commit 9d8fb66

Browse files
committed
a few improvements
1 parent e123cb2 commit 9d8fb66

File tree

13 files changed

+67
-55
lines changed

13 files changed

+67
-55
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <[email protected]>"]
4-
version = "0.12.129"
4+
version = "0.12.130"
55

66
[deps]
77
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"

benchmark/looptests.jl

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,17 @@ end
1313
Base.size(::SizedOffsetMatrix{<:Any,LR,UR,LC,UC}) where {LR,UR,LC,UC} =
1414
(UR - LR + 1, UC - LC + 1)
1515
Base.axes(::SizedOffsetMatrix{T,LR,UR,LC,UC}) where {T,LR,UR,LC,UC} =
16-
(Static{LR}():Static{UR}(), Static{LC}():Static{UC}())
16+
(StaticInt{LR}():StaticInt{UR}(), StaticInt{LC}():StaticInt{UC}())
1717
Base.parent(A::SizedOffsetMatrix) = A.data
1818
Base.unsafe_convert(::Type{Ptr{T}}, A::SizedOffsetMatrix{T}) where {T} = pointer(A.data)
19-
ArrayInterface.contiguous_axis(::Type{<:SizedOffsetMatrix}) = Static(1)
20-
ArrayInterface.contiguous_batch_size(::Type{<:SizedOffsetMatrix}) = Static(0)
21-
ArrayInterface.stride_rank(::Type{<:SizedOffsetMatrix}) = (Static(1), Static(2))
19+
ArrayInterface.contiguous_axis(::Type{<:SizedOffsetMatrix}) = StaticInt(1)
20+
ArrayInterface.contiguous_batch_size(::Type{<:SizedOffsetMatrix}) = StaticInt(0)
21+
ArrayInterface.stride_rank(::Type{<:SizedOffsetMatrix}) = (StaticInt(1), StaticInt(2))
2222
function ArrayInterface.strides(A::SizedOffsetMatrix{T,LR,UR,LC,UC}) where {T,LR,UR,LC,UC}
23-
(Static{1}(), (Static{UR}() - Static{LR}() + Static{1}()))
23+
(StaticInt{1}(), (StaticInt{UR}() - StaticInt{LR}() + StaticInt{1}()))
2424
end
2525
ArrayInterface.offsets(A::SizedOffsetMatrix{T,LR,UR,LC,UC}) where {T,LR,UR,LC,UC} =
26-
(Static{LR}(), Static{LC}())
26+
(StaticInt{LR}(), StaticInt{LC}())
2727
ArrayInterface.parent_type(::Type{<:SizedOffsetMatrix{T}}) where {T} = Matrix{T}
2828
Base.getindex(A::SizedOffsetMatrix, i, j) =
2929
LoopVectorization.vload(LoopVectorization.stridedpointer(A), (i, j))

src/LoopVectorization.jl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,6 @@ using CloseOpenIntervals: AbstractCloseOpen, CloseOpen#, SafeCloseOpen
150150
@inline static_step(x::CartesianIndices) =
151151
VectorizationBase.CartesianVIndex(map(static_step, x.indices))
152152
# end
153-
const Static = StaticInt
154153

155154
export LowDimArray,
156155
stridedpointer,

src/broadcast.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -191,11 +191,11 @@ function _strides_expr(@nospecialize(s), @nospecialize(x), R::Vector{Int}, D::Ve
191191
for n ∈ Nrange
192192
xₙ_type = x[n]
193193
# xₙ_type = typeof(x).parameters[n]
194-
xₙ_static = xₙ_type <: Static
194+
xₙ_static = xₙ_type <: StaticInt
195195
xₙ_value::Int = xₙ_static ? (xₙ_type.parameters[1])::Int : 0
196196
s_type = s[n]
197197
# s_type = typeof(s).parameters[n]
198-
sₙ_static = s_type <: Static
198+
sₙ_static = s_type <: StaticInt
199199
if sₙ_static
200200
sₙ_value = s_type.parameters[1]
201201
if s_type === One

src/codegen/loopstartstopmanager.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ end
125125
# end
126126
# end
127127
# set_first_stride(x) = x # cross fingers that this works
128-
# @inline onetozeroindexgephack(sptr::AbstractStridedPointer) = gesp(set_first_stride(sptr), (Static{-1}(),)) # go backwords
128+
# @inline onetozeroindexgephack(sptr::AbstractStridedPointer) = gesp(set_first_stride(sptr), (StaticInt{-1}(),)) # go backwords
129129
# @inline onetozeroindexgephack(sptr::AbstractStridedPointer{T,1}) where {T} = sptr
130130
# @inline onetozeroindexgephack(sptr::StridedPointer{T,1}) where {T} = sptr
131131
# @inline onetozeroindexgephack(x) = x
@@ -678,7 +678,7 @@ function pushgespind!(
678678
nostep = if fromgsp | (!index_by_index)
679679
GlobalRef(VectorizationBase, :NullStep)
680680
else
681-
GlobalRef(ArrayInterface.Static, :Zero)
681+
GlobalRef(LoopVectorization, :Zero)
682682
end
683683
ns = Expr(:call, nostep)
684684
if fromgsp

src/modeling/costs.jl

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,7 @@ end
331331
ifelse(ier.f(r, y), r, y)
332332
end
333333
@inline (ier::IfElseReduced)(x::VecUnroll, y::VecUnroll) =
334-
VecUnroll(fmap(ier, getfield(x, :data), getfield(y, :data)))
334+
VecUnroll(VectorizationBase.fmap(ier, getfield(x, :data), getfield(y, :data)))
335335
@inline function (ier::IfElseReduced)(x::AbstractSIMD, y::NativeTypes)
336336
f = ier.f
337337
r = VectorizationBase.ifelse_reduce(f, x)
@@ -351,7 +351,7 @@ end
351351
VectorizationBase.fmap(ier, VectorizationBase.data(a), VectorizationBase.data(b)),
352352
)
353353

354-
@inline (iec::IfElseCollapser)(a) = VectorizationBase.collapse(IfElseOp(iec.f), a)
354+
@inline (iec::IfElseCollapser)(a) = VectorizationBase.contract(IfElseOp(iec.f), a, StaticInt{1}())
355355
@inline (iec::IfElseCollapser)(a, ::StaticInt{C}) where {C} =
356356
VectorizationBase.contract(IfElseOp(iec.f), a, StaticInt{C}())
357357

@@ -381,8 +381,8 @@ end
381381
@inline (ieo::IfElseOpMirror)(a, b) = ifelse(ieo.f(ieo.a, ieo.b), a, b)
382382

383383
@inline _first_ifelse_reduce_mirror(f::F, a, b) where {F} =
384-
getfield(VectorizationBase.ifelse_reduce_mirror(f, a, b), 1, false)
385-
@inline (ier::IfElseReducerMirror)(a) = _ifelse_reduce_mirror(ier.f, a, ier.a)
384+
getfield(VectorizationBase.ifelse_reduce_mirror(f, a, b), 1)
385+
@inline (ier::IfElseReducerMirror)(a) = _first_ifelse_reduce_mirror(ier.f, a, ier.a)
386386
@inline function _ifelse_reduce_mirror(f::F, a, b, c, d) where {F}
387387
r, rm = VectorizationBase.ifelse_reduce_mirror(f, b, d)
388388
ifelse(f(c, rm), a, r)
@@ -416,13 +416,13 @@ end
416416
@inline (ier::IfElseReducedMirror)(x::AbstractSIMD{W}, y::AbstractSIMD{W}) where {W} =
417417
ifelse(ier.f(ier.a, ier.b), x, y)
418418
@inline function _reduce_mirror(f::F, x, y, a, b) where {F}
419-
r, rm = IfElseReduceToMirror(f, a, b)(x, y)
419+
r, _ = IfElseReducedMirror(f, a, b)(x, y)
420420
ifelse(f(r, y), r, y)
421421
end
422422
@inline (ier::IfElseReducedMirror)(x::AbstractSIMD, y::AbstractSIMD) =
423423
_reduce_mirror(ier.f, x, y, ier.a, ier.b)
424424
@inline (ier::IfElseReducedMirror)(x::VecUnroll, y::VecUnroll) = VecUnroll(
425-
fmap(
425+
VectorizationBase.fmap(
426426
_reduce_mirror,
427427
ier.f,
428428
getfield(x, :data),

src/modeling/graphs.jl

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1202,7 +1202,6 @@ function indices_loop!(ls::LoopSet, r::Expr, itersym::Symbol)::Loop
12021202
end
12031203
end
12041204
push!(ls.equalarraydims, (vptrs, mdims))
1205-
# push!(ls.equalarraydims, ids)
12061205
end
12071206
end
12081207
end
@@ -1601,13 +1600,13 @@ function add_assignment!(ls::LoopSet, LHS, RHS, elementbytes::Int, position::Int
16011600
end
16021601
end
16031602

1604-
function Base.push!(
1603+
function push_op!(
16051604
ls::LoopSet,
16061605
ex::Expr,
16071606
elementbytes::Int,
16081607
position::Int,
16091608
mpref::Union{Nothing,ArrayReferenceMetaPosition} = nothing,
1610-
)
1609+
)::Operation
16111610
if ex.head === :call
16121611
finex = first(ex.args)::Symbol
16131612
if finex === :setindex!
@@ -1621,14 +1620,10 @@ function Base.push!(
16211620
position,
16221621
)
16231622
else
1624-
error("Function $finex not recognized.")
1623+
throw(LoopError("Don't know how to handle expression.", finex))
16251624
end
16261625
elseif ex.head === :(=)
16271626
add_assignment!(ls, ex.args[1], ex.args[2], elementbytes, position)
1628-
elseif ex.head === :block
1629-
add_block!(ls, ex, elementbytes, position)
1630-
elseif ex.head === :for
1631-
add_loop!(ls, ex, elementbytes)
16321627
elseif ex.head === :&&
16331628
add_andblock!(ls, ex, elementbytes, position)
16341629
elseif ex.head === :||
@@ -1641,9 +1636,9 @@ function Base.push!(
16411636
LHS = (localbody.args[1])::Symbol
16421637
RHS_1 = localbody.args[2]
16431638
if RHS_1 isa Symbol
1644-
return push!(ls, localbody, elementbytes, position, mpref)
1639+
return push_op!(ls, localbody, elementbytes, position, mpref)
16451640
elseif Meta.isexpr(RHS_1, :(=), 2)
1646-
RHS = push!(ls, RHS_1, elementbytes, position, mpref)
1641+
RHS = push_op!(ls, RHS_1, elementbytes, position, mpref)
16471642
if isstore(RHS)
16481643
RHS
16491644
else
@@ -1657,6 +1652,22 @@ function Base.push!(
16571652
end
16581653
end
16591654

1655+
function Base.push!(
1656+
ls::LoopSet,
1657+
ex::Expr,
1658+
elementbytes::Int,
1659+
position::Int,
1660+
mpref::Union{Nothing,ArrayReferenceMetaPosition} = nothing,
1661+
)
1662+
if ex.head === :block
1663+
add_block!(ls, ex, elementbytes, position)
1664+
elseif ex.head === :for
1665+
add_loop!(ls, ex, elementbytes)
1666+
else
1667+
push_op!(ls, ex, elementbytes, position, mpref)
1668+
end
1669+
end
1670+
16601671
function UnrollSpecification(
16611672
ls::LoopSet,
16621673
u₁loop::Symbol,

src/reconstruct_loopset.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ function Loop(
9999
::LoopSet,
100100
::Expr,
101101
sym::Symbol,
102-
::Type{OptionallyStaticUnitRange{Static{L},Static{U}}},
102+
::Type{OptionallyStaticUnitRange{StaticInt{L},StaticInt{U}}},
103103
) where {L,U}
104104
static_loop(sym, L, 1, U)
105105
end
@@ -116,7 +116,7 @@ function Loop(
116116
::Expr,
117117
sym::Symbol,
118118
::Type{CO},
119-
) where {L,U,CO<:AbstractCloseOpen{Static{L},Static{U}}}
119+
) where {L,U,CO<:AbstractCloseOpen{StaticInt{L},StaticInt{U}}}
120120
static_loop(sym, L, 1, U - 1)
121121
end
122122

@@ -259,7 +259,7 @@ end
259259

260260

261261
extract_varg(i) = :(getfield(var"#vargs#", $i, false))
262-
# _extract(::Type{Static{N}}) where {N} = N
262+
# _extract(::Type{StaticInt{N}}) where {N} = N
263263
extract_gsp!(sptrs::Expr, name::Symbol) = (push!(sptrs.args, name); nothing)
264264
tupleranks(R::NTuple{8,Int}) = ntuple(n -> sum(R[n] .≥ R), Val{8}())
265265
function rank_to_sortperm((R, N)::Tuple{NTuple{8,Int},Int})::Vector{Int}

src/simdfunctionals/filter.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ function vfilter!(f::F, x::Vector{T}, y::AbstractArray{T}) where {F,T<:NativeTyp
66
Nrem = N & (W - 1)
77
j = 0
88
st = VectorizationBase.static_sizeof(T)
9-
zero_index = MM(W, Static(0), st)
9+
zero_index = MM(W, StaticInt(0), st)
1010
incr = W * VectorizationBase.static_sizeof(T)
1111
GC.@preserve x y begin
1212
# ptr_x = llvmptr(x); ptr_y = llvmptr(y)

src/simdfunctionals/map.jl

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ function setup_vmap!(
1313
ptrargs = map(VectorizationBase.zstridedpointer, args)
1414
V = pick_vector_width(T)
1515
W = unwrap(V)
16-
zero_index = (MM{W}(Static(0)),)
16+
zero_index = (MM{W}(StaticInt(0)),)
1717
uintptry = reinterpret(UInt, pointer(ptry))
1818
@assert iszero(uintptry & (sizeof(T) - 1)) "The destination vector (`dest`) must be aligned to `sizeof(eltype(dest)) == $(sizeof(T))` bytes."
1919
alignment = uintptry & (register_size() - 1)
@@ -233,18 +233,12 @@ function vmap_multithread!(
233233
end
234234
nothing
235235
end
236-
@generated function gc_preserve_vmap!(
237-
f::F,
238-
y::AbstractArray,
239-
::Val{NonTemporal},
240-
::Val{Threaded},
241-
args::Vararg{AbstractArray,A},
242-
) where {F,A,NonTemporal,Threaded}
236+
function gc_preserve_vmap_quote(NonTemporal::Bool, Threaded::Bool, A::Int)
243237
m = Threaded ? :vmap_multithread! : :vmap_singlethread!
244238
call = Expr(:call, m, :f, :y, Expr(:call, Expr(:curly, :Val, NonTemporal)))
245239
q = Expr(:block, Expr(:meta, :inline))
246240
gcpres = Expr(:gc_preserve, call)
247-
for a ∈ 1:A
241+
for a ∈ 1:Int(A)::Int
248242
arg = Symbol(:arg_, a)
249243
parg = Symbol(:parg_, a)
250244
push!(q.args, Expr(:(=), arg, :(@inbounds args[$a])))#Expr(:ref, :args, a)))
@@ -255,6 +249,15 @@ end
255249
push!(q.args, gcpres, :y)
256250
q
257251
end
252+
@generated function gc_preserve_vmap!(
253+
f::F,
254+
y::AbstractArray,
255+
::Val{NonTemporal},
256+
::Val{Threaded},
257+
args::Vararg{AbstractArray,A},
258+
) where {F,A,NonTemporal,Threaded}
259+
gc_preserve_vmap_quote(NonTemporal,Threaded,A)
260+
end
258261

259262

260263
@inline _all_dense(t::Tuple{ArrayInterface.True}) = true

0 commit comments

Comments
 (0)