Skip to content

Commit cf4af69

Browse files
committed
merge master
2 parents 75b9387 + c5bbc26 commit cf4af69

File tree

7 files changed

+56
-53
lines changed

7 files changed

+56
-53
lines changed

Project.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <[email protected]>"]
4-
version = "0.12.38"
4+
version = "0.12.39"
55

66
[deps]
77
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
@@ -30,5 +30,5 @@ Static = "0.2"
3030
StrideArraysCore = "0.1.12"
3131
ThreadingUtilities = "0.4.2"
3232
UnPack = "1"
33-
VectorizationBase = "0.20.16"
33+
VectorizationBase = "0.20.17"
3434
julia = "1.5"

src/LoopVectorization.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ using Static: StaticInt, gt
44
using VectorizationBase, SLEEFPirates, UnPack, OffsetArrays
55
using VectorizationBase: register_size, register_count, cache_linesize, cache_size, has_opmask_registers,
66
mask, pick_vector_width, MM, AbstractMask, data, grouped_strided_pointer, AbstractSIMD,
7-
maybestaticlength, maybestaticsize, vzero, maybestaticrange, offsetprecalc, lazymul,
7+
vzero, offsetprecalc, lazymul,
88
vadd_nw, vadd_nsw, vadd_nuw, vsub_nw, vsub_nsw, vsub_nuw, vmul_nw, vmul_nsw, vmul_nuw,
99
maybestaticfirst, maybestaticlast, gep, gesp, NativeTypes, #llvmptr,
1010
vfmadd, vfmsub, vfnmadd, vfnmsub, vfmadd_fast, vfmsub_fast, vfnmadd_fast, vfnmsub_fast, vfmadd231, vfmsub231, vfnmadd231, vfnmsub231,

src/broadcast.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -385,7 +385,7 @@ function add_broadcast_loops!(ls::LoopSet, loopsyms::Vector{Symbol}, destsym::Sy
385385
push!(axes_tuple.args, Nrange)
386386
pushpreamble!(ls, Expr(:(=), Nlower, Expr(:call, lv(:maybestaticfirst), Nrange)))
387387
pushpreamble!(ls, Expr(:(=), Nupper, Expr(:call, lv(:maybestaticlast), Nrange)))
388-
pushpreamble!(ls, Expr(:(=), Nlen, Expr(:call, lv(:maybestaticlength), Nrange)))
388+
pushpreamble!(ls, Expr(:(=), Nlen, Expr(:call, GlobalRef(ArrayInterface,:static_length), Nrange)))
389389
end
390390
end
391391
# size of dest determines loops

src/codegen/split_loops.jl

Lines changed: 34 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -89,38 +89,41 @@ function returned_ops(ls::LoopSet)
8989
end
9090

9191
function lower_and_split_loops(ls::LoopSet, inline::Int)
92-
split_candidates = returned_ops(ls)
93-
length(split_candidates) > 1 || return lower(ls, inline)
94-
order_fused, unrolled_fused, tiled_fused, vectorized_fused, U_fused, T_fused, cost_fused, shouldinline_fused = choose_order_cost(ls)
95-
remaining_ops = Vector{Int}(undef, length(split_candidates) - 1); split_1 = Int[0];
96-
# for (ind,i) ∈ enumerate(split_candidates)
97-
for (ind,i) ∈ enumerate(split_candidates)
98-
split_1[1] = i
99-
ls_1 = split_loopset(ls, split_1)
100-
order_1, unrolled_1, tiled_1, vectorized_1, U_1, T_1, cost_1, shouldinline_1 = choose_order_cost(ls_1)
101-
remaining_ops[1:ind-1] .= @view(split_candidates[1:ind-1]); remaining_ops[ind:end] .= @view(split_candidates[ind+1:end])
102-
ls_2 = split_loopset(ls, remaining_ops)
103-
order_2, unrolled_2, tiled_2, vectorized_2, U_2, T_2, cost_2, shouldinline_2 = choose_order_cost(ls_2)
104-
# U_1 = T_1 = U_2 = T_2 = 2
105-
if cost_1 + cost_2 ≤ cost_fused
106-
ls_2_lowered = if length(remaining_ops) > 1
107-
inline = iszero(inline) ? (shouldinline_1 % Int) : inline
108-
lower_and_split_loops(ls_2, inline)
109-
else
110-
doinline = inlinedecision(inline, shouldinline_1 | shouldinline_2)
111-
lower(ls_2, order_2, unrolled_2, tiled_2, vectorized_2, U_2, T_2, doinline)
112-
end
113-
return Expr(
114-
:block,
115-
ls.preamble,
116-
lower(ls_1, order_1, unrolled_1, tiled_1, vectorized_1, U_1, T_1, false),
117-
ls_2_lowered,
118-
nothing
119-
)
120-
end
92+
split_candidates = returned_ops(ls)
93+
length(split_candidates) > 1 || return lower(ls, inline)
94+
order_fused, unrolled_fused, tiled_fused, vectorized_fused, U_fused, T_fused, cost_fused, shouldinline_fused = choose_order_cost(ls)
95+
remaining_ops = Vector{Int}(undef, length(split_candidates) - 1); split_1 = Int[0];
96+
# for (ind,i) ∈ enumerate(split_candidates)
97+
for (ind,i) ∈ enumerate(split_candidates)
98+
split_1[1] = i
99+
ls_1 = split_loopset(ls, split_1)
100+
order_1, unrolled_1, tiled_1, vectorized_1, U_1, T_1, cost_1, shouldinline_1 = choose_order_cost(ls_1)
101+
remaining_ops[1:ind-1] .= @view(split_candidates[1:ind-1]); remaining_ops[ind:end] .= @view(split_candidates[ind+1:end])
102+
ls_2 = split_loopset(ls, remaining_ops)
103+
order_2, unrolled_2, tiled_2, vectorized_2, U_2, T_2, cost_2, shouldinline_2 = choose_order_cost(ls_2)
104+
# U_1 = T_1 = U_2 = T_2 = 2
105+
# return ls_1, ls_2
106+
# @show cost_1 + cost_2 ≤ cost_fused, cost_1, cost_2, cost_fused
107+
if cost_1 + cost_2 ≤ 0.9cost_fused
108+
ls_2_lowered = if length(remaining_ops) > 1
109+
inline = iszero(inline) ? (shouldinline_1 % Int) : inline
110+
lower_and_split_loops(ls_2, inline)
111+
else
112+
doinline = inlinedecision(inline, shouldinline_1 | shouldinline_2)
113+
lower(ls_2, order_2, unrolled_2, tiled_2, vectorized_2, U_2, T_2, doinline)
114+
end
115+
return Expr(
116+
:block,
117+
ls.preamble,
118+
lower(ls_1, order_1, unrolled_1, tiled_1, vectorized_1, U_1, T_1, false),
119+
ls_2_lowered,
120+
nothing
121+
)
121122
end
122-
doinline = inlinedecision(inline, shouldinline_fused)
123-
lower(ls, order_fused, unrolled_fused, tiled_fused, vectorized_fused, U_fused, T_fused, doinline)
123+
length(split_candidates) == 2 && break
124+
end
125+
doinline = inlinedecision(inline, shouldinline_fused)
126+
lower(ls, order_fused, unrolled_fused, tiled_fused, vectorized_fused, U_fused, T_fused, doinline)
124127
end
125128

126129

src/modeling/graphs.jl

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -756,21 +756,21 @@ function add_block!(ls::LoopSet, ex::Expr, elementbytes::Int, position::Int)
756756
end
757757
end
758758
function maybestatic!(expr::Expr)
759-
if expr.head === :call
760-
f = first(expr.args)
761-
if f === :length
762-
expr.args[1] = lv(:maybestaticlength)
763-
elseif f === :size && length(expr.args) == 3
764-
i = expr.args[3]
765-
if i isa Integer
766-
expr.args[1] = lv(:maybestaticsize)
767-
expr.args[3] = Expr(:call, Expr(:curly, :Val, convert(Int, i)))
768-
end
769-
else
770-
static_literals!(expr)
771-
end
759+
if expr.head === :call
760+
f = first(expr.args)
761+
if f === :length
762+
expr.args[1] = GlobalRef(ArrayInterface,:static_length)
763+
elseif f === :size && length(expr.args) == 3
764+
i = expr.args[3]
765+
if i isa Integer
766+
expr.args[1] = GlobalRef(ArrayInterface,:size)
767+
expr.args[3] = staticexpr(convert(Int,i)::Int)
768+
end
769+
else
770+
static_literals!(expr)
772771
end
773-
expr
772+
end
773+
expr
774774
end
775775
add_loop_bound!(ls::LoopSet, itersym::Symbol, bound::Union{Integer,Symbol}, upper::Bool, step::Bool)::MaybeKnown = MaybeKnown(bound, upper ? 1024 : 1)
776776
function add_loop_bound!(ls::LoopSet, itersym::Symbol, bound::Expr, upper::Bool, step::Bool)::MaybeKnown
@@ -797,7 +797,7 @@ function range_loop!(ls::LoopSet, itersym::Symbol, l::MaybeKnown, u::MaybeKnown,
797797
isone(s) || pushexpr!(range, s)
798798
pushexpr!(range, u)
799799
pushprepreamble!(ls, Expr(:(=), rangename, range))
800-
pushprepreamble!(ls, Expr(:(=), lenname, Expr(:call, lv(:maybestaticlength), rangename)))
800+
pushprepreamble!(ls, Expr(:(=), lenname, Expr(:call, GlobalRef(ArrayInterface,:static_length), rangename)))
801801
Loop(itersym, l, u, s, rangename, lenname)
802802
end
803803
function range_loop!(ls::LoopSet, r::Expr, itersym::Symbol)::Loop
@@ -853,7 +853,7 @@ end
853853
function misc_loop!(ls::LoopSet, r::Union{Expr,Symbol}, itersym::Symbol, staticstepone::Bool)::Loop
854854
rangename = gensym!(ls, "looprange" * string(itersym)); lenname = gensym!(ls, "looplen" * string(itersym));
855855
pushprepreamble!(ls, Expr(:(=), rangename, Expr(:call, lv(:canonicalize_range), :(@inbounds $(static_literals!(r))))))
856-
pushprepreamble!(ls, Expr(:(=), lenname, Expr(:call, lv(:maybestaticlength), rangename)))
856+
pushprepreamble!(ls, Expr(:(=), lenname, Expr(:call, GlobalRef(ArrayInterface,:static_length), rangename)))
857857
L = add_loop_bound!(ls, itersym, Expr(:call, lv(:maybestaticfirst), rangename), false, false)
858858
U = add_loop_bound!(ls, itersym, Expr(:call, lv(:maybestaticlast), rangename), true, false)
859859
if staticstepone

src/reconstruct_loopset.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ function Loop(ls::LoopSet, ex::Expr, sym::Symbol, f, s, l, ub::Int)
3030
rangesym = gensym(ssym * "_loop");
3131
lensym = gensym(ssym * "_looplen")
3232
pushpreamble!(ls, Expr(:(=), rangesym, ex))
33-
pushpreamble!(ls, Expr(:(=), lensym, Expr(:call, lv(:maybestaticlength), rangesym)))
33+
pushpreamble!(ls, Expr(:(=), lensym, Expr(:call, GlobalRef(ArrayInterface,:static_length), rangesym)))
3434
F = if f === nothing
3535
start = gensym(ssym*"_loopstart")
3636
pushpreamble!(ls, Expr(:(=), start, Expr(:call, %, Expr(:call, lv(:first), rangesym), Int)))

test/gemv.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ using Test
8787
δ = 0
8888
z = zero(eltype(G))
8989
@turbo for d1=1:d
90-
G[d1,κ] = z
90+
G[d1,κ - δ] = z
9191
for d2=1:d
9292
G[d1, κ - δ] += B[d2, d1] * B[d2, κ + δ]
9393
end

0 commit comments

Comments
 (0)