Skip to content

Commit 563467b

Browse files
committed
LoopVectorization tests passed locally.
1 parent f730695 commit 563467b

18 files changed

+513
-196
lines changed

Project.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,10 @@ VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
1515
[compat]
1616
DocStringExtensions = "0.8"
1717
OffsetArrays = "1"
18-
SIMDPirates = "0.7.25"
19-
SLEEFPirates = "0.4.8"
18+
SIMDPirates = "0.8"
19+
SLEEFPirates = "0.5"
2020
UnPack = "0,1"
21-
VectorizationBase = "0.11.5"
21+
VectorizationBase = "0.12"
2222
julia = "1.1"
2323

2424
[extras]

src/LoopVectorization.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ using VectorizationBase: REGISTER_SIZE, REGISTER_COUNT, extract_data, num_vector
77
Static, Zero, StaticUnitRange, StaticLowerUnitRange, StaticUpperUnitRange, unwrap, maybestaticrange,
88
AbstractColumnMajorStridedPointer, AbstractRowMajorStridedPointer, AbstractSparseStridedPointer, AbstractStaticStridedPointer,
99
PackedStridedPointer, SparseStridedPointer, RowMajorStridedPointer, StaticStridedPointer, StaticStridedStruct,
10-
maybestaticfirst, maybestaticlast, scalar_less, scalar_greater, noalias!, gesp, gepbyte
10+
maybestaticfirst, maybestaticlast, scalar_less, scalar_greater, noalias!, gesp, gepbyte, pointerforcomparison
1111
using SIMDPirates: VECTOR_SYMBOLS, evadd, evsub, evmul, evfdiv, vrange,
1212
reduced_add, reduced_prod, reduce_to_add, reduced_max, reduced_min, vsum, vprod, vmaximum, vminimum,
1313
sizeequivalentfloat, sizeequivalentint, vadd!, vsub!, vmul!, vfdiv!, vfmadd!, vfnmadd!, vfmsub!, vfnmsub!,
@@ -24,7 +24,7 @@ using Base.FastMath: add_fast, sub_fast, mul_fast, div_fast
2424

2525
const NativeTypes = Union{Bool, Base.HWReal}
2626

27-
export LowDimArray, stridedpointer, vectorizable,
27+
export LowDimArray, stridedpointer,
2828
@avx, @_avx, *ˡ, _avx_!,
2929
vmap, vmap!, vmapnt, vmapnt!, vmapntt, vmapntt!,
3030
vfilter, vfilter!, vmapreduce, vreduce

src/add_constants.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ function add_constant!(ls::LoopSet, mpref::ArrayReferenceMetaPosition, elementby
3636
temp = gensym(:intermediateconstref)
3737
vloadcall = Expr(:call, lv(:vload), mpref.mref.ptr)
3838
if length(getindices(op)) > 0
39-
push!(vloadcall.args, mem_offset(op, UnrollArgs(0, Symbol(""), Symbol(""), Symbol(""), 0, nothing), false, false))
39+
push!(vloadcall.args, mem_offset(op, UnrollArgs(0, Symbol(""), Symbol(""), Symbol(""), 0, nothing), Bool[]))
4040
end
4141
pushpreamble!(ls, Expr(:(=), temp, vloadcall))
4242
pushpreamble!(ls, op, temp)

src/add_loads.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ function add_load!(ls::LoopSet, op::Operation, actualarray::Bool = true, broadca
1010
opp = ls.opdict[ls.syms_aliasing_refs[id]] # throw an error if not found.
1111
return isstore(opp) ? getop(ls, first(parents(opp))) : opp
1212
end
13-
add_vptr!(ls, op.ref.ref.array, vptr(op.ref), actualarray, broadcast)
13+
add_vptr!(ls, op.ref.ref.array, vptr(op), actualarray, broadcast)
1414
pushop!(ls, op, name(op))
1515
end
1616

@@ -35,7 +35,7 @@ function add_simple_load!(
3535
)
3636
loopdeps = Symbol[s for s ∈ ref.indices]
3737
mref = ArrayReferenceMeta(
38-
ref, fill(true, length(loopdeps))
38+
ref, fill(true, length(loopdeps) - isdiscontiguous(ref))
3939
)
4040
add_simple_load!(ls, var, mref, loopdeps, elementbytes, actualarray, broadcast)
4141
end
@@ -48,7 +48,7 @@ function add_simple_load!(
4848
:getindex, memload, loopdeps,
4949
NODEPENDENCY, NOPARENTS, mref
5050
)
51-
add_vptr!(ls, op.ref.ref.array, vptr(op.ref), actualarray, broadcast)
51+
add_vptr!(ls, op.ref.ref.array, vptr(op), actualarray, broadcast)
5252
pushop!(ls, op, var)
5353
end
5454
function add_load_ref!(ls::LoopSet, var::Symbol, ex::Expr, elementbytes::Int)

src/determinestrategy.jl

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -704,14 +704,6 @@ function evaluate_cost_tile(
704704
u₁, u₂, costpenalty * ucost + stride_penalty(ls, order) + outer_reduct_penalty + favoring_heuristics, choose_to_inline[]
705705
end
706706

707-
function should_inline(ls::LoopSet, u₁::Int, u₂::Int)
708-
# Extremely simplistic heuristic
709-
prod(length, ls.loops) 1024^2 && return true
710-
for op ∈ operations(ls)
711-
712-
end
713-
false
714-
end
715707

716708
struct LoopOrders
717709
syms::Vector{Symbol}
@@ -866,7 +858,7 @@ function choose_tile(ls::LoopSet)
866858
end
867859
end
868860
ls.loadelimination[] = shouldinline
869-
best_order, bestu₁, bestu₂, best_vec, u₁, u₂, lowest_cost, shouldinline
861+
best_order, bestu₁, bestu₂, best_vec, u₁, u₂, lowest_cost, false#shouldinline
870862
end
871863
# Last in order is the inner most loop
872864
function choose_order_cost(ls::LoopSet)

src/graphs.jl

Lines changed: 56 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -94,42 +94,47 @@ subexpr(ex::Number, incr::Number) = ex - incr
9494
subexpr(ex, incr::Number) = addexpr(ex, -incr)
9595

9696
staticmulincr(ptr, incr) = Expr(:call, lv(:staticmul), Expr(:call, :eltype, ptr), incr)
97-
callpointer(sym) = Expr(:call, :pointer, sym)
97+
callpointerforcomparison(sym) = Expr(:call, lv(:pointerforcomparison), sym)
9898
function vec_looprange(loopmax, UF::Int, mangledname::Symbol, ptrcomp::Bool)
99-
incr = if isone(UF)
100-
Expr(:call, lv(:valsub), VECTORWIDTHSYMBOL, 1)
99+
if ptrcomp
100+
vec_looprange(loopmax, UF, callpointerforcomparison(mangledname), staticmulincr(mangledname, VECTORWIDTHSYMBOL))
101101
else
102-
Expr(:call, lv(:valmulsub), VECTORWIDTHSYMBOL, UF, 1)
102+
vec_looprange(loopmax, UF, mangledname, VECTORWIDTHSYMBOL)
103103
end
104-
incr = ptrcomp ? staticmulincr(mangledname, incr) : incr
105-
compexpr = subexpr(loopmax, incr)
106-
if ptrcomp
107-
Expr(:call, :<, callpointer(mangledname), compexpr)
104+
end
105+
function vec_looprange(loopmax, UF::Int, mangledname, W)
106+
incr = if isone(UF)
107+
Expr(:call, lv(:valsub), W, 1)
108108
else
109-
Expr(:call, :<, mangledname, compexpr)
109+
Expr(:call, lv(:valmulsub), W, UF, 1)
110110
end
111+
compexpr = subexpr(loopmax, incr)
112+
Expr(:call, :<, mangledname, compexpr)
111113
end
112114

113-
function looprange(stopcon, incr::Int, mangledname::Symbol, ptrcomp::Bool)
114-
incr = 1 - incr
115+
# function looprange(stopcon, incr::Int, mangledname::Symbol, ptrcomp::Bool, verbose)
116+
# if ptrcomp
117+
# looprange(stopcon, Expr(:call, lv(:vsub), staticmulincr(mangledname, incr), 1), callpointer(mangledname), verbose)
118+
# else
119+
# looprange(stopcon, incr - 1, mangledname)
120+
# end
121+
# end
122+
# function looprange(stopcon, incr, mangledname, verbose)
123+
# if verbose
124+
# Expr(:call, :<, :(@show $mangledname), :(@show $(subexpr(stopcon, incr))))
125+
# else
126+
# Expr(:call, :<, mangledname, subexpr(stopcon, incr))
127+
# end
128+
# end
129+
function looprange(stopcon, incr::Int, mangledname)
115130
if iszero(incr)
116-
if ptrcomp
117-
Expr(:call, :<, callpointer(mangledname), stopcon)
118-
else
119-
Expr(:call, :<, mangledname, stopcon)
120-
end
121-
elseif ptrcomp
122-
Expr(:call, :<, callpointer(mangledname), addexpr(stopcon, staticmulincr(mangledname, incr)))
131+
Expr(:call, :≤, mangledname, stopcon)
123132
else
124-
if isone(incr)
125-
Expr(:call, :≤, mangledname, stopcon)
126-
else
127-
Expr(:call, :<, mangledname, addexpr(stopcon, incr))
128-
end
133+
Expr(:call, :≤, mangledname, subexpr(stopcon, incr))
129134
end
130135
end
131-
function looprange(loop::Loop, incr::Int, mangledname::Symbol)
132-
loop.stopexact ? looprange(loop.stophint, incr, mangledname, false) : looprange(loop.stopsym, incr, mangledname, false)
136+
function looprange(loop::Loop, incr::Int, mangledname)
137+
loop.stopexact ? looprange(loop.stophint, incr, mangledname) : looprange(loop.stopsym, incr, mangledname)
133138
end
134139
function terminatecondition(
135140
loop::Loop, us::UnrollSpecification, n::Int, mangledname::Symbol, inclmask::Bool, UF::Int = unrollfactor(us, n)
@@ -168,6 +173,23 @@ function incrementloopcounter!(q, us::UnrollSpecification, n::Int, UF::Int = unr
168173
push!(q.args, UF)
169174
end
170175
end
176+
function looplengthexpr(loop::Loop)
177+
if loop.stopexact
178+
if loop.startexact
179+
length(loop)
180+
else
181+
Expr(:call, lv(:vsub), loop.stophint + 1, loop.startsym)
182+
end
183+
elseif loop.startexact
184+
if isone(loop.starthint)
185+
loop.stopsym
186+
else
187+
Expr(:call, lv(:vsub), loop.stopsym, loop.starthint - 1)
188+
end
189+
else
190+
Expr(:call, lv(:vsub), loop.stopsym, Expr(:call, lv(:staticm1), loop.startsym))
191+
end
192+
end
171193

172194
# load/compute/store × isunrolled × istiled × pre/post loop × Loop number
173195
struct LoopOrder <: AbstractArray{Vector{Operation},5}
@@ -199,6 +221,11 @@ Base.@propagate_inbounds Base.getindex(lo::LoopOrder, i...) = lo.oporder[LinearI
199221

200222
@enum NumberType::Int8 HardInt HardFloat IntOrFloat INVALID
201223

224+
struct LoopStartStopManager
225+
terminators::Vector{Int}
226+
incrementedptrs::Vector{Vector{ArrayReferenceMeta}}
227+
uniquearrayrefs::Vector{ArrayReferenceMeta}
228+
end
202229
# Must make it easy to iterate
203230
# outer_reductions is a vector of indices (within operation vectors) of the reduction operation, eg the vmuladd op in a dot product
204231
# O(N) search is faster at small sizes
@@ -228,10 +255,12 @@ struct LoopSet
228255
place_after_loop::Vector{Bool}
229256
unrollspecification::Base.RefValue{UnrollSpecification}
230257
loadelimination::Base.RefValue{Bool}
258+
lssm::Base.RefValue{LoopStartStopManager}
231259
mod::Symbol
232260
end
233261

234262

263+
235264
function cost_vec_buf(ls::LoopSet)
236265
cv = @view(ls.cost_vec[:,2])
237266
@inbounds for i ∈ 1:4
@@ -312,7 +341,8 @@ function LoopSet(mod::Symbol)
312341
ArrayReferenceMeta[],
313342
Matrix{Float64}(undef, 4, 2),
314343
Matrix{Float64}(undef, 4, 2),
315-
Bool[], Bool[], Ref{UnrollSpecification}(), Ref(false), mod
344+
Bool[], Bool[], Ref{UnrollSpecification}(),
345+
Ref(false), Ref{LoopStartStopManager}(), mod
316346
)
317347
end
318348

0 commit comments

Comments
 (0)