Skip to content

Commit e243299

Browse files
committed
Support using loop variables for things other than indexing. Fixes #22, fixes #36.
1 parent 39dc532 commit e243299

14 files changed

+191
-35
lines changed

src/add_compute.jl

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,13 @@ function add_compute!(
156156
else
157157
add_parent!(parents, deps, reduceddeps, ls, arg, elementbytes)
158158
end
159+
elseif arg ∈ ls.loopsymbols
160+
loopsym = gensym(arg)
161+
pushpreamble!(ls, Expr(:(=), loopsym, LoopValue()))
162+
loopsymop = add_simple_load!(ls, gensym(loopsym), ArrayReference(loopsym, [arg]), elementbytes)
163+
push!(ls.syms_aliasing_refs, name(loopsymop))
164+
push!(ls.refs_aliasing_syms, loopsymop.ref)
165+
pushparent!(parents, deps, reduceddeps, loopsymop)
159166
else
160167
add_parent!(parents, deps, reduceddeps, ls, arg, elementbytes)
161168
end

src/add_constants.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ function add_constant!(ls::LoopSet, var::Symbol, mpref::ArrayReferenceMetaPositi
1212
op = Operation(length(operations(ls)), var, elementbytes, LOOPCONSTANT, constant, NODEPENDENCY, Symbol[], NOPARENTS, mpref.mref)
1313
add_vptr!(ls, op)
1414
temp = gensym(:intermediateconstref)
15-
pushpreamble!(ls, Expr(:(=), temp, Expr(:call, lv(:load), mpref.mref.ptr, mem_offset(op, UnrollArgs(zero(Int32), Symbol(""), Symbol(""), nothing)))))
15+
pushpreamble!(ls, Expr(:(=), temp, Expr(:call, lv(:load), mpref.mref.ptr, mem_offset(op, UnrollArgs(0, Symbol(""), Symbol(""), nothing)))))
1616
pushpreamble!(ls, op, temp)
1717
pushop!(ls, op, temp)
1818
end

src/add_ifelse.jl

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,7 @@ function add_if!(ls::LoopSet, LHS::Symbol, RHS::Expr, elementbytes::Int = 8, mpr
1919
add_compute!(ls, LHS, :vifelse, [condop, trueop, falseop], elementbytes)
2020
end
2121

22-
function add_andblock!(ls::LoopSet, condop::Operation, LHS, RHS, elementbytes::Int)
23-
rhsop = add_compute!(ls, gensym(:iftruerhs), RHS, elementbytes)
22+
function add_andblock!(ls::LoopSet, condop::Operation, LHS, rhsop::Operation, elementbytes::Int)
2423
if LHS isa Symbol
2524
altop = getop(ls, LHS)
2625
return add_compute!(ls, LHS, :vifelse, [condop, rhsop, altop], elementbytes)
@@ -30,6 +29,14 @@ function add_andblock!(ls::LoopSet, condop::Operation, LHS, RHS, elementbytes::I
3029
throw("Don't know how to assign onto $LHS.")
3130
end
3231
end
32+
function add_andblock!(ls::LoopSet, condop::Operation, LHS, RHS::Expr, elementbytes::Int)
33+
rhsop = add_compute!(ls, gensym(:iftruerhs), RHS, elementbytes)
34+
add_andblock!(ls, condop, LHS, rhsop, elementbytes)
35+
end
36+
function add_andblock!(ls::LoopSet, condop::Operation, LHS, RHS, elementbytes::Int)
37+
rhsop = getop(ls, RHS)
38+
add_andblock!(ls, condop, LHS, rhsop, elementbytes)
39+
end
3340
function add_andblock!(ls::LoopSet, condexpr::Expr, condeval::Expr, elementbytes::Int)
3441
condop = add_compute!(ls, gensym(:mask), condexpr, elementbytes)
3542
@assert condeval.head === :(=)
@@ -42,8 +49,7 @@ function add_andblock!(ls::LoopSet, ex::Expr, elementbytes::Int)
4249
add_andblock!(ls, first(ex.args)::Expr, last(ex.args)::Expr, elementbytes)
4350
end
4451

45-
function add_orblock!(ls::LoopSet, condop::Operation, LHS, RHS, elementbytes::Int)
46-
rhsop = add_compute!(ls, gensym(:iffalserhs), RHS, elementbytes)
52+
function add_orblock!(ls::LoopSet, condop::Operation, LHS, rhsop::Operation, elementbytes::Int)
4753
if LHS isa Symbol
4854
altop = getop(ls, LHS)
4955
return add_compute!(ls, LHS, :vifelse, [condop, altop, rhsop], elementbytes)
@@ -54,6 +60,14 @@ function add_orblock!(ls::LoopSet, condop::Operation, LHS, RHS, elementbytes::In
5460
throw("Don't know how to assign onto $LHS.")
5561
end
5662
end
63+
function add_orblock!(ls::LoopSet, condop::Operation, LHS, RHS::Expr, elementbytes::Int)
64+
rhsop = add_compute!(ls, gensym(:iffalserhs), RHS, elementbytes)
65+
add_orblock!(ls, condop, LHS, rhsop, elementbytes)
66+
end
67+
function add_orblock!(ls::LoopSet, condop::Operation, LHS, RHS, elementbytes::Int)
68+
rhsop = getop(ls, RHS)
69+
add_orblock!(ls, condop, LHS, rhsop, elementbytes)
70+
end
5771
function add_orblock!(ls::LoopSet, condexpr::Expr, condeval::Expr, elementbytes::Int)
5872
condop = add_compute!(ls, gensym(:mask), condexpr, elementbytes)
5973
@assert condeval.head === :(=)

src/add_loads.jl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,3 +50,11 @@ function add_load_getindex!(ls::LoopSet, var::Symbol, ex::Expr, elementbytes::In
5050
add_load!(ls, var, array, rawindices, elementbytes)
5151
end
5252

53+
54+
struct LoopValue end
55+
@inline VectorizationBase.stridedpointer(::LoopValue) = LoopValue()
56+
@inline SIMDPirates.vload(::LoopValue, i::Tuple{_MM{W}}) where {W} = SVec(SIMDPirates.vrangeincr(Val{W}(), @inbounds(i[1].i), Val{1}()))
57+
@inline VectorizationBase.load(::LoopValue, i::Integer) = i + one(i)
58+
@inline VectorizationBase.load(::LoopValue, i::Tuple{I}) where {I<:Integer} = @inbounds(i[1]) + one(I)
59+
@inline Base.eltype(::LoopValue) = Int8
60+

src/add_stores.jl

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -77,12 +77,10 @@ end
7777
# For now, it is illegal to load from a conditional store.
7878
# if you want that sort of behavior, do a conditional reassignment, and store that result unconditionally.
7979
function add_conditional_store!(ls::LoopSet, LHS, condop::Operation, storeop::Operation, elementbytes::Int)
80-
array, raw_indices = ref_from_ref(LHS)
81-
ref = ArrayReference(array, raw_indices)
82-
mref = ArrayReferenceMeta(
83-
ref, fill(true, length(getindices(ref)))
84-
)
85-
ldref = convert(Vector{Symbol}, getindices(ref))
80+
array, rawindices = ref_from_ref(LHS)
81+
mpref = array_reference_meta!(ls, array, rawindices, elementbytes)
82+
mref = mpref.mref
83+
ldref = mpref.loopdependencies
8684

8785
pvar = storeop.variable
8886
id = length(ls.operations)

src/condense_loopset.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ function shifted_loopset(ls::LoopSet, loopsyms::Vector{Symbol})
7575
ld = zero(UInt64) # leading_zeros(ld) >> 2 yields the number of loopdeps
7676
for d ∈ loopsyms
7777
ld <<= 4
78-
ld |= getloopid(ls, d)
78+
ld |= getloopid(ls, d)::Int
7979
end
8080
ld
8181
end

src/determinestrategy.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,7 @@ function solve_tilesize(X, R, UL, TL)
233233
end
234234
# The RR + 1 is a hack to get it to favor Uhigh in more scenarios
235235
Tl = Tlow
236-
while RR < Uhigh*Tl*R[1] + Uhigh*R[2]
236+
while RR < Uhigh*Tl*R[1] + Uhigh*R[2] && Tl > 1
237237
Tl -= 1
238238
end
239239
tcost_temp = tile_cost(X, Uhigh, Tl, UL, TL)
@@ -440,6 +440,7 @@ function evaluate_cost_tile(
440440
reg_pressure[4] += rp
441441
end
442442
end
443+
sum(reg_pressure) > VectorizationBase.REGISTER_COUNT && return 0, 0, Inf
443444
# @show order, vectorized cost_vec reg_pressure
444445
# @show solve_tilesize(ls, unrolled, tiled, cost_vec, reg_pressure)
445446
U, T, tcost = solve_tilesize(ls, unrolled, tiled, cost_vec, reg_pressure, W, vectorized)

src/lower_load.jl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ function lower_load_scalar!(
1818
ptr = refname(op)
1919
isunrolled = unrolled ∈ loopdeps
2020
U = isunrolled ? U : 1
21-
for u ∈ zero(Int32):Base.unsafe_trunc(Int32,U-1)
21+
for u ∈ 0:U-1
2222
varname = varassignname(var, u, isunrolled)
2323
td = UnrollArgs(u, unrolled, tiled, suffix)
2424
push!(q.args, Expr(:(=), varname, Expr(:call, lv(:load), ptr, mem_offset_u(op, td))))
@@ -32,16 +32,16 @@ function lower_load_vectorized!(
3232
loopdeps = loopdependencies(op)
3333
@assert vectorized ∈ loopdeps
3434
if unrolled ∈ loopdeps
35-
umin = zero(Int32)
35+
umin = 0
3636
U = U
3737
else
38-
umin = -one(Int32)
38+
umin = -1
3939
U = 0
4040
end
4141
# Urange = unrolled ∈ loopdeps ? 0:U-1 : 0
4242
var = variable_name(op, suffix)
4343
vecnotunrolled = vectorized !== unrolled
44-
for u ∈ umin:Base.unsafe_trunc(Int32,U-1)
44+
for u ∈ umin:U-1
4545
td = UnrollArgs(u, unrolled, tiled, suffix)
4646
pushvectorload!(q, op, var, td, U, W, mask, vecnotunrolled)
4747
end

src/lower_memory_common.jl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11

22
struct UnrollArgs{T}
3-
u::Int32
3+
u::Int
44
unrolled::Symbol
55
tiled::Symbol
66
suffix::T
@@ -13,7 +13,7 @@ function parentind(ind::Symbol, op::Operation)
1313
end
1414
function symbolind(ind::Symbol, op::Operation, td::UnrollArgs)
1515
id = parentind(ind, op)
16-
id == -1 && return Expr(:call, :-, ind, one(Int32))
16+
id == -1 && return Expr(:call, :-, ind, 1)
1717
@unpack u, unrolled, tiled, suffix = td
1818
parent = parents(op)[id]
1919
pvar = if tiled ∈ loopdependencies(parent)
@@ -22,7 +22,7 @@ function symbolind(ind::Symbol, op::Operation, td::UnrollArgs)
2222
mangledvar(parent)
2323
end
2424
pvar = unrolled ∈ loopdependencies(parent) ? Symbol(pvar, u) : pvar
25-
Expr(:call, :-, pvar, one(Int32))
25+
Expr(:call, :-, pvar, 1)
2626
end
2727
function mem_offset(op::Operation, td::UnrollArgs)
2828
# @assert accesses_memory(op) "Computing memory offset only makes sense for operations that access memory."
@@ -103,7 +103,7 @@ end
103103
# Expr(:call, :+, q, incr)
104104
# end
105105
# end
106-
function varassignname(var::Symbol, u::Int32, isunrolled::Bool)
106+
function varassignname(var::Symbol, u::Int, isunrolled::Bool)
107107
isunrolled ? Symbol(var, u) : var
108108
end
109109
# name_memoffset only gets called when vectorized

src/lower_store.jl

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ function lower_conditionalstore_scalar!(
5555
end
5656
condunrolled = unrolled ∈ loopdependencies(cond)
5757
ptr = refname(op)
58-
for u ∈ zero(Int32):Base.unsafe_trunc(Int32,U-1)
58+
for u ∈ 0:U-1
5959
varname = varassignname(var, u, isunrolled)
6060
condvarname = varassignname(condvar, u, condunrolled)
6161
td = UnrollArgs(u, unrolled, tiled, suffix)
@@ -71,10 +71,10 @@ function lower_conditionalstore_vectorized!(
7171
@assert unrolled loopdeps
7272
var = pvariable_name(op, suffix)
7373
if isunrolled
74-
umin = zero(Int32)
74+
umin = 0
7575
U = U
7676
else
77-
umin = -one(Int32)
77+
umin = -1
7878
U = 0
7979
end
8080
ptr = refname(op)
@@ -87,7 +87,7 @@ function lower_conditionalstore_vectorized!(
8787
end
8888
# @show parents(op) cond condvar
8989
condunrolled = unrolled ∈ loopdependencies(cond)
90-
for u ∈ zero(Int32):Base.unsafe_trunc(Int32,U-1)
90+
for u ∈ 0:U-1
9191
td = UnrollArgs(u, unrolled, tiled, suffix)
9292
name, mo = name_memoffset(var, op, td, W, vecnotunrolled)
9393
condvarname = varassignname(condvar, u, condunrolled)
@@ -107,7 +107,7 @@ function lower_store_scalar!(
107107
)
108108
var = pvariable_name(op, suffix)
109109
ptr = refname(op)
110-
for u ∈ zero(Int32):Base.unsafe_trunc(Int32,U-1)
110+
for u ∈ 0:U-1
111111
varname = varassignname(var, u, isunrolled)
112112
td = UnrollArgs(u, unrolled, tiled, suffix)
113113
push!(q.args, Expr(:call, lv(:store!), ptr, varname, mem_offset_u(op, td)))
@@ -122,15 +122,15 @@ function lower_store_vectorized!(
122122
@assert unrolled loopdeps
123123
var = pvariable_name(op, suffix)
124124
if isunrolled
125-
umin = zero(Int32)
125+
umin = 0
126126
U = U
127127
else
128-
umin = -one(Int32)
128+
umin = -1
129129
U = 0
130130
end
131131
ptr = refname(op)
132132
vecnotunrolled = vectorized !== unrolled
133-
for u ∈ zero(Int32):Base.unsafe_trunc(Int32,U-1)
133+
for u ∈ 0:U-1
134134
td = UnrollArgs(u, unrolled, tiled, suffix)
135135
name, mo = name_memoffset(var, op, td, W, vecnotunrolled)
136136
instrcall = Expr(:call, lv(:vstore!), ptr, name, mo)

0 commit comments

Comments
 (0)