Skip to content

Commit 6ddf6be

Browse files
committed
Add if/else support.
1 parent 0bc4916 commit 6ddf6be

File tree

11 files changed

+439
-144
lines changed

11 files changed

+439
-144
lines changed

Project.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <[email protected]>"]
4-
version = "0.4.2"
4+
version = "0.4.3"
55

66
[deps]
77
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
@@ -14,8 +14,8 @@ VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
1414
[compat]
1515
MacroTools = "0"
1616
Parameters = "0"
17-
SIMDPirates = "0.2"
18-
SLEEFPirates = "0.2"
17+
SIMDPirates = "0.3"
18+
SLEEFPirates = "0.3"
1919
VectorizationBase = "0.2"
2020
julia = "1.3"
2121

src/LoopVectorization.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ include("condense_loopset.jl")
3939
include("reconstruct_loopset.jl")
4040
include("constructors.jl")
4141

42-
# include("precompile.jl")
43-
# _precompile_()
42+
include("precompile.jl")
43+
_precompile_()
4444

4545
end # module

src/_avx.jl

Lines changed: 0 additions & 81 deletions
This file was deleted.

src/add_compute.jl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,6 @@ function add_reduction!(
5959
get!(ls.opdict, var) do
6060
add_constant!(ls, var, elementbytes)
6161
end
62-
# pushparent!(parents, deps, reduceddeps, parent)
6362
end
6463
function search_tree(opv::Vector{Operation}, var::Symbol) # relies on cycles being forbidden
6564
for opp ∈ opv

src/add_ifelse.jl

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,7 @@ function add_if!(ls::LoopSet, LHS::Symbol, RHS::Expr, elementbytes::Int = 8, mpr
1919
add_compute!(ls, LHS, :vifelse, [condop, trueop, falseop], elementbytes)
2020
end
2121

22-
function add_andblock!(ls::LoopSet, ex::Expr)
23-
condop = add_compute!(ls, gensym(:mask), first(ex.args), elementbytes)
24-
condeval = last(ex.args)::Expr
25-
@assert condeval.head === :(=)
26-
@assert length(condeval.args) == 2
27-
LHS = condeval.args[1]
28-
RHS = condeval.args[2]
22+
function add_andblock!(ls::LoopSet, condop::Operation, LHS, RHS, elementbytes::Int)
2923
rhsop = add_compute!(ls, gensym(:iftruerhs), RHS, elementbytes)
3024
if LHS isa Symbol
3125
altop = getop(ls, LHS)
@@ -36,22 +30,39 @@ function add_andblock!(ls::LoopSet, ex::Expr)
3630
throw("Don't know how to assign onto $LHS.")
3731
end
3832
end
39-
function add_orblock!(ls, ex::Expr)
40-
condop = add_compute!(ls, gensym(:mask), first(ex.args), elementbytes)
41-
condeval = last(ex.args)::Expr
33+
function add_andblock!(ls::LoopSet, condexpr::Expr, condeval::Expr, elementbytes::Int)
34+
condop = add_compute!(ls, gensym(:mask), condexpr, elementbytes)
4235
@assert condeval.head === :(=)
4336
@assert length(condeval.args) == 2
4437
LHS = condeval.args[1]
4538
RHS = condeval.args[2]
46-
rhsop = add_compute!(ls, gensym(:iftruerhs), RHS, elementbytes)
39+
add_andblock!(ls, condop, LHS, RHS, elementbytes)
40+
end
41+
function add_andblock!(ls::LoopSet, ex::Expr, elementbytes::Int)
42+
add_andblock!(ls, first(ex.args)::Expr, last(ex.args)::Expr, elementbytes)
43+
end
44+
45+
function add_orblock!(ls::LoopSet, condop::Operation, LHS, RHS, elementbytes::Int)
46+
rhsop = add_compute!(ls, gensym(:iffalserhs), RHS, elementbytes)
4747
if LHS isa Symbol
4848
altop = getop(ls, LHS)
4949
return add_compute!(ls, LHS, :vifelse, [condop, altop, rhsop], elementbytes)
5050
elseif LHS isa Expr && LHS.head === :ref
51-
negatedcondop = add_compute!(ls, gensym(:negated_mask), :vnot, [condop], elementbytes)
51+
negatedcondop = add_compute!(ls, gensym(:negated_mask), :~, [condop], elementbytes)
5252
return add_conditional_store!(ls, LHS, negatedcondop, rhsop, elementbytes)
5353
else
5454
throw("Don't know how to assign onto $LHS.")
55-
end
55+
end
56+
end
57+
function add_orblock!(ls::LoopSet, condexpr::Expr, condeval::Expr, elementbytes::Int)
58+
condop = add_compute!(ls, gensym(:mask), condexpr, elementbytes)
59+
@assert condeval.head === :(=)
60+
@assert length(condeval.args) == 2
61+
LHS = condeval.args[1]
62+
RHS = condeval.args[2]
63+
add_orblock!(ls, condop, LHS, RHS, elementbytes)
64+
end
65+
function add_orblock!(ls::LoopSet, ex::Expr, elementbytes::Int)
66+
add_orblock!(ls, first(ex.args)::Expr, last(ex.args)::Expr, elementbytes)
5667
end
5768

src/add_stores.jl

Lines changed: 44 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -20,25 +20,21 @@ function add_store!(
2020
ldref = mpref.loopdependencies
2121
reduceddeps = mpref.reduceddeps
2222
parent = getop(ls, var, ldref, elementbytes)
23-
# pushfirst!(parents, parent)
2423
pvar = parent.variable
25-
nops = length(ls.operations)
26-
id = nops
24+
id = length(ls.operations)
2725
if pvar ∉ ls.syms_aliasing_refs
2826
push!(ls.syms_aliasing_refs, pvar)
2927
push!(ls.refs_aliasing_syms, mpref.mref)
30-
# add_unique_store!(ls, mref, parents, ldref, reduceddeps, elementbytes)
3128
else
32-
# try to cse store
33-
# different from cse load, because the other op here must be a store
34-
ref = mpref.mref.ref
29+
# try to cse store, by replacing the previous one
30+
ref = mpref.mref.ref
3531
for opp ∈ operations(ls)
3632
isstore(opp) || continue
37-
if ref == opp.ref.ref# && return cse_store!(ls, identifier(opp), mref, parents, ldref, reduceddeps, elementbytes)
33+
if ref == opp.ref.ref
3834
id = opp.identifier
35+
break
3936
end
4037
end
41-
# add_unique_store!(ls, mref, parents, ldref, reduceddeps, elementbytes)
4238
end
4339
pushparent!(parents, ldref, reduceddeps, parent)
4440
op = Operation( id, name(mpref), elementbytes, :setindex!, memstore, mpref )#loopdependencies, reduceddeps, parents, mpref.mref )
@@ -71,14 +67,49 @@ end
7167
# For now, it is illegal to load from a conditional store.
7268
# if you want that sort of behavior, do a conditional reassignment, and store that result unconditionally.
7369
function add_conditional_store!(ls::LoopSet, LHS, condop::Operation, storeop::Operation, elementbytes::Int)
74-
array, raw_indices = ref_from_ref(ex)
70+
array, raw_indices = ref_from_ref(LHS)
7571
ref = ArrayReference(array, raw_indices)
7672
mref = ArrayReferenceMeta(
7773
ref, fill(true, length(getindices(ref)))
7874
)
79-
parents = [storeop, condop]
8075
ldref = convert(Vector{Symbol}, getindices(ref))
81-
op = Operation( ls, name(mref), elementbytes, :condtionalstore!, memstore, ldref, NODEPENDENCY, parents, mref )
82-
add_unique_store!(ls, op)
76+
77+
pvar = storeop.variable
78+
id = length(ls.operations)
79+
if pvar ∉ ls.syms_aliasing_refs
80+
push!(ls.syms_aliasing_refs, pvar)
81+
push!(ls.refs_aliasing_syms, mref)
82+
storeparents = [storeop, condop]
83+
else
84+
# for now, we don't try to cse the store
85+
# later, as an optimization, we could:
86+
# 1. cse the store
87+
# 2. use the mask to combine the vector we're trying to store here with the vector that would have been stored in the now cse-ed 1.
88+
# 3. use a regular (non-masked) store on that vector.
89+
ref = mpref.mref.ref
90+
for opp ∈ operations(ls)
91+
isstore(opp) || continue
92+
if ref == opp.ref.ref# && return cse_store!(ls, identifier(opp), mref, parents, ldref, reduceddeps, elementbytes)
93+
id = opp.identifier
94+
break
95+
end
96+
end
97+
if id != length(ls.operations) # then there was a previous store
98+
prevstore = getop(ls, id + 1)
99+
storeop = add_compute!(ls, gensym(:combinedstoreop), Instruction(:vifelse), [condop, storeop, first(parents(prevstore))], elementbytes)
100+
storeparents = [storeop]
101+
storeinstr = if prevstore.instruction.instr === :conditionalstore!
102+
push!(storeparents, add_compute!(ls, gensym(:combinedmask), Instruction(:|), [condop, last(parents(prevstore))], elementbytes))
103+
:conditionalstore!
104+
else
105+
:setindex!
106+
end
107+
op = Operation( id, name(mref), elementbytes, storeinstr, memstore, ldref, NODEPENDENCY, storeparents, mref )
108+
cse_store!(ls, op)
109+
end
110+
end
111+
112+
op = Operation( id, name(mref), elementbytes, :conditionalstore!, memstore, ldref, NODEPENDENCY, storeparents, mref )
113+
add_unique_store!(ls, op)
83114
end
84115

src/costs.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ const COST = Dict{Symbol,InstructionCost}(
113113
:vabs2 => InstructionCost(4,0.5),
114114
:(==) => InstructionCost(1, 0.5),
115115
:isequal => InstructionCost(1, 0.5),
116+
:(~) => InstructionCost(1, 0.5),
116117
:(&) => InstructionCost(1, 0.5),
117118
:(|) => InstructionCost(1, 0.5),
118119
:(>) => InstructionCost(1, 0.5),
@@ -139,6 +140,7 @@ const COST = Dict{Symbol,InstructionCost}(
139140
:sqrt_fast => InstructionCost(15,4.0,-2.0),
140141
:log => InstructionCost(20,20.0,40.0,20),
141142
:exp => InstructionCost(20,20.0,20.0,18),
143+
:^ => InstructionCost(40,40.0,40.0,26), # FIXME
142144
:sin => InstructionCost(18,15.0,68.0,23),
143145
:cos => InstructionCost(18,15.0,68.0,26),
144146
:sincos => InstructionCost(25,22.0,70.0,26),

src/graphs.jl

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -374,13 +374,14 @@ function register_single_loop!(ls::LoopSet, looprange::Expr)
374374
N = gensym(Symbol(:loop, itersym))
375375
pushpreamble!(ls, Expr(:(=), N, Expr(:call, lv(:maybestaticlength), r.args[2])))
376376
Loop(itersym, 0, N)
377-
elseif f === :OneTo || f === Expr(:(.), :Base, :OneTo)
377+
elseif f === :OneTo || f == Expr(:(.), :Base, QuoteNode(:OneTo))
378378
otN = r.args[2]
379379
if otN isa Integer
380380
Loop(itersym, 0, otN)
381381
else
382+
otN isa Expr && maybestatic!(otN)
382383
N = gensym(Symbol(:loop, itersym))
383-
pushpreamble!(ls, Expr(:(=), N, maybestatic!(otN)))
384+
pushpreamble!(ls, Expr(:(=), N, otN))
384385
Loop(itersym, 0, N)
385386
end
386387
else
@@ -463,7 +464,7 @@ function add_operation!(
463464
add_compute!(ls, LHS_sym, RHS, elementbytes, LHS_ref)
464465
end
465466
elseif RHS.head === :if
466-
add_if!(ls, LHS, RHS, elementbytes, LHS_ref)
467+
add_if!(ls, LHS_sym, RHS, elementbytes, LHS_ref)
467468
else
468469
throw("Expression not recognized:\n$x")
469470
end
@@ -509,9 +510,9 @@ function Base.push!(ls::LoopSet, ex::Expr, elementbytes::Int = 8)
509510
elseif ex.head === :for
510511
add_loop!(ls, ex)
511512
elseif ex.head === :&&
512-
add_andblock!(ls, ex)
513+
add_andblock!(ls, ex, elementbytes)
513514
elseif ex.head === :||
514-
add_orblock!(ls, ex)
515+
add_orblock!(ls, ex, elementbytes)
515516
else
516517
throw("Don't know how to handle expression:\n$ex")
517518
end

src/lower_store.jl

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,9 @@ function lower_conditionalstore_scalar!(
4949
var = pvariable_name(op, suffix)
5050
cond = last(parents(op))
5151
condvar = if suffix === nothing || tiled ∉ loopdependencies(cond)
52-
pvariable_name(op, nothing)
52+
variable_name(cond, nothing)
5353
else
54-
pvariable_name(op, suffix)
54+
variable_name(cond, suffix)
5555
end
5656
condunrolled = unrolled ∈ loopdependencies(cond)
5757
ptr = refname(op)
@@ -81,10 +81,11 @@ function lower_conditionalstore_vectorized!(
8181
vecnotunrolled = vectorized !== unrolled
8282
cond = last(parents(op))
8383
condvar = if suffix === nothing || tiled ∉ loopdependencies(cond)
84-
pvariable_name(op, nothing)
84+
variable_name(cond, nothing)
8585
else
86-
pvariable_name(op, suffix)
86+
variable_name(cond, suffix)
8787
end
88+
# @show parents(op) cond condvar
8889
condunrolled = unrolled ∈ loopdependencies(cond)
8990
for u ∈ zero(Int32):Base.unsafe_trunc(Int32,U-1)
9091
td = UnrollArgs(u, unrolled, tiled, suffix)
@@ -143,7 +144,6 @@ function lower_store!(
143144
q::Expr, op::Operation, vectorized::Symbol, W::Symbol, unrolled::Symbol, tiled::Symbol, U::Int,
144145
suffix::Union{Nothing,Int}, mask::Union{Nothing,Symbol,Unsigned} = nothing
145146
)
146-
# @show unrolled, tiled, U
147147
isunrolled = unrolled ∈ loopdependencies(op)
148148
U = isunrolled ? U : 1
149149
if instruction(op).instr !== :conditionalstore!

0 commit comments

Comments
 (0)