Skip to content

Commit 07a97c2

Browse files
committed
Tests currently pass on VectorizationBase#master, but new features have not yet been tested. Will work on extended generated-function @avx to take advantage of type information.
1 parent 1433577 commit 07a97c2

12 files changed

+725
-462
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
*~
88
src/#*#
99
tests/#*#
10-
benchmarks/#*#
10+
benchmark/#*#
1111
*.mem
1212
*.mod
1313
*.mod0

src/LoopVectorization.jl

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@ module LoopVectorization
22

33
using VectorizationBase, SIMDPirates, SLEEFPirates, MacroTools, Parameters
44
using VectorizationBase: REGISTER_SIZE, REGISTER_COUNT, extract_data, num_vector_load_expr,
5-
mask, masktable, pick_vector_width_val, valmul, valrem, valmuladd, valadd, valsub, _MM
5+
mask, masktable, pick_vector_width_val, valmul, valrem, valmuladd, valadd, valsub, _MM,
6+
maybestaticlength, maybestaticsize, Static, staticm1
67
using SIMDPirates: VECTOR_SYMBOLS, evadd, evmul, vrange, reduced_add, reduced_prod, reduce_to_add, reduce_to_prod
78
using Base.Broadcast: Broadcasted, DefaultArrayStyle
89
using LinearAlgebra: Adjoint, Transpose
@@ -17,6 +18,12 @@ export LowDimArray, stridedpointer, vectorizable,
1718
include("costs.jl")
1819
include("operations.jl")
1920
include("graphs.jl")
21+
include("memory_ops_common.jl")
22+
include("add_loads.jl")
23+
include("add_stores.jl")
24+
include("add_compute.jl")
25+
include("add_constants.jl")
26+
include("add_ifelse.jl")
2027
include("broadcast.jl")
2128
include("determinestrategy.jl")
2229
include("lowering.jl")

src/add_compute.jl

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
"""
    addsetv!(s::AbstractVector{T}, v::T)

Treat `s` as an ordered set: append `v` unless an element identical to it
(compared with `===`) is already present. Always returns `nothing`.
"""
function addsetv!(s::AbstractVector{T}, v::T) where {T}
    # Identity comparison (`===`) is intentional; it is what the original loop used.
    any(sᵢ -> sᵢ === v, s) || push!(s, v)
    return nothing
end
8+
"""
    mergesetv!(s1::AbstractVector{T}, s2::AbstractVector{T})

Add every element of `s2` to `s1` via `addsetv!`, so elements already present
in `s1` are not duplicated. Returns `nothing`.
"""
function mergesetv!(s1::AbstractVector{T}, s2::AbstractVector{T}) where {T}
    for s in s2
        addsetv!(s1, s)
    end
    return nothing
end
14+
"""
    mergesetdiffv!(s1, s2, s3)

Add to `s1` (via `addsetv!`) every element of `s2` that is not contained in
`s3`, i.e. `s1 ∪= (s2 ∖ s3)`. Returns `nothing`.
"""
function mergesetdiffv!(
    s1::AbstractVector{T},
    s2::AbstractVector{T},
    s3::AbstractVector{T}
) where {T}
    # NOTE(review): the membership operators were stripped from the extracted
    # text; reconstructed as "s not in s3" — confirm against the repository.
    for s in s2
        s in s3 || addsetv!(s1, s)
    end
    return nothing
end
24+
"""
    setdiffv!(s3, s1, s2)

Push onto `s3` every element of `s1` that is in neither `s2` nor (already)
`s3`, i.e. `s3 ∪= (s1 ∖ s2)`. Returns `nothing`.
"""
function setdiffv!(s3::AbstractVector{T}, s1::AbstractVector{T}, s2::AbstractVector{T}) where {T}
    # NOTE(review): the membership operators were stripped from the extracted
    # text; reconstructed as `(s ∈ s2) || (s ∉ s3 && push!(s3, s))` — confirm.
    for s in s1
        (s in s2 || s in s3) || push!(s3, s)
    end
    return nothing
end
29+
"""
    update_deps!(deps, reduceddeps, parent::Operation)

Fold the dependency information of `parent` into the accumulators of the
operation being built:

- `deps` receives the loop dependencies of `parent` that are not among its
  reduced dependencies;
- `reduceddeps` receives the reduced dependencies of `parent`, unless `parent`
  is a load, a constant, or one of the instructions `reduced_add`,
  `reduced_prod`, `reduce_to_add`, `reduce_to_prod`.

Returns `nothing`.
"""
function update_deps!(deps::Vector{Symbol}, reduceddeps::Vector{Symbol}, parent::Operation)
    mergesetdiffv!(deps, loopdependencies(parent), reduceddependencies(parent))
    # NOTE(review): the operator before the tuple was stripped from the extracted
    # text; reconstructed as "not in" (∉) — confirm against the repository.
    finishes_reduction =
        parent.instruction.instr in (:reduced_add, :reduced_prod, :reduce_to_add, :reduce_to_prod)
    if !(isload(parent) || isconstant(parent)) && !finishes_reduction
        mergesetv!(reduceddeps, reduceddependencies(parent))
    end
    return nothing
end
36+
37+
"""
    pushparent!(parents, deps, reduceddeps, parent::Operation)

Append `parent` to `parents`, then update `deps` and `reduceddeps` from it with
`update_deps!`. Returns whatever `update_deps!` returns.
"""
function pushparent!(
    parents::Vector{Operation}, deps::Vector{Symbol}, reduceddeps::Vector{Symbol}, parent::Operation
)
    push!(parents, parent)
    return update_deps!(deps, reduceddeps, parent)
end
41+
"""
    pushparent!(mpref::ArrayReferenceMetaPosition, parent::Operation)

Convenience method: register `parent` on the parent list and dependency
vectors stored in `mpref`.
"""
function pushparent!(mpref::ArrayReferenceMetaPosition, parent::Operation)
    ops = mpref.parents
    loopdeps = mpref.loopdependencies
    rdeps = mpref.reduceddeps
    return pushparent!(ops, loopdeps, rdeps, parent)
end
44+
"""
    add_parent!(parents, deps, reduceddeps, ls::LoopSet, var, elementbytes = 8)

Resolve `var` to an `Operation` and register it as a parent:

- a `Symbol` is looked up with `getop`;
- an `Expr` is added as a new operation under a fresh `gensym(:temporary)` name;
- anything else is assumed to be a constant and added with `add_constant!`.
"""
function add_parent!(
    parents::Vector{Operation}, deps::Vector{Symbol}, reduceddeps::Vector{Symbol}, ls::LoopSet, var, elementbytes::Int = 8
)
    if var isa Symbol
        resolved = getop(ls, var, elementbytes)
    elseif var isa Expr # CSE candidate
        resolved = add_operation!(ls, gensym(:temporary), var, elementbytes)
    else # assumed constant
        resolved = add_constant!(ls, var, elementbytes)
    end
    return pushparent!(parents, deps, reduceddeps, resolved)
end
56+
"""
    add_reduction!(parents, deps, reduceddeps, ls::LoopSet, var::Symbol, elementbytes = 8)

Return the entry for `var` in `ls.opdict`, first creating it with
`add_constant!` when it is missing. `parents`, `deps` and `reduceddeps` are
accepted but not modified here; the operation is deliberately not pushed as a
parent by this function.
"""
function add_reduction!(
    parents::Vector{Operation}, deps::Vector{Symbol}, reduceddeps::Vector{Symbol}, ls::LoopSet, var::Symbol, elementbytes::Int = 8
)
    make_constant = () -> add_constant!(ls, var, elementbytes)
    return get!(make_constant, ls.opdict, var)
end
64+
"""
    add_reduction_update_parent!(parents, deps, reduceddeps, ls, var, instr, elementbytes = 8)

Create the compute operation for a reduction that updates `var` with `instr`.

The current operation for `var` (the "parent") is fetched with `getop`:

- If its instruction is `LOOPCONSTANT`, the parent itself initialises the
  reduction, the new operation keeps the name `var`, its identifier is recorded
  in `ls.outer_reductions`, and the new operation is returned.
- Otherwise a fresh initialiser constant is created from `REDUCTION_ZERO`, the
  reduction runs under a `gensym(:reduction)` name, and a second "child"
  operation is pushed that combines the reduction result with the parent under
  the parent's name; that child is what is returned.
"""
function add_reduction_update_parent!(
    parents::Vector{Operation}, deps::Vector{Symbol}, reduceddeps::Vector{Symbol}, ls::LoopSet,
    var::Symbol, instr::Symbol, elementbytes::Int = 8
)
    parent = getop(ls, var, elementbytes)
    isloopconstant = parent.instruction === LOOPCONSTANT
    Instr = Instruction(instr)
    if isloopconstant
        # The parent initialises the reduction directly.
        reductinit = parent
        reductsym = var
        reductcombine = Symbol("")
    else
        reduct_zero = REDUCTION_ZERO[Instr]
        reductcombine = REDUCTION_SCALAR_COMBINE[Instr].name
        reductsym = gensym(:reduction)
        zero_call = Expr(:call, reduct_zero, ls.T)
        reductinit = add_constant!(
            ls, zero_call, loopdependencies(parent), reductsym, reduct_zero, elementbytes
        )
        # When the parent is itself a matching constant it can serve as the
        # initialisation, which selects the "combine to" variant.
        if isconstant(parent) && reduct_zero === parent.instruction.mod
            reductcombine = REDUCTION_COMBINETO[reductcombine]
        end
    end
    setdiffv!(reduceddeps, deps, loopdependencies(reductinit))
    # deps and reduceddeps will not be disjoint after this call
    pushparent!(parents, deps, reduceddeps, reductinit)
    op = Operation(
        length(operations(ls)), reductsym, elementbytes, instr, compute, deps, reduceddeps, parents
    )
    if parent.instruction === LOOPCONSTANT
        push!(ls.outer_reductions, identifier(op))
    end
    # Overwrites the entry in the operations dict, but not the vector.
    opout = pushop!(ls, op, var)
    if isloopconstant
        return opout
    end
    # Create the child that combines the reduction (first parent) into `parent`.
    childparents = Operation[]
    childdeps = Symbol[]
    childrdeps = Symbol[]
    pushparent!(childparents, childdeps, childrdeps, op)
    pushparent!(childparents, childdeps, childrdeps, parent)
    child = Operation(
        length(operations(ls)), name(parent), elementbytes, reductcombine, compute,
        childdeps, childrdeps, childparents
    )
    return pushop!(ls, child, name(parent))
end
108+
"""
    add_compute!(ls::LoopSet, var::Symbol, ex::Expr, elementbytes = 8, mpref = nothing)

Add the call expression `ex` to `ls` as a compute operation assigned to `var`.

Each argument of the call becomes a parent:

- an argument identical to `var` marks the operation as a reduction;
- an `Expr` that `tryrefconvert` recognises as an array reference becomes a
  load (or marks a reduction when it equals `mpref`);
- everything else goes through `add_parent!`.

Reductions are finished by `add_reduction_update_parent!`; otherwise a plain
compute `Operation` is pushed. Returns the operation produced by that step.
"""
function add_compute!(
    ls::LoopSet, var::Symbol, ex::Expr, elementbytes::Int = 8,
    mpref::Union{Nothing,ArrayReferenceMetaPosition} = nothing
)
    @assert ex.head === :call
    instr = instruction(first(ex.args))::Symbol
    args = @view(ex.args[2:end])
    parents = Operation[]
    deps = Symbol[]
    reduceddeps = Symbol[]
    reduction = false
    for arg in args
        if var === arg
            reduction = true
            add_reduction!(parents, deps, reduceddeps, ls, arg, elementbytes)
        elseif arg isa Expr
            isref, argref = tryrefconvert(ls, arg, elementbytes)
            if !isref
                add_parent!(parents, deps, reduceddeps, ls, arg, elementbytes)
            elseif mpref == argref
                # Reading the location that is being written: treat as a reduction.
                reduction = true
                add_load!(ls, var, mpref, elementbytes)
            else
                tempload = add_load!(ls, gensym(:tempload), argref, elementbytes)
                pushparent!(parents, deps, reduceddeps, tempload)
            end
        else
            add_parent!(parents, deps, reduceddeps, ls, arg, elementbytes)
        end
    end
    if reduction
        return add_reduction_update_parent!(parents, deps, reduceddeps, ls, var, instr, elementbytes)
    end
    op = Operation(length(operations(ls)), var, elementbytes, instr, compute, deps, reduceddeps, parents)
    return pushop!(ls, op, var)
end
146+
147+
"""
    add_compute!(ls::LoopSet, LHS::Symbol, instr, parents::Vector{Operation}, elementbytes)

Push a compute operation named `LHS` that applies `instr` to already-built
`parents`. Its dependency vectors are accumulated from the parents with
`update_deps!`.
"""
function add_compute!(
    ls::LoopSet, LHS::Symbol, instr, parents::Vector{Operation}, elementbytes
)
    loopdeps = Symbol[]
    rdeps = Symbol[]
    for p in parents
        update_deps!(loopdeps, rdeps, p)
    end
    newop = Operation(length(operations(ls)), LHS, elementbytes, instr, compute, loopdeps, rdeps, parents)
    return pushop!(ls, newop, LHS)
end
156+

src/add_constants.jl

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
"""
    add_constant!(ls::LoopSet, var::Symbol, elementbytes = 8)

Add `var` as a `LOOPCONSTANT` operation with no loop dependencies and no
parents, register it with `pushpreamble!`, and push it onto `ls` under `var`.
"""
function add_constant!(ls::LoopSet, var::Symbol, elementbytes::Int = 8)
    id = length(operations(ls))
    constop = Operation(id, var, elementbytes, LOOPCONSTANT, constant, NODEPENDENCY, Symbol[], NOPARENTS)
    pushpreamble!(ls, constop, mangledvar(constop))
    return pushop!(ls, constop, var)
end
6+
"""
    add_constant!(ls::LoopSet, var, elementbytes = 8)

Add a non-symbol value `var` as a `LOOPCONSTANT` operation under a fresh
`gensym(:temp)` name. An assignment of `var` to the operation's mangled
variable is pushed to the preamble before the operation is registered.
"""
function add_constant!(ls::LoopSet, var, elementbytes::Int = 8)
    tmpname = gensym(:temp)
    constop = Operation(
        length(operations(ls)), tmpname, elementbytes, LOOPCONSTANT, constant,
        NODEPENDENCY, Symbol[], NOPARENTS
    )
    assignment = Expr(:(=), mangledvar(constop), var)
    pushpreamble!(ls, assignment)
    pushpreamble!(ls, constop, mangledvar(constop))
    return pushop!(ls, constop, tmpname)
end
13+
"""
    add_constant!(ls::LoopSet, var::Symbol, mpref::ArrayReferenceMetaPosition, elementbytes::Int)

Add a loop-invariant array load as a `LOOPCONSTANT` operation named `var`.
The operation carries `mpref.mref`; a `load` call on `mpref.mref.ptr` at the
operation's memory offset is assigned to its mangled variable in the preamble.
"""
function add_constant!(ls::LoopSet, var::Symbol, mpref::ArrayReferenceMetaPosition, elementbytes::Int)
    constop = Operation(
        length(operations(ls)), var, elementbytes, LOOPCONSTANT, constant,
        NODEPENDENCY, Symbol[], NOPARENTS, mpref.mref
    )
    add_vptr!(ls, constop)
    target = mangledvar(constop)
    unrollargs = UnrollArgs(zero(Int32), Symbol(""), Symbol(""), nothing)
    loadcall = Expr(:call, lv(:load), mpref.mref.ptr, mem_offset(constop, unrollargs))
    pushpreamble!(ls, Expr(:(=), target, loadcall))
    pushpreamble!(ls, constop, mangledvar(constop))
    return pushop!(ls, constop, var)
end
20+
# This version has loop dependencies. var gets assigned to sym when lowering.
21+
"""
    add_constant!(ls::LoopSet, var::Symbol, deps::Vector{Symbol}, sym = gensym(:constant), f = Symbol(""), elementbytes = 8)

Push a constant operation named `sym` that depends on the loops in `deps`.
`var` is carried in the operation's `Instruction(f, var)`; nothing is added to
the preamble by this method.
"""
function add_constant!(ls::LoopSet, var::Symbol, deps::Vector{Symbol}, sym::Symbol = gensym(:constant), f::Symbol = Symbol(""), elementbytes::Int = 8)
    constop = Operation(
        length(operations(ls)), sym, elementbytes, Instruction(f, var), constant,
        deps, NODEPENDENCY, NOPARENTS
    )
    return pushop!(ls, constop, sym)
end
25+
26+
"""
    add_constant!(ls::LoopSet, var, deps::Vector{Symbol}, sym = gensym(:constant), f = Symbol(""), elementbytes = 8)

Push a constant operation named `sym` with loop dependencies `deps` for a
non-symbol value `var`. The instruction is `Instruction(f, tmp)` where `tmp` is
a fresh `gensym(:temp)` (a hack used to pass meta information), and the
operation is registered with `pushpreamble!` together with `var`.
"""
function add_constant!(
    ls::LoopSet, var, deps::Vector{Symbol}, sym::Symbol = gensym(:constant), f::Symbol = Symbol(""), elementbytes::Int = 8
)
    metasym = gensym(:temp) # hack, passing meta info here
    constop = Operation(
        length(operations(ls)), sym, elementbytes, Instruction(f, metasym), constant,
        deps, NODEPENDENCY, NOPARENTS
    )
    pushpreamble!(ls, constop, var)
    return pushop!(ls, constop, sym)
end

src/add_ifelse.jl

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
2+
## Currently, if/else will create its own local scope
3+
## Assignments will not register in the loop's main scope
4+
## although stores and return values will.
5+
6+
7+
"""
    add_if!(ls::LoopSet, LHS::Symbol, RHS::Expr, elementbytes = 8, mpref = nothing)

Add a simple if/else expression assigned to `LHS` as a `vifelse` operation.

`RHS.args` must hold exactly three entries: the condition, the value when true
and the value when false. The condition is added with `add_compute!` (it is the
only part that receives `mpref`); each branch must be a call expression or a
non-`Expr` value and is added with `add_operation!`.

The branch operations get unique `gensym` names so that several if/else
expressions in the same loop body do not share (and overwrite) the same
operation name.
"""
function add_if!(ls::LoopSet, LHS::Symbol, RHS::Expr, elementbytes::Int = 8, mpref::Union{Nothing,ArrayReferenceMetaPosition} = nothing)
    # for now, just simple 1-liners
    @assert length(RHS.args) == 3 "if statements without an else cannot be assigned to a variable."
    condition = first(RHS.args)
    condop = add_compute!(ls, gensym(:mask), condition, elementbytes, mpref)
    iftrue = RHS.args[2]
    (iftrue isa Expr && iftrue.head !== :call) && throw("Only calls or constant expressions are currently supported in if/else blocks.")
    trueop = add_operation!(ls, gensym(:iftrue), iftrue, elementbytes)
    iffalse = RHS.args[3]
    (iffalse isa Expr && iffalse.head !== :call) && throw("Only calls or constant expressions are currently supported in if/else blocks.")
    falseop = add_operation!(ls, gensym(:iffalse), iffalse, elementbytes)

    return add_compute!(ls, LHS, :vifelse, [condop, trueop, falseop], elementbytes)
end
21+
22+
"""
    add_andblock!(ls::LoopSet, ex::Expr, elementbytes = 8)

Add a conditional assignment whose first argument is the condition and whose
last argument is an assignment `LHS = RHS` that takes effect where the
condition holds (presumably the lowering of `cond && (LHS = RHS)`).

- `LHS isa Symbol`: pushes `LHS = vifelse(cond, RHS, LHS)`.
- `LHS` is a `:ref` expression: pushes a conditional store via
  `add_conditional_store!`.

`elementbytes` was previously referenced without being defined; it is now a
trailing parameter with the same default (8) used elsewhere in this file.
"""
function add_andblock!(ls::LoopSet, ex::Expr, elementbytes::Int = 8)
    condop = add_compute!(ls, gensym(:mask), first(ex.args), elementbytes)
    condeval = last(ex.args)::Expr
    @assert condeval.head === :(=)
    @assert length(condeval.args) == 2
    LHS = condeval.args[1]
    RHS = condeval.args[2]
    rhsop = add_compute!(ls, gensym(:iftruerhs), RHS, elementbytes)
    if LHS isa Symbol
        # Keep the previous value of LHS where the condition is false.
        altop = getop(ls, LHS)
        return add_compute!(ls, LHS, :vifelse, [condop, rhsop, altop], elementbytes)
    elseif LHS isa Expr && LHS.head === :ref
        return add_conditional_store!(ls, LHS, condop, rhsop, elementbytes)
    else
        throw("Don't know how to assign onto $LHS.")
    end
end
39+
"""
    add_orblock!(ls, ex::Expr, elementbytes = 8)

Add a conditional assignment whose first argument is the condition and whose
last argument is an assignment `LHS = RHS` that takes effect where the
condition does NOT hold (presumably the lowering of `cond || (LHS = RHS)`).

- `LHS isa Symbol`: pushes `LHS = vifelse(cond, LHS, RHS)`.
- `LHS` is a `:ref` expression: negates the mask with `vnot` and pushes a
  conditional store via `add_conditional_store!`.

`elementbytes` was previously referenced without being defined; it is now a
trailing parameter with the same default (8) used elsewhere in this file.
"""
function add_orblock!(ls, ex::Expr, elementbytes::Int = 8)
    condop = add_compute!(ls, gensym(:mask), first(ex.args), elementbytes)
    condeval = last(ex.args)::Expr
    @assert condeval.head === :(=)
    @assert length(condeval.args) == 2
    LHS = condeval.args[1]
    RHS = condeval.args[2]
    rhsop = add_compute!(ls, gensym(:iftruerhs), RHS, elementbytes)
    if LHS isa Symbol
        # Keep the previous value of LHS where the condition is true.
        altop = getop(ls, LHS)
        return add_compute!(ls, LHS, :vifelse, [condop, altop, rhsop], elementbytes)
    elseif LHS isa Expr && LHS.head === :ref
        negatedcondop = add_compute!(ls, gensym(:negated_mask), :vnot, [condop], elementbytes)
        return add_conditional_store!(ls, LHS, negatedcondop, rhsop, elementbytes)
    else
        throw("Don't know how to assign onto $LHS.")
    end
end
57+

src/add_loads.jl

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
2+
"""
    add_load!(ls::LoopSet, var::Symbol, array::Symbol, rawindices, elementbytes = 8)

Build the array-reference metadata for `array[rawindices...]` with
`array_reference_meta!` and add a load of it into `var`.
"""
function add_load!(
    ls::LoopSet, var::Symbol, array::Symbol, rawindices, elementbytes::Int = 8
)
    return add_load!(
        ls, var, array_reference_meta!(ls, array, rawindices, elementbytes), elementbytes
    )
end
8+
"""
    add_load!(ls::LoopSet, var::Symbol, mpref::ArrayReferenceMetaPosition, elementbytes = 8)

Add a load of the array reference described by `mpref` into `var`.

- With no loop dependencies the load is added as a constant instead.
- If an equal reference is already recorded in `ls.refs_aliasing_syms`, the
  existing operation is reused (for a store, its first parent is returned).
- Otherwise the reference is recorded and a new `:getindex` memload operation
  is pushed.
"""
function add_load!(
    ls::LoopSet, var::Symbol, mpref::ArrayReferenceMetaPosition, elementbytes::Int = 8
)
    if length(mpref.loopdependencies) == 0
        return add_constant!(ls, var, mpref, elementbytes)
    end
    ref = mpref.mref.ref
    # try to CSE against references seen before
    id = findfirst(known -> known == ref, ls.refs_aliasing_syms)
    if id !== nothing
        opp = getop(ls, ls.syms_aliasing_refs[id], elementbytes)
        if isstore(opp)
            return getop(ls, first(parents(opp)))
        end
        return opp
    end
    push!(ls.syms_aliasing_refs, var)
    push!(ls.refs_aliasing_syms, ref)
    loadop = Operation(ls, var, elementbytes, :getindex, memload, mpref)
    add_vptr!(ls, loadop)
    return pushop!(ls, loadop, var)
end
27+
28+
# for use with broadcasting
29+
"""
    add_simple_load!(ls::LoopSet, var::Symbol, ref::ArrayReference, elementbytes = 8)

Add a `:getindex` memload of `ref` into `var` (used with broadcasting).
Every entry of `ref.indices` is taken as a loop dependency, and the
`ArrayReferenceMeta` is built with an all-`true` flag vector of that length.
No CSE against earlier loads is attempted.
"""
function add_simple_load!(
    ls::LoopSet, var::Symbol, ref::ArrayReference, elementbytes::Int = 8
)
    # NOTE(review): the `∈` of this comprehension was stripped from the
    # extracted text; restored as `in`.
    loopdeps = Symbol[s for s in ref.indices]
    mref = ArrayReferenceMeta(
        ref, fill(true, length(loopdeps))
    )
    op = Operation(
        length(operations(ls)), var, elementbytes,
        :getindex, memload, loopdeps,
        NODEPENDENCY, NOPARENTS, mref
    )
    add_vptr!(ls, op)
    return pushop!(ls, op, var)
end
51+
"""
    add_load_ref!(ls::LoopSet, var::Symbol, ex::Expr, elementbytes = 8)

Split `ex` into an array and raw indices with `ref_from_ref`, then add a load
of that reference into `var`.
"""
function add_load_ref!(ls::LoopSet, var::Symbol, ex::Expr, elementbytes::Int = 8)
    arr, inds = ref_from_ref(ex)
    return add_load!(ls, var, arr, inds, elementbytes)
end
55+
"""
    add_load_getindex!(ls::LoopSet, var::Symbol, ex::Expr, elementbytes = 8)

Split `ex` into an array and raw indices with `ref_from_getindex`, then add a
load of that reference into `var`.
"""
function add_load_getindex!(ls::LoopSet, var::Symbol, ex::Expr, elementbytes::Int = 8)
    arr, inds = ref_from_getindex(ex)
    return add_load!(ls, var, arr, inds, elementbytes)
end
59+

0 commit comments

Comments
 (0)