Skip to content

Commit bf688f8

Browse files
committed
Some basic test cases now work.
1 parent 97165ad commit bf688f8

16 files changed

+70
-67
lines changed

src/LoopVectorization.jl

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -9,12 +9,14 @@ using VectorizationBase: REGISTER_SIZE, REGISTER_COUNT, data,
99
mask, pick_vector_width_val, MM,
1010
maybestaticlength, maybestaticsize, staticm1, staticp1, staticmul, vzero,
1111
Zero, maybestaticrange, offsetprecalc,
12-
maybestaticfirst, maybestaticlast, scalar_less, gep, gesp, pointerforcomparison, NativeTypes, staticmul,
13-
relu, stridedpointer, StridedPointer
12+
maybestaticfirst, maybestaticlast, scalar_less, gep, gesp, pointerforcomparison, NativeTypes,
13+
vfmadd, vfmsub, vfnmadd, vfnmsub, vfmadd231, vfmsub231, vfnmadd231, vfnmsub231, vadd, vsub, vmul,
14+
relu, stridedpointer, StridedPointer,
15+
reduced_add, reduced_prod, reduce_to_add, reduce_to_prod, reduced_max, reduced_min, reduce_to_max, reduce_to_min
16+
1417
using IfElse: ifelse
1518

16-
const Static = StaticInt
17-
# missing: subsetview, stridedpointer_for_broadcast, unwrap, StaticUnitRange, stridedpointers, noalias!, gepbyte,
19+
# missing: stridedpointer_for_broadcast, noalias!, gepbyte,
1820
# using SIMDPirates: VECTOR_SYMBOLS, evadd, evsub, evmul, evfdiv, vrange,
1921
# reduced_add, reduced_prod, reduce_to_add, reduced_max, reduced_min, vsum, vprod, vmaximum, vminimum,
2022
# sizeequivalentfloat, sizeequivalentint, vadd!, vsub!, vmul!, vfdiv!, vfmadd!, vfnmadd!, vfmsub!, vfnmsub!,

src/add_ifelse.jl

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -35,13 +35,13 @@ function add_if!(ls::LoopSet, LHS::Symbol, RHS::Expr, elementbytes::Int, positio
3535
else
3636
falseop = getop(ls, iffalse, elementbytes)
3737
end
38-
add_compute!(ls, LHS, :vifelse, [condop, trueop, falseop], elementbytes)
38+
add_compute!(ls, LHS, :ifelse, [condop, trueop, falseop], elementbytes)
3939
end
4040

4141
function add_andblock!(ls::LoopSet, condop::Operation, LHS, rhsop::Operation, elementbytes::Int, position::Int)
4242
if LHS isa Symbol
4343
altop = getop(ls, LHS, elementbytes)
44-
return add_compute!(ls, LHS, :vifelse, [condop, rhsop, altop], elementbytes)
44+
return add_compute!(ls, LHS, :ifelse, [condop, rhsop, altop], elementbytes)
4545
elseif LHS isa Expr && LHS.head === :ref
4646
return add_conditional_store!(ls, LHS, condop, rhsop, elementbytes)
4747
else
@@ -78,10 +78,10 @@ function add_orblock!(ls::LoopSet, condop::Operation, LHS, rhsop::Operation, ele
7878
negatedcondop = negateop!(ls, condop, elementbytes)
7979
if LHS isa Symbol
8080
altop = getop(ls, LHS, elementbytes)
81-
# return add_compute!(ls, LHS, :vifelse, [condop, altop, rhsop], elementbytes)
81+
# return add_compute!(ls, LHS, :ifelse, [condop, altop, rhsop], elementbytes)
8282
# Placing altop second seems to let LLVM fuse operations; but as of LLVM 9.0.1 it will not if altop is first
8383
# therefore, we negate the condition and switch order so that the altop is second.
84-
return add_compute!(ls, LHS, :vifelse, [negatedcondop, rhsop, altop], elementbytes)
84+
return add_compute!(ls, LHS, :ifelse, [negatedcondop, rhsop, altop], elementbytes)
8585
elseif LHS isa Expr && LHS.head === :ref
8686
# negatedcondop = add_compute!(ls, gensym(:negated_mask), :~, [condop], elementbytes)
8787
return add_conditional_store!(ls, LHS, negatedcondop, rhsop, elementbytes)

src/add_stores.jl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -113,7 +113,7 @@ function add_conditional_store!(ls::LoopSet, LHS, condop::Operation, storeop::Op
113113
# prevstore = getop(ls, id + 1)
114114
# # @show prevstore prevstore.node_type, loopdependencies(prevstore)
115115
# # @show operations(ls)
116-
# storeop = add_compute!(ls, gensym(:combinedstoreop), Instruction(:vifelse), [condop, storeop, first(parents(prevstore))], elementbytes)
116+
# storeop = add_compute!(ls, gensym(:combinedstoreop), Instruction(:ifelse), [condop, storeop, first(parents(prevstore))], elementbytes)
117117
# storeparents = [storeop]
118118
# storeinstr = if prevstore.instruction.instr === :conditionalstore!
119119
# push!(storeparents, add_compute!(ls, gensym(:combinedmask), Instruction(:|), [condop, last(parents(prevstore))], elementbytes))

src/condense_loopset.jl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -313,7 +313,7 @@ function setup_call_inline(ls::LoopSet, inline::Int8 = zero(Int8), U::Int8 = zer
313313
instr = instruction(op)
314314
out = Symbol(mvar, 0)
315315
push!(outer_reducts.args, out)
316-
push!(q.args, Expr(:(=), var, Expr(:call, lv(reduction_scalar_combine(instr)), out, var)))
316+
push!(q.args, Expr(:(=), var, Expr(:call, lv(reduction_scalar_combine(instr)), Expr(:call, lv(:Vec), out), var)))
317317
end
318318
pushpreamble!(ls, outer_reducts)
319319
append!(ls.preamble.args, q.args)

src/costs.jl

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -138,9 +138,9 @@ const COST = Dict{Symbol,InstructionCost}(
138138
# :vfdiv => InstructionCost(13,4.0,-2.0),
139139
# :vfdiv! => InstructionCost(13,4.0,-2.0),
140140
:div_fast => InstructionCost(13,4.0,-2.0),
141-
:evadd => InstructionCost(4,0.5),
142-
:evsub => InstructionCost(4,0.5),
143-
:evmul => InstructionCost(4,0.5),
141+
# :evadd => InstructionCost(4,0.5),
142+
# :evsub => InstructionCost(4,0.5),
143+
# :evmul => InstructionCost(4,0.5),
144144
# :evfdiv => InstructionCost(13,4.0,-2.0),
145145
:vsum => InstructionCost(6,2.0),
146146
:vprod => InstructionCost(6,2.0),
@@ -293,9 +293,9 @@ const REDUCTION_CLASS = Dict{Symbol,Float64}(
293293
# :vsub! => ADDITIVE_IN_REDUCTIONS,
294294
:vmul => MULTIPLICATIVE_IN_REDUCTIONS,
295295
# :vmul! => MULTIPLICATIVE_IN_REDUCTIONS,
296-
:evadd => ADDITIVE_IN_REDUCTIONS,
297-
:evsub => ADDITIVE_IN_REDUCTIONS,
298-
:evmul => MULTIPLICATIVE_IN_REDUCTIONS,
296+
# :evadd => ADDITIVE_IN_REDUCTIONS,
297+
# :evsub => ADDITIVE_IN_REDUCTIONS,
298+
# :evmul => MULTIPLICATIVE_IN_REDUCTIONS,
299299
:& => ALL,
300300
:| => ANY,
301301
:muladd => ADDITIVE_IN_REDUCTIONS,
@@ -329,7 +329,7 @@ reduction_instruction_class(instr::Symbol) = get(REDUCTION_CLASS, instr, NaN)
329329
reduction_instruction_class(instr::Instruction) = reduction_instruction_class(instr.instr)
330330
function reduction_to_single_vector(x::Float64)
331331
# x == 1.0 ? :evadd : x == 2.0 ? :evmul : x == 3.0 ? :vor : x == 4.0 ? :vand : x == 5.0 ? :max : x == 6.0 ? :min : throw("Reduction not found.")
332-
x == ADDITIVE_IN_REDUCTIONS ? :evadd : x == MULTIPLICATIVE_IN_REDUCTIONS ? :evmul : x == MAX ? :max : x == MIN ? :min : throw("Reduction not found.")
332+
x == ADDITIVE_IN_REDUCTIONS ? :vadd : x == MULTIPLICATIVE_IN_REDUCTIONS ? :vmul : x == MAX ? :max : x == MIN ? :min : throw("Reduction not found.")
333333
end
334334
reduction_to_single_vector(x) = reduction_to_single_vector(reduction_instruction_class(x))
335335
# function reduction_to_scalar(x::Float64)

src/graphs.jl

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -106,9 +106,9 @@ function vec_looprange(loopmax, UF::Int, mangledname::Symbol, ptrcomp::Bool)
106106
end
107107
function vec_looprange(loopmax, UF::Int, mangledname, W)
108108
incr = if isone(UF)
109-
Expr(:call, lv(:valsub), W, 1)
109+
Expr(:call, lv(:vsub), W, :(Static{1}()))
110110
else
111-
Expr(:call, lv(:valmulsub), W, UF, 1)
111+
Expr(:call, lv(:vsub), Expr(:call, lv(:vmul), W, UF), :(Static{1}()))
112112
end
113113
compexpr = subexpr(loopmax, incr)
114114
Expr(:call, :<, mangledname, compexpr)
@@ -140,9 +140,9 @@ end
140140
function incrementloopcounter(us::UnrollSpecification, n::Int, mangledname::Symbol, UF::Int = unrollfactor(us, n))
141141
if isvectorized(us, n)
142142
if isone(UF)
143-
Expr(:(=), mangledname, Expr(:call, lv(:valadd), VECTORWIDTHSYMBOL, mangledname))
143+
Expr(:(=), mangledname, Expr(:call, lv(:vadd), VECTORWIDTHSYMBOL, mangledname))
144144
else
145-
Expr(:(=), mangledname, Expr(:call, lv(:valmuladd), VECTORWIDTHSYMBOL, UF, mangledname))
145+
Expr(:(=), mangledname, Expr(:call, lv(:vadd), Expr(:call, lv(:vmul), VECTORWIDTHSYMBOL, :(Static{$UF}())), mangledname))
146146
end
147147
else
148148
Expr(:(=), mangledname, Expr(:call, lv(:vadd), mangledname, UF))
@@ -153,7 +153,7 @@ function incrementloopcounter!(q, us::UnrollSpecification, n::Int, UF::Int = unr
153153
if isone(UF)
154154
push!(q.args, Expr(:call, lv(:Static), VECTORWIDTHSYMBOL))
155155
else
156-
push!(q.args, Expr(:call, lv(:valmul), VECTORWIDTHSYMBOL, Expr(:call, Expr(:curly, lv(:Static), UF))))
156+
push!(q.args, Expr(:call, lv(:vmul), VECTORWIDTHSYMBOL, Expr(:call, Expr(:curly, lv(:Static), UF))))
157157
end
158158
else
159159
push!(q.args, Expr(:call, Expr(:curly, lv(:Static), UF)))

src/loopstartstopmanager.jl

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -187,12 +187,12 @@ function pointermax(ls::LoopSet, ar::ArrayReferenceMeta, n::Int, sub::Int, isvec
187187
push!(index.args, stophint)
188188
elseif isvectorized
189189
if isone(sub)
190-
push!(index.args, Expr(:call, lv(:valsub), stophint, VECTORWIDTHSYMBOL))
190+
push!(index.args, Expr(:call, lv(:vsub), staticexpr(stophint), VECTORWIDTHSYMBOL))
191191
else
192-
push!(index.args, Expr(:call, lv(:vsub), stophint, Expr(:call, lv(:valmul), VECTORWIDTHSYMBOL, sub)))
192+
push!(index.args, Expr(:call, lv(:vsub), staticexpr(stophint), Expr(:call, lv(:vmul), VECTORWIDTHSYMBOL, staticexpr(sub))))
193193
end
194194
else
195-
push!(index.args, stophint - sub)
195+
push!(index.args, staticexpr(stophint - sub))
196196
end
197197
ptr = vptr(ar)
198198
return Expr(:call, lv(:pointerforcomparison), ptr, index)

src/lower_compute.jl

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -97,12 +97,12 @@ end
9797
function add_loopvalue!(instrcall::Expr, loopval::Symbol, vectorized::Symbol, u::Int)
9898
if loopval === vectorized
9999
if isone(u)
100-
push!(instrcall.args, Expr(:call, lv(:valadd), VECTORWIDTHSYMBOL, _MMind(Expr(:call, lv(:staticp1), loopval))))
100+
push!(instrcall.args, Expr(:call, lv(:vadd), VECTORWIDTHSYMBOL, _MMind(Expr(:call, lv(:staticp1), loopval))))
101101
else
102-
push!(instrcall.args, Expr(:call, lv(:valmuladd), VECTORWIDTHSYMBOL, u, _MMind(Expr(:call, lv(:staticp1), loopval))))
102+
push!(instrcall.args, Expr(:call, lv(:vadd), Expr(:call, lv(:vmul), VECTORWIDTHSYMBOL, u), _MMind(Expr(:call, lv(:staticp1), loopval))))
103103
end
104104
else
105-
push!(instrcall.args, Expr(:call, lv(:vadd), loopval, u + 1))
105+
push!(instrcall.args, Expr(:call, lv(:vadd), loopval, Expr(:call, Expr(:curly, :Static, u + 1))))
106106
end
107107
end
108108
function add_loopvalue!(instrcall::Expr, loopval, ua::UnrollArgs, u::Int)
@@ -246,10 +246,10 @@ function lower_compute!(
246246
end
247247
if maskreduct && (u == Uiter || u₁loopsym !== vectorized) # only mask last
248248
if last(instrcall.args) == varsym
249-
pushfirst!(instrcall.args, lv(:vifelse))
249+
pushfirst!(instrcall.args, lv(:ifelse))
250250
insert!(instrcall.args, 3, mask)
251251
elseif all(in(loopdependencies(op)), reduceddeps) || any(opp -> mangledvar(opp) === mangledvar(op), parents_op)
252-
push!(q.args, Expr(:(=), varsym, Expr(:call, lv(:vifelse), mask, instrcall, varsym)))
252+
push!(q.args, Expr(:(=), varsym, Expr(:call, lv(:ifelse), mask, instrcall, varsym)))
253253
continue
254254
end
255255
end

src/lower_load.jl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -138,7 +138,7 @@ function add_prefetches!(q::Expr, ls::LoopSet, op::Operation, td::UnrollArgs, pr
138138
if isone(u)
139139
inds.args[i] = Expr(:call, lv(:unwrap), VECTORWIDTHSYMBOL)
140140
else
141-
inds.args[i] = Expr(:call, lv(:valmul), VECTORWIDTHSYMBOL, u)
141+
inds.args[i] = Expr(:call, lv(:vmul), VECTORWIDTHSYMBOL, u)
142142
end
143143
else
144144
inds.args[i] = Expr(:call, Expr(:curly, lv(:Static), u))

src/lower_memory_common.jl

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -23,8 +23,8 @@ staticexpr(x::Integer) = Expr(:call, Expr(:curly, lv(:Static), convert(Int, x)))
2323
staticexpr(x) = Expr(:call, lv(:Static), x)
2424
maybestatic(x::Integer) = staticexpr(x)
2525
maybestatic(x) = x
26-
_MMind(ind) = Expr(:call, lv(:_MM), VECTORWIDTHSYMBOL, ind)
27-
_MMind(ind::Integer) = Expr(:call, lv(:_MM), VECTORWIDTHSYMBOL, staticexpr(ind))
26+
_MMind(ind) = Expr(:call, lv(:MM), VECTORWIDTHSYMBOL, ind)
27+
_MMind(ind::Integer) = Expr(:call, lv(:MM), VECTORWIDTHSYMBOL, staticexpr(ind))
2828
function addoffset!(ret::Expr, ex, offset::Integer, _mm::Bool = false)
2929
if iszero(offset)
3030
if _mm
@@ -81,9 +81,9 @@ end
8181
function add_vectorized_offset!(ret::Expr, ind, offset, incr)
8282
if isone(incr)
8383
if iszero(offset)
84-
push!(ret.args, _MMind(Expr(:call, lv(:valadd), VECTORWIDTHSYMBOL, maybestatic(ind))))
84+
push!(ret.args, _MMind(Expr(:call, lv(:vadd), VECTORWIDTHSYMBOL, maybestatic(ind))))
8585
else
86-
push!(ret.args, _MMind(Expr(:call, lv(:vadd), ind, Expr(:call, lv(:valadd), VECTORWIDTHSYMBOL, staticexpr(offset)))))
86+
push!(ret.args, _MMind(Expr(:call, lv(:vadd), ind, Expr(:call, lv(:vadd), VECTORWIDTHSYMBOL, staticexpr(offset)))))
8787
end
8888
elseif iszero(incr)
8989
if iszero(offset)
@@ -92,17 +92,17 @@ function add_vectorized_offset!(ret::Expr, ind, offset, incr)
9292
addoffset!(ret, ind, offset, true)
9393
end
9494
elseif iszero(offset)
95-
push!(ret.args, _MMind(Expr(:call, lv(:staticmuladd), VECTORWIDTHSYMBOL, maybestatic(incr), maybestatic(ind))))
95+
push!(ret.args, _MMind(Expr(:call, lv(:vadd), Expr(:call, lv(:vmul), VECTORWIDTHSYMBOL, maybestatic(incr)), maybestatic(ind))))
9696
else
97-
push!(ret.args, _MMind(Expr(:call, lv(:vadd), ind, Expr(:call, lv(:staticmuladd), VECTORWIDTHSYMBOL, maybestatic(incr), staticexpr(offset)))))
97+
push!(ret.args, _MMind(Expr(:call, lv(:vadd), ind, Expr(:call, lv(:vadd), Expr(:call, lv(:vmul), VECTORWIDTHSYMBOL, maybestatic(incr)), staticexpr(offset)))))
9898
end
9999
end
100100
function add_vectorized_offset_unrolled!(ret::Expr, offset, incr)
101101
if isone(incr)
102102
if iszero(offset)
103103
push!(ret.args, _MMind(Expr(:call, lv(:Static), VECTORWIDTHSYMBOL)))
104104
else
105-
push!(ret.args, _MMind(Expr(:call, lv(:valadd), VECTORWIDTHSYMBOL, staticexpr(offset))))
105+
push!(ret.args, _MMind(Expr(:call, lv(:vadd), VECTORWIDTHSYMBOL, staticexpr(offset))))
106106
end
107107
elseif iszero(incr)
108108
if iszero(offset)
@@ -113,7 +113,7 @@ function add_vectorized_offset_unrolled!(ret::Expr, offset, incr)
113113
elseif iszero(offset)
114114
push!(ret.args, _MMind(Expr(:call, lv(:staticmul), VECTORWIDTHSYMBOL, maybestatic(incr))))
115115
else
116-
push!(ret.args, _MMind(Expr(:call, lv(:staticmuladd), VECTORWIDTHSYMBOL, maybestatic(incr), staticexpr(offset))))
116+
push!(ret.args, _MMind(Expr(:call, lv(:vadd), Expr(:call, lv(:vmul), VECTORWIDTHSYMBOL, maybestatic(incr)), staticexpr(offset))))
117117
end
118118
end
119119
function add_vectorized_offset!(ret::Expr, ind, offset, incr, unrolled)

0 commit comments

Comments
 (0)