Skip to content

Commit 7342b77

Browse files
committed
When _avx_! isn't inlined, cut out a few assembly instructions from setting up the call
1 parent 1be4aac commit 7342b77

File tree

5 files changed

+69
-43
lines changed

5 files changed

+69
-43
lines changed

src/add_compute.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ function add_reduction_update_parent!(
184184
if reduct_zero === :zero
185185
push!(ls.preamble_zeros, (identifier(reductinit), IntOrFloat))
186186
else
187-
push!(ls.preamble_funcofeltypes, (identifier(reductinit), reduct_zero))
187+
push!(ls.preamble_funcofeltypes, (identifier(reductinit), instrclass))
188188
end
189189
else
190190
reductinit = parent
@@ -384,7 +384,7 @@ function add_pow!(
384384
end
385385
if pint == 0
386386
op = Operation(length(operations(ls)), var, elementbytes, LOOPCONSTANT, constant, NODEPENDENCY, Symbol[], NOPARENTS)
387-
push!(ls.preamble_funcofeltypes, (identifier(op),:one))
387+
push!(ls.preamble_funcofeltypes, (identifier(op),MULTIPLICATIVE_IN_REDUCTIONS))
388388
return pushop!(ls, op)
389389
elseif pint == 1
390390
return add_compute!(ls, var, :identity, [xop], elementbytes)

src/condense_loopset.jl

Lines changed: 32 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -145,16 +145,25 @@ function loop_boundaries(ls::LoopSet)
145145
lbd
146146
end
147147

148+
tuple_expr(v) = tuple_expr(identity, v)
149+
function tuple_expr(f, v)
150+
t = Expr(:tuple)
151+
for vᵢ ∈ v
152+
push!(t.args, f(vᵢ))
153+
end
154+
t
155+
end
156+
148157
function argmeta_and_consts_description(ls::LoopSet, arraysymbolinds)
149158
Expr(
150-
:curly, :Tuple,
159+
:tuple,
151160
length(arraysymbolinds),
152-
Expr(:curly, :Tuple, ls.outer_reductions...),
153-
Expr(:curly, :Tuple, first.(ls.preamble_symsym)...),
154-
Expr(:curly, :Tuple, ls.preamble_symint...),
155-
Expr(:curly, :Tuple, ls.preamble_symfloat...),
156-
Expr(:curly, :Tuple, ls.preamble_zeros...),
157-
Expr(:curly, :Tuple, ls.preamble_funcofeltypes...)
161+
tuple_expr(ls.outer_reductions),
162+
tuple_expr(first, ls.preamble_symsym),
163+
tuple_expr(ls.preamble_symint),
164+
tuple_expr(ls.preamble_symfloat),
165+
tuple_expr(ls.preamble_zeros),
166+
tuple_expr(ls.preamble_funcofeltypes)
158167
)
159168
end
160169

@@ -210,9 +219,10 @@ function check_if_empty(ls::LoopSet, q::Expr)
210219
Expr(:if, Expr(:call, :!, Expr(:call, :any, :isempty, lb)), q)
211220
end
212221

222+
val(x) = Expr(:call, Expr(:curly, :Val, x))
213223
# Try to condense in type stable manner
214224
function generate_call(ls::LoopSet, inline_unroll::NTuple{3,Int8}, debug::Bool = false)
215-
operation_descriptions = Expr(:curly, :Tuple)
225+
operation_descriptions = Expr(:tuple)
216226
varnames = Symbol[]; ids = Vector{Int}(undef, length(operations(ls)))
217227
for op ∈ operations(ls)
218228
instr = instruction(op)
@@ -221,27 +231,29 @@ function generate_call(ls::LoopSet, inline_unroll::NTuple{3,Int8}, debug::Bool =
221231
push!(operation_descriptions.args, OperationStruct!(varnames, ids, ls, op))
222232
end
223233
arraysymbolinds = Symbol[]
224-
arrayref_descriptions = Expr(:curly, :Tuple)
234+
arrayref_descriptions = Expr(:tuple)
225235
foreach(ref -> push!(arrayref_descriptions.args, ArrayRefStruct(ls, ref, arraysymbolinds, ids)), ls.refs_aliasing_syms)
226236
argmeta = argmeta_and_consts_description(ls, arraysymbolinds)
227237
loop_bounds = loop_boundaries(ls)
228-
loop_syms = Expr(:curly, :Tuple, map(QuoteNode, ls.loopsymbols)...)
238+
loop_syms = tuple_expr(QuoteNode, ls.loopsymbols)
229239
inline, u₁, u₂ = inline_unroll
230-
231240
func = debug ? lv(:_avx_loopset_debug) : lv(:_avx_!)
232241
lbarg = debug ? Expr(:call, :typeof, loop_bounds) : loop_bounds
233242
q = Expr(
234-
:call, func, Expr(:call, Expr(:curly, :Val, Expr(:tuple, inline, u₁, u₂, Expr(:call, lv(:unwrap), VECTORWIDTHSYMBOL)))),
235-
operation_descriptions, arrayref_descriptions, argmeta, loop_syms, lbarg
243+
:call, func, val(Expr(:tuple, inline, u₁, u₂, Expr(:call, lv(:unwrap), VECTORWIDTHSYMBOL))),
244+
val(operation_descriptions), val(arrayref_descriptions), val(argmeta), val(loop_syms), lbarg
236245
)
237-
debug && deleteat!(q.args, 2)
238-
foreach(ref -> push!(q.args, vptr(ref)), ls.refs_aliasing_syms)
246+
# debug && deleteat!(q.args, 2)
247+
vargs_as_tuple = !debug
248+
extra_args = vargs_as_tuple ? Expr(:tuple) : q
249+
foreach(ref -> push!(extra_args.args, vptr(ref)), ls.refs_aliasing_syms)
239250

240-
foreach(is -> push!(q.args, last(is)), ls.preamble_symsym)
241-
append!(q.args, arraysymbolinds)
242-
add_reassigned_syms!(q, ls)
243-
add_external_functions!(q, ls)
244-
debug && return q
251+
foreach(is -> push!(extra_args.args, last(is)), ls.preamble_symsym)
252+
append!(extra_args.args, arraysymbolinds)
253+
add_reassigned_syms!(extra_args, ls)
254+
add_external_functions!(extra_args, ls)
255+
# debug && return q
256+
vargs_as_tuple && push!(q.args, extra_args)
245257
vecwidthdefq = Expr(:block)
246258
define_eltype_vec_width!(vecwidthdefq, ls, nothing)
247259
Expr(:block, vecwidthdefq, q)
@@ -318,7 +330,6 @@ function setup_call_inline(ls::LoopSet, inline::Int8 = zero(Int8), U::Int8 = zer
318330
end
319331
function setup_call_debug(ls::LoopSet)
320332
# avx_loopset(instr, ops, arf, AM, LB, vargs)
321-
322333
pushpreamble!(ls, generate_call(ls, (zero(Int8),zero(Int8),zero(Int8)), true))
323334
Expr(:block, ls.prepreamble, ls.preamble)
324335
end

src/graphs.jl

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ struct LoopSet
253253
preamble_symint::Vector{Tuple{Int,Int}}
254254
preamble_symfloat::Vector{Tuple{Int,Float64}}
255255
preamble_zeros::Vector{Tuple{Int,NumberType}}
256-
preamble_funcofeltypes::Vector{Tuple{Int,Symbol}}
256+
preamble_funcofeltypes::Vector{Tuple{Int,Float64}}
257257
includedarrays::Vector{Symbol}
258258
includedactualarrays::Vector{Symbol}
259259
syms_aliasing_refs::Vector{Symbol}
@@ -308,7 +308,7 @@ function pushpreamble!(ls::LoopSet, op::Operation, v::Number)
308308
if iszero(v)
309309
push!(ls.preamble_zeros, (id, typ))
310310
elseif isone(v)
311-
push!(ls.preamble_funcofeltypes, (id, :one))
311+
push!(ls.preamble_funcofeltypes, (id, MULTIPLICATIVE_IN_REDUCTIONS))
312312
elseif v isa Integer
313313
push!(ls.preamble_symint, (id, convert(Int,v)))
314314
else
@@ -321,7 +321,7 @@ pushpreamble!(ls::LoopSet, ex::Expr) = push!(ls.preamble.args, ex)
321321
# if RHS.head === :call && first(RHS.args) === :zero
322322
# push!(ls.preamble_zeros, (identifier(op), IntOrFloat))
323323
# elseif RHS.head === :call && first(RHS.args) === :one
324-
# push!(ls.preamble_funcofeltypes, (identifier(op), :one))
324+
# push!(ls.preamble_funcofeltypes, (identifier(op), MULTIPLICATIVE_IN_REDUCTIONS))
325325
# else
326326
# pushpreamble!(ls, Expr(:(=), c, RHS))
327327
# pushpreamble!(ls, op, c)
@@ -670,7 +670,7 @@ function add_operation!(
670670
if f === :zero
671671
push!(ls.preamble_zeros, (identifier(op), IntOrFloat))
672672
else
673-
push!(ls.preamble_funcofeltypes, (identifier(op), :one))
673+
push!(ls.preamble_funcofeltypes, (identifier(op), MULTIPLICATIVE_IN_REDUCTIONS))
674674
end
675675
op
676676
else
@@ -706,7 +706,7 @@ function add_operation!(
706706
if f === :zero
707707
push!(ls.preamble_zeros, (identifier(op), IntOrFloat))
708708
else
709-
push!(ls.preamble_funcofeltypes, (identifier(op), :one))
709+
push!(ls.preamble_funcofeltypes, (identifier(op), MULTIPLICATIVE_IN_REDUCTIONS))
710710
end
711711
op
712712
else

src/lower_constant.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ function lower_licm_constants!(ls::LoopSet)
165165
end
166166
end
167167
for (id,f) ∈ ls.preamble_funcofeltypes
168-
setop!(ls, ops[id], Expr(:call, f, ELTYPESYMBOL))
168+
setop!(ls, ops[id], Expr(:call, reduction_zero(f), ELTYPESYMBOL))
169169
end
170170
end
171171

src/reconstruct_loopset.jl

Lines changed: 29 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -198,24 +198,24 @@ end
198198
function num_parameters(AM)
199199
num_param::Int = AM[1]
200200
# num_param += length(AM[2].parameters)
201-
num_param + length(AM[3].parameters)
201+
num_param + length(AM[3])
202202
end
203203
function gen_array_syminds(AM)
204204
Symbol[Symbol("##arraysymbolind##"*i*'#') for i ∈ 1:(AM[1])::Int]
205205
end
206206
function process_metadata!(ls::LoopSet, AM, num_arrays::Int)
207207
opoffsets = ls.operation_offsets
208-
expandbyoffset!(ls.outer_reductions, AM[2].parameters, opoffsets)
209-
for (i,si) ∈ enumerate(AM[3].parameters)
208+
expandbyoffset!(ls.outer_reductions, AM[2], opoffsets)
209+
for (i,si) ∈ enumerate(AM[3])
210210
sii = si::Int
211211
s = gensym(:symlicm)
212212
push!(ls.preamble_symsym, (opoffsets[sii] + 1, s))
213213
pushpreamble!(ls, Expr(:(=), s, Expr(:macrocall, Symbol("@inbounds"), LineNumberNode(@__LINE__,Symbol(@__FILE__)), Expr(:ref, :vargs, num_arrays + i))))
214214
end
215-
expandbyoffset!(ls.preamble_symint, AM[4].parameters, opoffsets)
216-
expandbyoffset!(ls.preamble_symfloat, AM[5].parameters, opoffsets)
217-
expandbyoffset!(ls.preamble_zeros, AM[6].parameters, opoffsets)
218-
expandbyoffset!(ls.preamble_funcofeltypes, AM[7].parameters, opoffsets)
215+
expandbyoffset!(ls.preamble_symint, AM[4], opoffsets)
216+
expandbyoffset!(ls.preamble_symfloat, AM[5], opoffsets)
217+
expandbyoffset!(ls.preamble_zeros, AM[6], opoffsets)
218+
expandbyoffset!(ls.preamble_funcofeltypes, AM[7], opoffsets)
219219
nothing
220220
end
221221
function expandbyoffset!(indexpand::Vector{T}, inds, offsets::Vector{Int}, expand::Bool = true) where {T <: Union{Int,Tuple{Int,<:Any}}}
@@ -437,7 +437,7 @@ function sizeofeltypes(v, num_arrays)::Int
437437
end
438438

439439
function avx_loopset(instr::Vector{Instruction}, ops::Vector{OperationStruct}, arf::Vector{ArrayRefStruct},
440-
AM::Core.SimpleVector, LPSYM::Core.SimpleVector, LB::Core.SimpleVector, @nospecialize(vargs))
440+
AM::Vector{Any}, LPSYM::Vector{Any}, LB::Core.SimpleVector, @nospecialize(vargs))
441441
ls = LoopSet(:LoopVectorization)
442442
num_arrays = length(arf)
443443
elementbytes = sizeofeltypes(vargs, num_arrays)
@@ -464,18 +464,33 @@ function avx_body(ls::LoopSet, UNROLL::Tuple{Int8,Int8,Int8,Int})
464464
q
465465
end
466466

467-
function _avx_loopset_debug(::Type{OPS}, ::Type{ARF}, ::Type{AM}, ::Type{LPSYM}, ::Type{LB}, vargs...) where {OPS, ARF, AM, LPSYM, LB}
467+
function _avx_loopset_debug(::Val{UNROLL}, ::Val{OPS}, ::Val{ARF}, ::Val{AM}, ::Val{LPSYM}, ::Type{LB}, vargs...) where {UNROLL, OPS, ARF, AM, LPSYM, LB}
468468
@show OPS ARF AM LPSYM LB vargs
469-
_avx_loopset(OPS.parameters, ARF.parameters, AM.parameters, LPSYM.parameters, LB.parameters, typeof.(vargs))
469+
inline, u₁, u₂, W = UNROLL
470+
ls = _avx_loopset(OPS, ARF, AM, LPSYM, LB.parameters, typeof.(vargs))
471+
ls.vector_width[] = W
472+
ls
473+
end
474+
function tovector(@nospecialize(t))
475+
v = Vector{Any}(undef, length(t))
476+
for i ∈ eachindex(v)
477+
tᵢ = t[i]
478+
if tᵢ isa Tuple # reduce specialization?
479+
v[i] = tovector(tᵢ)
480+
else
481+
v[i] = tᵢ
482+
end
483+
end
484+
v
470485
end
471-
function _avx_loopset(OPSsv::Core.SimpleVector, ARFsv::Core.SimpleVector, AMsv::Core.SimpleVector, LPSYMsv::Core.SimpleVector, LBsv::Core.SimpleVector, @nospecialize(vargs))
486+
function _avx_loopset(@nospecialize(OPSsv), @nospecialize(ARFsv), @nospecialize(AMsv), @nospecialize(LPSYMsv), LBsv::Core.SimpleVector, @nospecialize(vargs))
472487
nops = length(OPSsv) ÷ 3
473488
instr = Instruction[Instruction(OPSsv[3i+1], OPSsv[3i+2]) for i ∈ 0:nops-1]
474489
ops = OperationStruct[ OPSsv[3i] for i ∈ 1:nops ]
475490
avx_loopset(
476491
instr, ops,
477492
ArrayRefStruct[ARFsv...],
478-
AMsv, LPSYMsv, LBsv, vargs
493+
tovector(AMsv), tovector(LPSYMsv), LBsv, vargs
479494
)
480495
end
481496
"""
@@ -497,9 +512,9 @@ Execute an `@avx` block. The block's code is represented via the arguments:
497512
`StaticLowerUnitRange(1)` because the lower bound of the iterator can be determined to be 1.
498513
- `vargs...` holds the encoded pointers of all the arrays (see `VectorizationBase`'s various pointer types).
499514
"""
500-
@generated function _avx_!(::Val{UNROLL}, ::Type{OPS}, ::Type{ARF}, ::Type{AM}, ::Type{LPSYM}, lb::LB, vargs...) where {UNROLL, OPS, ARF, AM, LPSYM, LB}
515+
@generated function _avx_!(::Val{UNROLL}, ::Val{OPS}, ::Val{ARF}, ::Val{AM}, ::Val{LPSYM}, lb::LB, vargs::Tuple{Vararg{Any,K}}) where {UNROLL, OPS, ARF, AM, LPSYM, LB, K}
501516
# 1 + 1 # Irrelevant line you can comment out/in to force recompilation...
502-
ls = _avx_loopset(OPS.parameters, ARF.parameters, AM.parameters, LPSYM.parameters, LB.parameters, vargs)
517+
ls = _avx_loopset(OPS, ARF, AM, LPSYM, LB.parameters, vargs.parameters)
503518
# return @show avx_body(ls, UNROLL)
504519
# @show UNROLL, OPS, ARF, AM, LPSYM, LB
505520
avx_body(ls, UNROLL)

0 commit comments

Comments
 (0)