Skip to content

Commit 1bae02b

Browse files
committed
Use one canonical ELTYPE
1 parent 4c91232 commit 1bae02b

File tree

3 files changed

+38
-30
lines changed

3 files changed

+38
-30
lines changed

src/codegen/lowering.jl

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -426,7 +426,8 @@ end
426426

427427
function outer_reduction_zero(op::Operation, u₁u::Bool, Umax::Int, reduct_class::Float64, rs::Expr)
428428
reduct_zero = reduction_zero(reduct_class)
429-
Tsym = outer_reduct_init_typename(op)
429+
# Tsym = outer_reduct_init_typename(op)
430+
Tsym = ELTYPESYMBOL
430431
if isvectorized(op)
431432
if Umax == 1 || !u₁u
432433
if reduct_zero === :zero
@@ -614,7 +615,16 @@ function gc_preserve(ls::LoopSet, q::Expr)
614615
q2
615616
# Expr(:block, gcp)
616617
end
617-
618+
function push_outer_reduct_types!(pt::Expr, ls::LoopSet, ortypdefined::Bool)
619+
for j ∈ ls.outer_reductions
620+
oreducop = ls.operations[j]
621+
if ortypdefined
622+
push!(pt.args, typeof_expr(oreducop))
623+
else
624+
push!(pt.args, outer_reduct_init_typename(oreducop))
625+
end
626+
end
627+
end
618628
function determine_eltype(ls::LoopSet, ortypdefined::Bool)::Union{Symbol,Expr}
619629
narrays = length(ls.includedactualarrays)
620630
noreduc = length(ls.outer_reductions)
@@ -637,14 +647,7 @@ function determine_eltype(ls::LoopSet, ortypdefined::Bool)::Union{Symbol,Expr}
637647
for array ∈ ls.includedactualarrays
638648
push!(pt.args, Expr(:call, lv(:eltype), array))
639649
end
640-
for j ∈ ls.outer_reductions
641-
oreducop = ls.operations[j]
642-
if ortypdefined
643-
push!(pt.args, typeof_expr(oreducop))
644-
else
645-
push!(pt.args, outer_reduct_init_typename(oreducop))
646-
end
647-
end
650+
push_outer_reduct_types!(pt, ls, ortypdefined)
648651
return pt
649652
end
650653
@inline _eltype(x) = eltype(x)
@@ -653,20 +656,21 @@ function determine_width(
653656
ls::LoopSet, vectorized::Union{Symbol,Nothing}
654657
)
655658
vwidth_q = Expr(:call, lv(:pick_vector_width))
656-
if !(vectorized === nothing)
659+
if vectorized ≢ nothing
657660
vloop = getloop(ls, vectorized)
658661
if isstaticloop(vloop)
659662
push!(vwidth_q.args, Expr(:call, Expr(:curly, :Val, length(vloop))))
660663
end
661664
end
662665
# push!(vwidth_q.args, ls.T)
663-
if length(ls.includedactualarrays) < 2
664-
push!(vwidth_q.args, ELTYPESYMBOL)
665-
else
666-
for array ∈ ls.includedactualarrays
667-
push!(vwidth_q.args, Expr(:call, lv(:_eltype), array))
668-
end
669-
end
666+
# if length(ls.includedactualarrays) < 2
667+
push!(vwidth_q.args, ELTYPESYMBOL)
668+
# else
669+
# for array ∈ ls.includedactualarrays
670+
# push!(vwidth_q.args, Expr(:call, lv(:_eltype), array))
671+
# end
672+
# push_outer_reduct_types!(vwidth_q,
673+
# end
670674
vwidth_q
671675
end
672676
function init_remblock(unrolledloop::Loop, lssm::LoopStartStopManager, n::Int)#u₁loop::Symbol = unrolledloop.itersymbol)

src/constructors.jl

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,12 @@ macro tvectorize(args...)
217217
vectorize_macro(__module__, __source__, last(args), :(thread=true), Base.front(args)...)
218218
end
219219

220+
function def_outer_reduct_types!(ls::LoopSet)
221+
for or ∈ ls.outer_reductions
222+
op = operations(ls)[or]
223+
pushpreamble!(ls, Expr(:(=), outer_reduct_init_typename(op), typeof_expr(op)))
224+
end
225+
end
220226
"""
221227
@_vectorize
222228
@@ -230,19 +236,17 @@ macro _vectorize(q)
230236
q = macroexpand(__module__, q)
231237
ls = LoopSet(q, __module__)
232238
set_hw!(ls)
233-
for or ∈ ls.outer_reductions
234-
op = operations(ls)[or]
235-
pushpreamble!(ls, Expr(:(=), outer_reduct_init_typename(op), typeof_expr(op)))
236-
end
239+
def_outer_reduct_types!(ls)
237240
esc(Expr(:block, ls.prepreamble, lower_and_split_loops(ls, -1)))
238241
end
239242
macro _vectorize(arg, q)
240-
@assert q.head === :for
241-
q = macroexpand(__module__, q)
242-
inline, check_empty, u₁, u₂ = check_macro_kwarg(arg, false, false, zero(Int8), zero(Int8), 1)
243-
ls = LoopSet(q, __module__)
244-
set_hw!(ls)
245-
esc(Expr(:block, ls.prepreamble, lower(ls, u₁ % Int, u₂ % Int, -1)))
243+
@assert q.head === :for
244+
q = macroexpand(__module__, q)
245+
inline, check_empty, u₁, u₂ = check_macro_kwarg(arg, false, false, zero(Int8), zero(Int8), 1)
246+
ls = LoopSet(q, __module__)
247+
set_hw!(ls)
248+
def_outer_reduct_types!(ls)
249+
esc(Expr(:block, ls.prepreamble, lower(ls, u₁ % Int, u₂ % Int, -1)))
246250
end
247251

248252
macro vectorize_debug(q)

src/reconstruct_loopset.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -607,8 +607,8 @@ function avx_loopset!(
607607
extractind = add_ops!(ls, instr, ops, mrefs, opsymbols, 1, nopsv, expandedv, elementbytes)
608608
extractind = process_metadata!(ls, AM, extractind)
609609
extractind = add_array_symbols!(ls, arraysymbolinds, extractind)
610-
extract_external_functions!(ls, extractind, vargs)
611-
extract_outerreduct_types!(ls, extractind, vargs)
610+
extractind = extract_external_functions!(ls, extractind, vargs)
611+
extractind = extract_outerreduct_types!(ls, extractind, vargs)
612612
ls
613613
end
614614
function avx_body(ls::LoopSet, UNROLL::Tuple{Bool,Int8,Int8,Bool,Int,Int,Int,Int,Int,Int,Int,UInt})

0 commit comments

Comments
 (0)