Skip to content

Commit a3cdfff

Browse files
committed
Special case some global constant symbols to avoid passing them as arguments.
1 parent 4dfcab7 commit a3cdfff

File tree

8 files changed

+55
-38
lines changed

8 files changed

+55
-38
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <[email protected]>"]
4-
version = "0.12.13"
4+
version = "0.12.14"
55

66
[deps]
77
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"

src/codegen/lower_compute.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -512,7 +512,6 @@ function lower_compute!(
512512
loopval = first(loopdependencies(opp))
513513
add_loopvalue!(instrcall, loopval, ua, u₁)
514514
elseif name(opp) === name(op)
515-
516515
selfdep = n
517516
if ((isvectorized(opp) && !isvectorized(op))) ||
518517
(parents_u₁syms[n] != u₁unrolledsym) || (parents_u₂syms[n] != u₂unrolledsym)
@@ -531,6 +530,8 @@ function lower_compute!(
531530
push!(q.args, Expr(:(=), reducedparentname, reduced_u₂))
532531
reduced_u₂ = reduce_parent!(q, ls, op, opp, reducedparentname)
533532
push!(instrcall.args, reduced_u₂)
533+
elseif isconstant(opp) && instruction(opp).mod === GLOBALCONSTANT
534+
push!(instrcall.args, GlobalRef(Base, instruction(opp).instr))
534535
else
535536
parent, uₚ = parent_op_name!(q, ls, parents_op, n, modsuffix, suffix_, parents_u₁syms, parents_u₂syms, u₁, u₂max, u₂unrolledsym, op, tiledouterreduction)
536537
parent = reduce_parent!(q, ls, op, opp, parent)

src/codegen/lower_constant.jl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -102,9 +102,9 @@ function lower_constant!(
102102
@unpack u₁, u₁loopsym, u₂loopsym, vloopsym, u₂max, suffix = ua
103103
mvar, opu₁, opu₂ = variable_name_and_unrolled(op, u₁loopsym, u₂loopsym, vloopsym, suffix, ls)
104104
!opu₂ && suffix > 0 && return
105-
instruction = op.instruction
106-
constsym = instruction.instr
107-
# constsym = Symbol(instruction.instr, '_', 1)
105+
instr = instruction(op)
106+
instr.mod === GLOBALCONSTANT && return
107+
constsym = instr.instr
108108
reducedchildvectorized = vloopsym ∈ reducedchildren(op)
109109
if reducedchildvectorized || isvectorized(op) || vloopsym ∈ reduceddependencies(op) || should_broadcast_op(op)
110110
# call = Expr(:call, lv(:vbroadcast), W, Expr(:call, lv(:maybeconvert), typeT, constsym))
@@ -160,7 +160,7 @@ function lower_constant!(
160160
nothing
161161
end
162162

163-
isconstantop(op::Operation) = (instruction(op) === LOOPCONSTANT) || (isconstant(op) && length(loopdependencies(op)) == 0)
163+
isconstantop(op::Operation) = (instruction(op) == LOOPCONSTANT) || (isconstant(op) && length(loopdependencies(op)) == 0)
164164
function constantopname(op::Operation)
165165
instr = instruction(op)
166166
if instr === LOOPCONSTANT

src/condense_loopset.jl

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,7 @@ function loopset_return_value(ls::LoopSet, ::Val{extract}) where {extract}
243243
end
244244
const DROPPEDCONSTANT = Instruction(Symbol("##DROPPED#CONSTANT##"),Symbol("##DROPPED#CONSTANT##"))
245245
function skip_constant(instr::Instruction)
246-
((instr == LOOPCONSTANT) || (instr.mod === :numericconstant)) || (instr == DROPPEDCONSTANT)
246+
(((instr == LOOPCONSTANT) || (instr.mod === :numericconstant)) || (instr == DROPPEDCONSTANT)) || instr.mod === GLOBALCONSTANT
247247
end
248248

249249
function add_reassigned_syms!(q::Expr, ls::LoopSet)
@@ -456,6 +456,15 @@ function remove_outer_reducts!(roots::Vector{Bool}, ls::LoopSet)
456456
end
457457
end
458458

459+
# function generate_call_split(ls::LoopSet, (inline,u₁,u₂)::Tuple{Bool,Int8,Int8}, thread::UInt, debug::Bool = false)
460+
# ops = operations(ls)
461+
# for op ∈ ops
462+
# if (iscompute(op) && (instruction(op).instr === :ifelse)) && iszero(length(loopdependencies(first(parents(op)))))
463+
# # we want to eliminate
464+
# end
465+
# end
466+
# end
467+
459468
# Try to condense in type stable manner
460469
function generate_call(ls::LoopSet, (inline,u₁,u₂)::Tuple{Bool,Int8,Int8}, thread::UInt, debug::Bool = false)
461470
extra_args = Expr(:tuple)

src/modeling/costs.jl

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,6 @@ Base.hash(instr::Instruction, h::UInt64) = hash(instr.instr, hash(instr.mod, h))
3636
# end
3737
Base.isequal(ins1::Instruction, ins2::Instruction) = (ins1.instr === ins2.instr) && (ins1.mod === ins2.mod)
3838

39-
const LOOPCONSTANT = Instruction(:LoopVectorization, Symbol("LOOPCONSTANTINSTRUCTION"))
40-
4139
"""
4240
InstructionCost
4341

src/modeling/graphs.jl

Lines changed: 25 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1344,32 +1344,34 @@ Returns `n`, where `n` is the constant declarations's index among parents(op), i
13441344
Returns `-1` if not an outerreduction.
13451345
"""
13461346
function isouterreduction(ls::LoopSet, op::Operation)
1347-
if isconstant(op) # equivalent to checking if length(loopdependencies(op)) == 0
1348-
op.instruction == LOOPCONSTANT && return 0
1349-
ops = operations(ls)
1350-
for or ∈ ls.outer_reductions
1351-
name(op) === name(ops[or]) && return 0
1347+
if isconstant(op) # equivalent to checking if length(loopdependencies(op)) == 0
1348+
instr = op.instruction
1349+
instr == LOOPCONSTANT && return 0
1350+
instr.mod === GLOBALCONSTANT && return -1
1351+
ops = operations(ls)
1352+
for or ∈ ls.outer_reductions
1353+
name(op) === name(ops[or]) && return 0
1354+
end
1355+
-1
1356+
elseif iscompute(op)
1357+
var = op.variable
1358+
for opid ∈ ls.outer_reductions
1359+
rop = operations(ls)[opid]
1360+
if rop === op
1361+
for (n,opp) ∈ enumerate(parents(op))
1362+
opp.variable === var && return n
13521363
end
1353-
-1
1354-
elseif iscompute(op)
1355-
var = op.variable
1356-
for opid ∈ ls.outer_reductions
1357-
rop = operations(ls)[opid]
1358-
if rop === op
1359-
for (n,opp) ∈ enumerate(parents(op))
1360-
opp.variable === var && return n
1361-
end
1362-
else
1363-
for (n,opp) ∈ enumerate(parents(op))
1364-
opp === rop && return n
1365-
search_tree(parents(opp), rop.variable) && return n
1366-
end
1367-
end
1364+
else
1365+
for (n,opp) ∈ enumerate(parents(op))
1366+
opp === rop && return n
1367+
search_tree(parents(opp), rop.variable) && return n
13681368
end
1369-
-1
1370-
else
1371-
-1
1369+
end
13721370
end
1371+
-1
1372+
else
1373+
-1
1374+
end
13731375
end
13741376

13751377
struct LoopError <: Exception

src/modeling/operations.jl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
const DISCONTIGUOUS = Symbol("##DISCONTIGUOUSSUBARRAY##")
22
const CONSTANTZEROINDEX = Symbol("##CONSTANTZEROINDEX##")
3+
const LOOPCONSTANT = Instruction(:LoopVectorization, Symbol("LOOPCONSTANTINSTRUCTION"))
4+
const GLOBALCONSTANT = Symbol("##GLOBAL##CONSTANT##")
5+
36

47

58
"""
@@ -250,8 +253,9 @@ const NOPARENTS = Operation[]
250253
function Base.show(io::IO, op::Operation)
251254
if isconstant(op)
252255
if op.instruction === LOOPCONSTANT
253-
254256
print(io, Expr(:(=), op.variable, 0))
257+
elseif op.instruction.instr === GLOBALCONSTANT
258+
print(io, op.instruction.instr)
255259
else
256260
print(io, Expr(:(=), op.variable, op.instruction.instr))
257261
end

src/parse/add_constants.jl

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
1+
const CONSTANT_SYMBOLS = (:nothing, :Float64, :Float32, :Int8, :UInt8, :Int16, :UInt16, :Int32, :UInt32, :Int64, :UInt64)
12
function add_constant!(ls::LoopSet, var::Symbol, elementbytes::Int)
2-
op = Operation(length(operations(ls)), var, elementbytes, LOOPCONSTANT, constant, NODEPENDENCY, Symbol[], NOPARENTS)
3-
rop = pushop!(ls, op, var)
4-
rop === op && pushpreamble!(ls, op, var)
5-
rop
3+
globalconst = Base.sym_in(var, CONSTANT_SYMBOLS)
4+
instr = globalconst ? Instruction(GLOBALCONSTANT, var) : LOOPCONSTANT
5+
op = Operation(length(operations(ls)), var, elementbytes, instr, constant, NODEPENDENCY, Symbol[], NOPARENTS)
6+
rop = pushop!(ls, op, var)
7+
(!globalconst && (rop === op)) && pushpreamble!(ls, op, var)
8+
rop
69
end
710
# function add_constant!(ls::LoopSet, var, elementbytes::Int = 8)
811
# sym = gensym(:loopconstant)
@@ -14,7 +17,7 @@ function add_constant!(ls::LoopSet, var::Number, elementbytes::Int = 8)
1417
ops = operations(ls)
1518
typ = var isa Integer ? HardInt : HardFloat
1619
rop = pushop!(ls, op)
17-
rop !== op && return rop
20+
rop === op || return rop
1821
if iszero(var)
1922
for (id,typ_) ∈ ls.preamble_zeros
2023
(instruction(ops[id]) == LOOPCONSTANT && typ == typ_) && return ops[id]

0 commit comments

Comments
 (0)