Skip to content

Commit 63d50ef

Browse files
committed
Don't reject reorders based on compute without children, fixes #338.
1 parent 28233d2 commit 63d50ef

File tree

5 files changed

+10
-9
lines changed

5 files changed

+10
-9
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <[email protected]>"]
4-
version = "0.12.75"
4+
version = "0.12.76"
55

66
[deps]
77
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"

src/codegen/lowering.jl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -917,7 +917,6 @@ function lower(ls::LoopSet, inline::Int = -1)
917917
end
918918
function lower(ls::LoopSet, u₁::Int, u₂::Int, v::Int, inline::Int)
919919
fill_offset_memop_collection!(ls)
920-
fill_children!(ls)
921920
if u₂ > 1
922921
@assert num_loops(ls) > 1 "There is only $(num_loops(ls)) loop, but specified blocking parameter u₂ is $u₂."
923922
order, u₁loop, u₂loop, vectorized, _u₁, _u₂, c, shouldinline = choose_tile(ls, store_load_deps(operations(ls)), v)

src/condense_loopset.jl

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -316,12 +316,12 @@ function skip_constant(instr::Instruction)
316316
end
317317

318318
function add_reassigned_syms!(q::Expr, ls::LoopSet)
319-
for op ∈ operations(ls)
320-
if isconstant(op)
321-
instr = instruction(op)
322-
skip_constant(instr) || push!(q.args, instr.instr)
323-
end
319+
for op ∈ operations(ls)
320+
if isconstant(op)
321+
instr = instruction(op)
322+
skip_constant(instr) || push!(q.args, instr.instr)
324323
end
324+
end
325325
end
326326
function add_external_functions!(q::Expr, ls::LoopSet)
327327
for op ∈ operations(ls)
@@ -625,7 +625,7 @@ function generate_call_types(
625625
ops = operations(ls)
626626
for op ∈ ops
627627
instr::Instruction = instruction(op)
628-
if (isconstant(op) && (instr == LOOPCONSTANT)) && (!roots[identifier(op)])
628+
if ((isconstant(op) && (instr == LOOPCONSTANT)) && (!roots[identifier(op)]))
629629
instr = op.instruction = DROPPEDCONSTANT
630630
end
631631
push!(operation_descriptions.args, QuoteNode(instr.mod))

src/modeling/costs.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,7 @@ const COST = Dict{Symbol,InstructionCost}(
267267
:prefetch1 => InstructionCost(0,0.0,0.0,0),
268268
:prefetch2 => InstructionCost(0,0.0,0.0,0),
269269
:convert => InstructionCost(4,0.5),
270+
:oftype => InstructionCost(4,0.5),
270271
:vpermilps177 => InstructionCost(1, 1.0),
271272
:vmovsldup => InstructionCost(1, 1.0),
272273
:vmovshdup => InstructionCost(1, 1.0),

src/modeling/graphs.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1306,7 +1306,7 @@ end
13061306

13071307
function accept_reorder_according_to_tracked_reductions(ls::LoopSet, reordered::Symbol)
13081308
for op ∈ operations(ls)
1309-
if reordered ∈ loopdependencies(op)
1309+
if (reordered ∈ loopdependencies(op)) && !(iscompute(op) & iszero(length(children(op))))
13101310
for opp ∈ parents(op)
13111311
(iscompute(opp) && isanouterreduction(ls, opp)) && return 0x00
13121312
end
@@ -1367,6 +1367,7 @@ end
13671367

13681368
offsetloadcollection(ls::LoopSet) = ls.omop
13691369
function fill_offset_memop_collection!(ls::LoopSet)
1370+
fill_children!(ls)
13701371
omop = offsetloadcollection(ls)
13711372
ops = operations(ls)
13721373
num_ops = length(ops)

0 commit comments

Comments
 (0)