Skip to content

Commit 3131e1c

Browse files
committed
Add cse-ed loads to opdict, fixes #343. Strengthen affine index analysis to better handle code from that issue.
1 parent bf019ca commit 3131e1c

File tree

7 files changed

+292
-167
lines changed

7 files changed

+292
-167
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <[email protected]>"]
4-
version = "0.12.79"
4+
version = "0.12.80"
55

66
[deps]
77
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"

src/codegen/lower_load.jl

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -316,7 +316,7 @@ function lower_load!(
316316
_lower_load!(q, ls, op, td, mask)
317317
end
318318
function _lower_load!(
319-
q::Expr, ls::LoopSet, op::Operation, td::UnrollArgs, mask::Bool, inds_calc_by_ptr_offset::Vector{Bool} = indices_calculated_by_pointer_offsets(ls, op.ref)
319+
q::Expr, ls::LoopSet, op::Operation, td::UnrollArgs, mask::Bool, inds_calc_by_ptr_offset::Vector{Bool} = indices_calculated_by_pointer_offsets(ls, op.ref)
320320
)
321321
if rejectinterleave(op)
322322
return lower_load_no_optranslation!(q, ls, op, td, mask, inds_calc_by_ptr_offset)
@@ -367,8 +367,12 @@ function rejectcurly(ls::LoopSet, op::Operation, u₁loopsym::Symbol, vloopsym::
367367
end
368368
else
369369
opp = findop(parents(op), ind)
370-
(isu₁unrolled(opp) || isu₂unrolled(opp)) && return true
371-
length(parents(opp)) == 2 || return true
370+
isu₂unrolled(opp) && return true
371+
if length(parents(opp)) == 3
372+
instruction(opp).instr === :muladd
373+
elseif length(parents(opp)) ≠ 2
374+
return true
375+
end
372376
if instruction(opp).instr === :(+) || instruction(opp).instr === :add_fast
373377
isadd = true
374378
elseif instruction(opp).instr === :(-) || instruction(opp).instr === :sub_fast
@@ -412,7 +416,8 @@ function rejectinterleave(ls::LoopSet, op::Operation, vloop::Loop, idsformap::Su
412416
end
413417
end
414418
vloopsym = vloop.itersymbol;
415-
(first(getindices(op)) === vloopsym) && (length(idsformap) ≠ first(getstrides(op)) * gethint(strd))
419+
# @show op first(getindices(op)) length(idsformap), first(getstrides(op)), gethint(strd)
420+
(first(getindices(op)) === vloopsym) && (length(idsformap) ≠ abs(first(getstrides(op)) * gethint(strd)))
416421
end
417422
# function lower_load_collection_manual_u₁unroll!(
418423
# q::Expr, ls::LoopSet, opidmap::Vector{Int},

src/modeling/graphs.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -574,6 +574,7 @@ function rejectinterleave!(ls::LoopSet, op::Operation, u₁loop::Symbol, u₂loo
574574
setunrolled!(ls, op, u₁loop, u₂loop, vloopsym)
575575
if accesses_memory(op)
576576
rc = rejectcurly(ls, op, u₁loop, vloopsym)
577+
# @show rc, op
577578
op.rejectcurly = rc
578579
if rc
579580
op.rejectinterleave = true

src/parse/add_loads.jl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,11 @@ function add_load!(ls::LoopSet, op::Operation, actualarray::Bool = true)
3232
end
3333
end
3434
end
35-
allmatch && return isstore(opp) ? first(parents(opp)) : opp
35+
if allmatch
36+
opp = isstore(opp) ? first(parents(opp)) : opp
37+
ls.opdict[name(op)] = opp
38+
return opp
39+
end
3640
end
3741
add_vptr!(ls, op.ref.ref.array, vptr(op), actualarray)
3842
pushop!(ls, op, name(op))

0 commit comments

Comments
 (0)