Skip to content

Commit c9ec9f4

Browse files
committed
For indices that are not looped indices, check that the parent ops match, as matching symbols alone may not be enough
1 parent 89e5448 commit c9ec9f4

File tree

4 files changed

+57
-38
lines changed

4 files changed

+57
-38
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <[email protected]>"]
4-
version = "0.12.51"
4+
version = "0.12.52"
55

66
[deps]
77
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"

src/codegen/lower_memory_common.jl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
function parentind(ind::Symbol, op::Operation)
2-
for (id,opp) ∈ enumerate(parents(op))
3-
name(opp) === ind && return id
4-
end
5-
-1
2+
for (id,opp) ∈ enumerate(parents(op))
3+
name(opp) === ind && return id
4+
end
5+
-1
66
end
77
function symbolind(ind::Symbol, op::Operation, td::UnrollArgs, ls::LoopSet)
88
id = parentind(ind, op)

src/modeling/operations.jl

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -227,23 +227,23 @@ isu₂unrolled(op::Operation) = op.u₂unrolled
227227
isvectorized(op::Operation) = op.vectorized
228228

229229
function matches(op1::Operation, op2::Operation)
230-
op1 === op2 && return true
231-
op1.instruction === op2.instruction || return false
232-
op1.node_type == op2.node_type || return false
233-
if isconstant(op1)
234-
return iszero(length(loopdependencies(op1))) && iszero(length(loopdependencies(op2))) && (mangledvar(op1) === mangledvar(op2))
235-
end
236-
op1.dependencies == op2.dependencies || return false
237-
op2.reduced_deps == op2.reduced_deps || return false
238-
if accesses_memory(op1)
239-
op1.ref == op2.ref || return false
240-
end
241-
nparents = length(parents(op1))
242-
nparents == length(parents(op2)) || return false
243-
for p ∈ 1:nparents
244-
matches(op1.parents[p], op2.parents[p]) || return false
245-
end
246-
true
230+
op1 === op2 && return true
231+
op1.instruction === op2.instruction || return false
232+
op1.node_type == op2.node_type || return false
233+
if isconstant(op1)
234+
return iszero(length(loopdependencies(op1))) && iszero(length(loopdependencies(op2))) && (mangledvar(op1) === mangledvar(op2))
235+
end
236+
op1.dependencies == op2.dependencies || return false
237+
op2.reduced_deps == op2.reduced_deps || return false
238+
if accesses_memory(op1)
239+
op1.ref == op2.ref || return false
240+
end
241+
nparents = length(parents(op1))
242+
nparents == length(parents(op2)) || return false
243+
for p ∈ 1:nparents
244+
matches(op1.parents[p], op2.parents[p]) || return false
245+
end
246+
true
247247
end
248248

249249
# negligible save on allocations for operations that don't need these (eg, constants).

src/parse/add_loads.jl

Lines changed: 35 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,43 @@
11
function maybeaddref!(ls::LoopSet, op::Operation)
2-
ref = op.ref
3-
id = findfirst(==(ref), ls.refs_aliasing_syms)
4-
# try to CSE
5-
if id === nothing
6-
push!(ls.syms_aliasing_refs, name(op))
7-
push!(ls.refs_aliasing_syms, ref)
8-
0
9-
else
10-
id
11-
end
2+
ref = op.ref
3+
id = findfirst(==(ref), ls.refs_aliasing_syms)
4+
# try to CSE
5+
if id === nothing
6+
push!(ls.syms_aliasing_refs, name(op))
7+
push!(ls.refs_aliasing_syms, ref)
8+
0
9+
else
10+
id
11+
end
1212
end
1313

1414
function add_load!(ls::LoopSet, op::Operation, actualarray::Bool = true)
15-
@assert isload(op)
16-
if (id = maybeaddref!(ls, op)) > 0 # try to CSE
17-
opp = ls.opdict[ls.syms_aliasing_refs[id]] # throw an error if not found.
18-
return isstore(opp) ? getop(ls, first(parents(opp))) : opp
15+
@assert isload(op)
16+
if (id = maybeaddref!(ls, op)) > 0 # try to CSE
17+
opp = ls.opdict[ls.syms_aliasing_refs[id]] # throw an error if not found.
18+
op_inds = getindicesonly(op)
19+
li = op.ref.loopedindex
20+
allmatch = true
21+
parents_op = parents(op)
22+
parents_opp = parents(opp)
23+
for i ∈ eachindex(op_inds)
24+
li[i] && continue
25+
ind = op_inds[i]
26+
if (id_op = parentind(ind, op)) > 0
27+
if (id_opp = parentind(ind, opp)) > 0
28+
if !matches(parents_op[id_op], parents_opp[id_opp])
29+
allmatch = false
30+
break
31+
end
32+
end
33+
end
1934
end
20-
add_vptr!(ls, op.ref.ref.array, vptr(op), actualarray)
21-
pushop!(ls, op, name(op))
35+
if allmatch
36+
return isstore(opp) ? getop(ls, first(parents(opp))) : opp
37+
end
38+
end
39+
add_vptr!(ls, op.ref.ref.array, vptr(op), actualarray)
40+
pushop!(ls, op, name(op))
2241
end
2342

2443
function add_load!(

0 commit comments

Comments
 (0)