Skip to content

Commit e182eb0

Browse files
committed
Count total constant offsets when calculating dimension for cse-ing a dynamic index. Fixes #279.
1 parent f72f482 commit e182eb0

File tree

2 files changed

+34
-38
lines changed

2 files changed

+34
-38
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <[email protected]>"]
4-
version = "0.12.34"
4+
version = "0.12.35"
55

66
[deps]
77
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"

src/parse/memory_ops_common.jl

Lines changed: 33 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -51,47 +51,43 @@ end
5151
@inline staticdims(::Any) = One()
5252
@inline staticdims(::CartesianIndices{N}) where {N} = StaticInt{N}()
5353

54-
55-
function append_loop_staticdims!(valcall::Expr, loop::Loop)
56-
if isstaticloop(loop)
57-
push!(valcall.args, staticexpr(1))
58-
else
59-
push!(valcall.args, Expr(:call, lv(:staticdims), loop.rangesym))#loop_boundary(loop)))
60-
end
61-
nothing
54+
function append_loop_staticdims!(valcall::Expr, loop::Loop, constoffset::Int)
55+
isstaticloop(loop) && return constoffset + 1
56+
push!(valcall.args, Expr(:call, lv(:staticdims), loop.rangesym))
57+
constoffset
6258
end
6359
function subset_vptr!(ls::LoopSet, vptr::Symbol, indnum::Int, ind, previndices, loopindex, subset::Bool)
64-
str_typ = subset ? "subset" : "index"
65-
subsetvptr = Symbol(vptr, "_##$(str_typ)##_$(indnum)_##with##_$(ind)_##")
66-
valcall = staticexpr(1)
67-
if indnum > 1
68-
offset = first(previndices) === DISCONTIGUOUS
69-
valcall = Expr(:call, :(+), valcall)
70-
for i ∈ 1:indnum-1
71-
loopdep = if loopindex[i]
72-
index = previndices[i+offset]
73-
if index === CONSTANTZEROINDEX
74-
if indnum == 2 && i == 1
75-
push!(valcall.args, staticexpr(1))
76-
else
77-
valcall.args[2] = staticexpr(2)
78-
end
79-
continue
80-
else
81-
index
82-
end
83-
else
84-
# assumes all staticdims will be of equal length once expanded...
85-
# A[I + J, constindex], I and J may be CartesianIndices. This requires they all be of same number of dims
86-
first(loopdependencies(ls.opdict[previndices[i+offset]]))
87-
end
88-
append_loop_staticdims!(valcall, getloop(ls, loopdep))
60+
str_typ = subset ? "subset" : "index"
61+
subsetvptr = Symbol(vptr, "_##$(str_typ)##_$(indnum)_##with##_$(ind)_##")
62+
valcall = Expr(:call, +)
63+
constoffset = 1
64+
if indnum > 1
65+
offset = first(previndices) === DISCONTIGUOUS
66+
# valcall = Expr(:call, :(+), valcall)
67+
for i ∈ 1:indnum-1
68+
if loopindex[i]
69+
loopdep = previndices[i+offset]
70+
if loopdep === CONSTANTZEROINDEX
71+
constoffset += 1
72+
continue
8973
end
74+
else
75+
# assumes all staticdims will be of equal length once expanded...
76+
# A[I + J, constindex], I and J may be CartesianIndices. This requires they all be of same number of dims
77+
loopdep = first(loopdependencies(ls.opdict[previndices[i+offset]]))
78+
end
79+
constoffset = append_loop_staticdims!(valcall, getloop(ls, loopdep), constoffset)
9080
end
91-
# indm1 = ind isa Integer ? ind - 1 : Expr(:call, :-, ind, 1)
92-
f = lv(Core.ifelse(subset, :subsetview, :_gesp))
93-
pushpreamble!(ls, Expr(:(=), subsetvptr, Expr(:call, f, vptr, valcall, ind)))
94-
subsetvptr
81+
end
82+
# indm1 = ind isa Integer ? ind - 1 : Expr(:call, :-, ind, 1)
83+
f = lv(Core.ifelse(subset, :subsetview, :_gesp))
84+
constoffsetexpr = staticexpr(constoffset)
85+
if length(valcall.args) > 1
86+
push!(valcall.args, constoffsetexpr)
87+
constoffsetexpr = valcall
88+
end
89+
pushpreamble!(ls, Expr(:(=), subsetvptr, Expr(:call, f, vptr, constoffsetexpr, ind)))
90+
subsetvptr
9591
end
9692

9793
function gesp_const_offset!(ls::LoopSet, vptrarray, ninds, indices, loopedindex, mlt::Integer, sym)

0 commit comments

Comments
 (0)