Skip to content

Commit c8aab0e

Browse files
committed
Prune deps from constants if immediate children don't depend on them.
1 parent 9cabbf6 commit c8aab0e

File tree

6 files changed

+101
-11
lines changed

6 files changed

+101
-11
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <[email protected]>"]
4-
version = "0.12.6"
4+
version = "0.12.7"
55

66
[deps]
77
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"

src/codegen/lower_constant.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@ function typeof_sym(ls::LoopSet, op::Operation, zerotyp::NumberType)
4242
ELTYPESYMBOL
4343
end
4444
end
45+
# Stub with no implementation yet: it ignores both arguments and returns `nothing`.
# NOTE(review): the name suggests a membership test of `s` among the reduced children
# of `op` — confirm the intended behavior before relying on it.
in_reduced_children(op::Operation, s::Symbol) = nothing
4548

4649
function lower_zero!(
4750
q::Expr, op::Operation, ls::LoopSet, ua::UnrollArgs, zerotyp::NumberType = zerotype(ls, op)

src/codegen/lowering.jl

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -894,10 +894,8 @@ function isunrolled_sym(op::Operation, u₁loop::Symbol, u₂loop::Symbol, vloop
894894
uβ‚‚ild = uβ‚‚loop ∈ reducedchildren(op)
895895
end
896896
end
897-
# @show op u₁ild, uβ‚‚ild
898897
(u₁ild & uβ‚‚ild) || return u₁ild, uβ‚‚ild
899898
reductops = isconstant(op) ? reducedchildren(op) : reduceddependencies(op)
900-
# @show op reductops
901899
iszero(length(reductops)) && return true, true
902900
u₁reduced = u₁loop ∈ reductops
903901
uβ‚‚reduced = uβ‚‚loop ∈ reductops

src/modeling/graphs.jl

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -577,11 +577,13 @@ gensym!(ls::LoopSet, s) = Symbol("###$(s)###$(ls.symcounter[] += 1)###")
577577
function cacheunrolled!(ls::LoopSet, u₁loop::Symbol, uβ‚‚loop::Symbol, vloopsym::Symbol)
578578
vloop = getloop(ls, vloopsym)
579579
for op ∈ operations(ls)
580-
setunrolled!(op, u₁loop, uβ‚‚loop, vloopsym)
581580
empty!(children(op))
582581
for opp ∈ parents(op)
583582
push!(children(opp), op)
584583
end
584+
end
585+
for op ∈ operations(ls)
586+
setunrolled!(op, u₁loop, uβ‚‚loop, vloopsym)
585587
if accesses_memory(op)
586588
rc = rejectcurly(ls, op, u₁loop, vloopsym)
587589
op.rejectcurly = rc
@@ -1148,12 +1150,6 @@ function looplength(ls::LoopSet, s::Symbol)
11481150
end
11491151
end
11501152

1151-
# function getunrolled(ls::LoopSet)
1152-
# order = names(ls)
1153-
# us = ls.unrollspecification[]
1154-
# @unpack u₁loopnum, uβ‚‚loopnum = us
1155-
# order[u₁loopnum], order[uβ‚‚loopnum]
1156-
# end
11571153
# Accessor: return the `omop` field stored on the given `LoopSet`.
function offsetloadcollection(ls::LoopSet)
    return ls.omop
end
11581154
function fill_offset_memop_collection!(ls::LoopSet)
11591155
omop = offsetloadcollection(ls)

src/modeling/operations.jl

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,13 @@ function setunrolled!(op::Operation, u₁loopsym, u₂loopsym, vectorized)
228228
op.u₁unrolled = u₁loopsym ∈ loopdependencies(op)
229229
op.uβ‚‚unrolled = uβ‚‚loopsym ∈ loopdependencies(op)
230230
op.vectorized = vectorized ∈ loopdependencies(op)
231+
if isconstant(op)
232+
for opp ∈ children(op)
233+
op.u₁unrolled = op.u₁unrolled && u₁loopsym ∈ loopdependencies(opp)
234+
op.uβ‚‚unrolled = op.uβ‚‚unrolled && uβ‚‚loopsym ∈ loopdependencies(opp)
235+
op.vectorized = op.vectorized && vectorized ∈ loopdependencies(opp)
236+
end
237+
end
231238
nothing
232239
end
233240

test/tullio.jl

Lines changed: 87 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,93 @@ using LoopVectorization, Test
6868
@test and(A)
6969
A[3] = -1
7070
@test !and(A)
71-
71+
72+
73+
# Contract `Fu` with `V` over their shared index `iu` and store the result in `EV`:
#
#     EV[a, k, x, z, u] = sum over iu of Fu[u, iu] * V[k, x, z, iu]
#
# The right-hand side does not depend on `a`, so every `a` slice receives the same
# values. The whole loop nest is handed to `@avx`.
#
# Arguments:
# - `EV`: 5-dimensional output array indexed `[a, k, x, z, u]`; overwritten in place.
# - `Fu`: 2-dimensional array indexed `[u, iu]`.
# - `V`:  4-dimensional array indexed `[k, x, z, iu]`.
# - `♻️`: not used by the live code; it is only mentioned in the commented-out
#   alternative initialiser below.
#
# Throws a bare `String` (not an `Exception` object) when a rank or axis check fails.
function dadada!(EV, Fu, V, ♻️ = nothing)
    # Rank checks.
    (ndims)(EV) == 5 || (throw)("expected a 5-array EV")
    (ndims)(Fu) == 2 || (throw)("expected a 2-array Fu")
    (ndims)(V) == 4 || (throw)("expected a 4-array V")

    # Index ranges, each cross-checked against the other array sharing that index.
    local 𝒶𝓍a = (axes)(EV, 1)
    local 𝒶𝓍k = (axes)(EV, 2)
    (axes)(V, 1) == (axes)(EV, 2) || (throw)("range of index k must agree")
    local 𝒶𝓍iu = (axes)(Fu, 2)
    (axes)(V, 4) == (axes)(Fu, 2) || (throw)("range of index iu must agree")
    local 𝒶𝓍z = (axes)(EV, 4)
    (axes)(V, 3) == (axes)(EV, 4) || (throw)("range of index z must agree")
    local 𝒶𝓍u = (axes)(EV, 5)
    (axes)(Fu, 1) == (axes)(EV, 5) || (throw)("range of index u must agree")
    local 𝒶𝓍x = (axes)(EV, 3)
    (axes)(V, 2) == (axes)(EV, 3) || (throw)("range of index x must agree")

    # `ℛ` is an alias of `EV`, so the stores below mutate the caller's array.
    ℛ = EV

    @avx for u = 𝒶𝓍u
        for z = 𝒶𝓍z
            for x = 𝒶𝓍x
                for k = 𝒶𝓍k
                    for a = 𝒶𝓍a
                        𝒜𝒸𝒸 = zero(eltype(EV)) # simpler, same error
                        # 𝒜𝒸𝒸 = if ♻️ === nothing
                        #     zero(𝒯)
                        # else
                        #     ℛ[a, k, x, z, u]
                        # end
                        for iu = 𝒶𝓍iu
                            𝒜𝒸𝒸 = 𝒜𝒸𝒸 + Fu[u, iu] * V[k, x, z, iu]
                        end
                        ℛ[a, k, x, z, u] = 𝒜𝒸𝒸
                    end
                end
            end
        end
    end
end
113+
# Plain-Julia reference for the same contraction as the `@avx` kernel `dadada!`:
#
#     EV[a, k, x, z, u] = sum over iu of Fu[u, iu] * V[k, x, z, iu]
#
# The right-hand side does not depend on `a`, so every `a` slice receives the same
# values. The loop nest runs under `@inbounds @fastmath` instead of `@avx`.
#
# Arguments:
# - `EV`: 5-dimensional output array indexed `[a, k, x, z, u]`; overwritten in place.
# - `Fu`: 2-dimensional array indexed `[u, iu]`.
# - `V`:  4-dimensional array indexed `[k, x, z, iu]`.
# - `♻️`: not used by the live code; it is only mentioned in the commented-out
#   alternative initialiser below.
#
# Throws a bare `String` (not an `Exception` object) when a rank or axis check fails.
function dadada_noavx!(EV, Fu, V, ♻️ = nothing)
    # Rank checks.
    (ndims)(EV) == 5 || (throw)("expected a 5-array EV")
    (ndims)(Fu) == 2 || (throw)("expected a 2-array Fu")
    (ndims)(V) == 4 || (throw)("expected a 4-array V")

    # Index ranges, each cross-checked against the other array sharing that index.
    local 𝒶𝓍a = (axes)(EV, 1)
    local 𝒶𝓍k = (axes)(EV, 2)
    (axes)(V, 1) == (axes)(EV, 2) || (throw)("range of index k must agree")
    local 𝒶𝓍iu = (axes)(Fu, 2)
    (axes)(V, 4) == (axes)(Fu, 2) || (throw)("range of index iu must agree")
    local 𝒶𝓍z = (axes)(EV, 4)
    (axes)(V, 3) == (axes)(EV, 4) || (throw)("range of index z must agree")
    local 𝒶𝓍u = (axes)(EV, 5)
    (axes)(Fu, 1) == (axes)(EV, 5) || (throw)("range of index u must agree")
    local 𝒶𝓍x = (axes)(EV, 3)
    (axes)(V, 2) == (axes)(EV, 3) || (throw)("range of index x must agree")

    # `ℛ` is an alias of `EV`, so the stores below mutate the caller's array.
    ℛ = EV

    @inbounds @fastmath for u = 𝒶𝓍u
        for z = 𝒶𝓍z
            for x = 𝒶𝓍x
                for k = 𝒶𝓍k
                    for a = 𝒶𝓍a
                        𝒜𝒸𝒸 = zero(eltype(EV)) # simpler, same error
                        # 𝒜𝒸𝒸 = if ♻️ === nothing
                        #     zero(𝒯)
                        # else
                        #     ℛ[a, k, x, z, u]
                        # end
                        for iu = 𝒶𝓍iu
                            𝒜𝒸𝒸 = 𝒜𝒸𝒸 + Fu[u, iu] * V[k, x, z, iu]
                        end
                        ℛ[a, k, x, z, u] = 𝒜𝒸𝒸
                    end
                end
            end
        end
    end
end
153+
# Run the `@avx` kernel and the plain-Julia reference on the same small random inputs
# and require that both outputs agree approximately.
EV, Fu, V = rand(3,3,3,3,3), rand(3,3), rand(3,3,3,3);
EV2 = similar(EV);
dadada!(EV, Fu, V)
dadada_noavx!(EV2, Fu, V)
@test EV ≈ EV2
72158
end
73159

74160

0 commit comments

Comments
(0)