Skip to content

Commit 07de1e4

Browse files
committed
Reorder lowering in a way that slightly improves performance in some cases.
1 parent 6f299fa commit 07de1e4

File tree

1 file changed

+20
-13
lines changed

1 file changed

+20
-13
lines changed

src/lowering.jl

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -82,16 +82,12 @@ function lower_block(
8282
for prepost ∈ 1:2
8383
# !U && !T
8484
lower!(blockq, ops[1,1,prepost,n], vectorized, ls, unrolled, tiled, U, nothing, mask)
85-
# for u ∈ 0:U-1 # U && !T
86-
lower!(blockq, ops[2,1,prepost,n], vectorized, ls, unrolled, tiled, U, nothing, mask)
87-
# end
88-
if length(ops[1,2,prepost,n]) + length(ops[2,2,prepost,n]) > 0
85+
opsv1 = ops[1,2,prepost,n]
86+
opsv2 = ops[2,2,prepost,n]
87+
if length(opsv1) + length(opsv2) > 0
8988
for store ∈ (false,true)
9089
# let store = nothing
9190
nstores = 0
92-
opsv1 = ops[1,2,prepost,n]
93-
opsv2 = ops[2,2,prepost,n]
94-
iszero(length(opsv1) + length(opsv2)) && continue
9591
iszero(length(opsv1)) || (nstores += sum(isstore, opsv1))
9692
iszero(length(opsv2)) || (nstores += sum(isstore, opsv2))
9793
for t ∈ 0:T-1
@@ -102,21 +98,32 @@ function lower_block(
10298
else
10399
push!(blockq.args, Expr(:+=, tiled, 1))
104100
end
105-
# !U && T
106-
if dontmaskfirsttiles && t < T - 1
101+
if dontmaskfirsttiles && t < T - 1 # !U && T
107102
lower!(blockq, opsv1, vectorized, ls, unrolled, tiled, U, t, nothing, store)
108-
# for u ∈ 0:U-1 # U && T
103+
else # !U && T
104+
lower!(blockq, opsv1, vectorized, ls, unrolled, tiled, U, t, mask, store)
105+
end
106+
if iszero(t) && !store # U && !T
107+
# for u ∈ 0:U-1
108+
lower!(blockq, ops[2,1,prepost,n], vectorized, ls, unrolled, tiled, U, nothing, mask)
109+
# end
110+
end
111+
if dontmaskfirsttiles && t < T - 1 # U && T
112+
# for u ∈ 0:U-1
109113
lower!(blockq, opsv2, vectorized, ls, unrolled, tiled, U, t, nothing, store)
110114
# end
111-
else
112-
lower!(blockq, opsv1, vectorized, ls, unrolled, tiled, U, t, mask, store)
113-
# for u ∈ 0:U-1 # U && T
115+
else # U && T
116+
# for u ∈ 0:U-1
114117
lower!(blockq, opsv2, vectorized, ls, unrolled, tiled, U, t, mask, store)
115118
# end
116119
end
117120
end
118121
nstores == 0 && break
119122
end
123+
else
124+
# for u ∈ 0:U-1 # U && !T
125+
lower!(blockq, ops[2,1,prepost,n], vectorized, ls, unrolled, tiled, U, nothing, mask)
126+
# end
120127
end
121128
if n > 1 && prepost == 1
122129
push!(blockq.args, lower_unrolled_dynamic(ls, us, n-1, !isnothing(mask)))

0 commit comments

Comments
 (0)