Skip to content

Commit 6736394

Browse files
committed
Bump VectorizationBase and adjust split loop heuristic. Fixes #283
1 parent 9ce0c2c commit 6736394

File tree

2 files changed

+36
-34
lines changed

2 files changed

+36
-34
lines changed

Project.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <[email protected]>"]
4-
version = "0.12.38"
4+
version = "0.12.39"
55

66
[deps]
77
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
@@ -30,5 +30,5 @@ Static = "0.2"
3030
StrideArraysCore = "0.1.12"
3131
ThreadingUtilities = "0.4.2"
3232
UnPack = "1"
33-
VectorizationBase = "0.20.16"
33+
VectorizationBase = "0.20.17"
3434
julia = "1.5"

src/codegen/split_loops.jl

Lines changed: 34 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -89,39 +89,41 @@ function returned_ops(ls::LoopSet)
8989
end
9090

9191
function lower_and_split_loops(ls::LoopSet, inline::Int)
92-
split_candidates = returned_ops(ls)
93-
length(split_candidates) > 1 || return lower(ls, inline)
94-
order_fused, unrolled_fused, tiled_fused, vectorized_fused, U_fused, T_fused, cost_fused, shouldinline_fused = choose_order_cost(ls)
95-
remaining_ops = Vector{Int}(undef, length(split_candidates) - 1); split_1 = Int[0];
96-
# for (ind,i) ∈ enumerate(split_candidates)
97-
for (ind,i) ∈ enumerate(split_candidates)
98-
split_1[1] = i
99-
ls_1 = split_loopset(ls, split_1)
100-
order_1, unrolled_1, tiled_1, vectorized_1, U_1, T_1, cost_1, shouldinline_1 = choose_order_cost(ls_1)
101-
remaining_ops[1:ind-1] .= @view(split_candidates[1:ind-1]); remaining_ops[ind:end] .= @view(split_candidates[ind+1:end])
102-
ls_2 = split_loopset(ls, remaining_ops)
103-
order_2, unrolled_2, tiled_2, vectorized_2, U_2, T_2, cost_2, shouldinline_2 = choose_order_cost(ls_2)
104-
# U_1 = T_1 = U_2 = T_2 = 2
105-
#@show cost_1 + cost_2 ≤ cost_fused, cost_1, cost_2, cost_fused
106-
if cost_1 + cost_2 ≤ cost_fused
107-
ls_2_lowered = if length(remaining_ops) > 1
108-
inline = iszero(inline) ? (shouldinline_1 % Int) : inline
109-
lower_and_split_loops(ls_2, inline)
110-
else
111-
doinline = inlinedecision(inline, shouldinline_1 | shouldinline_2)
112-
lower(ls_2, order_2, unrolled_2, tiled_2, vectorized_2, U_2, T_2, doinline)
113-
end
114-
return Expr(
115-
:block,
116-
ls.preamble,
117-
lower(ls_1, order_1, unrolled_1, tiled_1, vectorized_1, U_1, T_1, false),
118-
ls_2_lowered,
119-
nothing
120-
)
121-
end
92+
split_candidates = returned_ops(ls)
93+
length(split_candidates) > 1 || return lower(ls, inline)
94+
order_fused, unrolled_fused, tiled_fused, vectorized_fused, U_fused, T_fused, cost_fused, shouldinline_fused = choose_order_cost(ls)
95+
remaining_ops = Vector{Int}(undef, length(split_candidates) - 1); split_1 = Int[0];
96+
# for (ind,i) ∈ enumerate(split_candidates)
97+
for (ind,i) ∈ enumerate(split_candidates)
98+
split_1[1] = i
99+
ls_1 = split_loopset(ls, split_1)
100+
order_1, unrolled_1, tiled_1, vectorized_1, U_1, T_1, cost_1, shouldinline_1 = choose_order_cost(ls_1)
101+
remaining_ops[1:ind-1] .= @view(split_candidates[1:ind-1]); remaining_ops[ind:end] .= @view(split_candidates[ind+1:end])
102+
ls_2 = split_loopset(ls, remaining_ops)
103+
order_2, unrolled_2, tiled_2, vectorized_2, U_2, T_2, cost_2, shouldinline_2 = choose_order_cost(ls_2)
104+
# U_1 = T_1 = U_2 = T_2 = 2
105+
# return ls_1, ls_2
106+
# @show cost_1 + cost_2 ≤ cost_fused, cost_1, cost_2, cost_fused
107+
if cost_1 + cost_2 ≤ 0.9cost_fused
108+
ls_2_lowered = if length(remaining_ops) > 1
109+
inline = iszero(inline) ? (shouldinline_1 % Int) : inline
110+
lower_and_split_loops(ls_2, inline)
111+
else
112+
doinline = inlinedecision(inline, shouldinline_1 | shouldinline_2)
113+
lower(ls_2, order_2, unrolled_2, tiled_2, vectorized_2, U_2, T_2, doinline)
114+
end
115+
return Expr(
116+
:block,
117+
ls.preamble,
118+
lower(ls_1, order_1, unrolled_1, tiled_1, vectorized_1, U_1, T_1, false),
119+
ls_2_lowered,
120+
nothing
121+
)
122122
end
123-
doinline = inlinedecision(inline, shouldinline_fused)
124-
lower(ls, order_fused, unrolled_fused, tiled_fused, vectorized_fused, U_fused, T_fused, doinline)
123+
length(split_candidates) == 2 && break
124+
end
125+
doinline = inlinedecision(inline, shouldinline_fused)
126+
lower(ls, order_fused, unrolled_fused, tiled_fused, vectorized_fused, U_fused, T_fused, doinline)
125127
end
126128

127129

0 commit comments

Comments
 (0)