Skip to content

Commit f19f906

Browse files
committed
Reduce unrolling in some static cases.
1 parent e0064f1 commit f19f906

File tree

2 files changed

+12
-4
lines changed

2 files changed

+12
-4
lines changed

Project.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <[email protected]>"]
4-
version = "0.9.5"
4+
version = "0.9.6"
55

66
[deps]
77
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
@@ -20,7 +20,7 @@ IfElse = "0.1"
2020
OffsetArrays = "1"
2121
SLEEFPirates = "0.6"
2222
UnPack = "1"
23-
VectorizationBase = "0.13.8"
23+
VectorizationBase = "0.13.10"
2424
julia = "1.5"
2525

2626
[extras]

src/lowering.jl

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -264,13 +264,21 @@ end
264264
# end
265265

266266
function allinteriorunrolled(ls::LoopSet, us::UnrollSpecification, N)
267+
unroll_total = 1
267268
for n ∈ 1:N-1
268269
loop = getloop(ls, names(ls)[n])
269270
nisvectorized = isvectorized(us, n)
270271
W = nisvectorized ? ls.vector_width[] : 1
271272
((length(loop) ≤ 8W) && (isstaticloop(loop) & (!iszero(W)))) || return false
273+
unroll_total *= cld(length(loop),W)
272274
end
273-
true
275+
if us.u₁loopnum > N
276+
unroll_total *= us.u₁
277+
end
278+
if us.u₂loopnum > N
279+
unroll_total *= us.u₂
280+
end
281+
unroll_total ≤ 8
274282
end
275283

276284
function lower_no_unroll(ls::LoopSet, us::UnrollSpecification, n::Int, inclmask::Bool)
@@ -296,7 +304,7 @@ function lower_no_unroll(ls::LoopSet, us::UnrollSpecification, n::Int, inclmask:
296304
# q = if align_loop
297305
# Expr(:block, align_inner_loop_expr(ls, us, loop), Expr(:while, tc, body))
298306
# elseif nisvectorized
299-
if loopisstatic && length(loop) ≤ 8W && allinteriorunrolled(ls, us, n)
307+
if loopisstatic && (isone(length(loop) ÷ W) || (n ≤ 3 && length(loop) ≤ 8W && allinteriorunrolled(ls, us, n)))
300308
q = Expr(:block)
301309
foreach(_ -> push!(q.args, body), 1:(length(loop) ÷ W))
302310
elseif nisvectorized

0 commit comments

Comments
 (0)