Skip to content

Commit 8258780

Browse files
committed
Unrolling tweaks
1 parent c71b0be commit 8258780

File tree

5 files changed

+4
-4
lines changed

5 files changed

+4
-4
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <[email protected]>"]
4-
version = "0.12.12"
4+
version = "0.12.13"
55

66
[deps]
77
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
-5.04 KB
Loading

docs/src/assets/bench_selfdot_v2.png

-6.75 KB
Loading

src/codegen/lowering.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ function lower_no_unroll(ls::LoopSet, us::UnrollSpecification, n::Int, inclmask:
184184
# q = if align_loop
185185
# Expr(:block, align_inner_loop_expr(ls, us, loop), Expr(:while, tc, body))
186186
# elseif nisvectorized
187-
if loopisstatic && (isone(length(loop) ÷ W) || (n ≤ 3 && length(loop) ≤ 8W && allinteriorunrolled(ls, us, n)))
187+
if loopisstatic && (!ls.loadelimination) && (isone(length(loop) ÷ W) || (n ≤ 3 && length(loop) ≤ 8W && allinteriorunrolled(ls, us, n)))
188188
q = Expr(:block)
189189
for _ ∈ 1:(length(loop) ÷ W)
190190
push!(q.args, body)

src/modeling/determinestrategy.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -580,7 +580,7 @@ function solve_unroll(
580580
if isstaticloop(u₂loop)
581581
if u₂loopsym !== vloopsym && u₂L ≤ 4
582582
u₁ = max(1, solve_unroll_constT(reg_pressure, u₂L))
583-
u₁ = isstaticloop(u₁loop) ? maybedemotesize(u₁, u₁L) : u₁
583+
u₁ = isstaticloop(u₁loop) ? maybedemotesize(u₁, u₁loopsym === vloopsym ? cld(u₁L,W) : u₁L) : u₁
584584
return u₁, u₂L, unroll_cost(cost_vec, u₁, u₂L, u₁L, u₂L)
585585
end
586586
u₂Ltemp = u₂loopsym === vloopsym ? cld(u₂L, W) : u₂L
@@ -589,7 +589,7 @@ function solve_unroll(
589589
if isstaticloop(u₁loop)
590590
if u₁loopsym !== vloopsym && u₁L ≤ 4
591591
u₂ = max(1, solve_unroll_constU(reg_pressure, u₁L))
592-
u₂ = isstaticloop(u₂loop) ? maybedemotesize(u₂, u₂L) : u₂
592+
u₂ = isstaticloop(u₂loop) ? maybedemotesize(u₂, u₂loopsym === vloopsym ? cld(u₂L,W) : u₂L) : u₂
593593
return u₁L, u₂, unroll_cost(cost_vec, u₁L, u₂, u₁L, u₂L)
594594
end
595595
u₁Ltemp = u₁loopsym === vloopsym ? cld(u₁L, W) : u₁L

0 commit comments

Comments
 (0)