Skip to content

Commit 1f67453

Browse files
committed
Try additionally unrolling innermost non-unrolled non-vectorized loops.
1 parent f53d6c6 commit 1f67453

File tree

5 files changed

+40
-7
lines changed

5 files changed

+40
-7
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
1515
[compat]
1616
DocStringExtensions = "0.8"
1717
OffsetArrays = "1"
18-
SIMDPirates = "0.7.16"
18+
SIMDPirates = "0.7.20"
1919
SLEEFPirates = "0.4.4"
2020
UnPack = "0"
2121
VectorizationBase = "0.11.2"

src/determinestrategy.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -860,6 +860,7 @@ function choose_tile(ls::LoopSet)
860860
new_order, state = iter
861861
end
862862
end
863+
ls.loadelimination[] = shouldinline
863864
best_order, bestu₁, bestu₂, best_vec, u₁, u₂, lowest_cost, shouldinline
864865
end
865866
# Last in order is the inner most loop

src/graphs.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,7 @@ struct LoopSet
182182
included_vars::Vector{Bool}
183183
place_after_loop::Vector{Bool}
184184
unrollspecification::Base.RefValue{UnrollSpecification}
185+
loadelimination::Base.RefValue{Bool}
185186
mod::Symbol
186187
end
187188

@@ -280,7 +281,7 @@ function LoopSet(mod::Symbol)
280281
ArrayReferenceMeta[],
281282
Matrix{Float64}(undef, 4, 2),
282283
Matrix{Float64}(undef, 4, 2),
283-
Bool[], Bool[], Ref{UnrollSpecification}(), mod
284+
Bool[], Bool[], Ref{UnrollSpecification}(), Ref(false), mod
284285
)
285286
end
286287

src/lowering.jl

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -145,14 +145,46 @@ function lower_block(
145145
push!(blockq.args, incrementloopcounter(us, n, loopsym, UF))
146146
blockq
147147
end
148+
149+
function lower_llvm_unroll(ls::LoopSet, us::UnrollSpecification, n::Int, loop::Loop)
150+
loopsym = names(ls)[n]
151+
loop = getloop(ls, loopsym)
152+
# loopsym = mangletiledsym(loopsym, us, n)
153+
nisvectorized = false#isvectorized(us, n)
154+
sl = startloop(loop, nisvectorized, loopsym)
155+
# tc = terminatecondition(loop, us, n, loopsym, inclmask, 1)
156+
looprange = if loop.startexact
157+
if loop.stopexact
158+
Expr(:(=), loopsym, Expr(:call, :(:), loop.starthint, loop.stophint))
159+
else
160+
Expr(:(=), loopsym, Expr(:call, :(:), loop.starthint, loop.stopsym))
161+
end
162+
elseif loop.stopexact
163+
Expr(:(=), loopsym, Expr(:call, :(:), loop.startsym, loop.stophint))
164+
else
165+
Expr(:(=), loopsym, Expr(:call, :(:), loop.startsym, loop.stopsym))
166+
end
167+
body = lower_block(ls, us, n, false, 1)
168+
push!(body.args, Expr(:loopinfo, (Symbol("llvm.loop.unroll.count"), 4)))
169+
# q = Expr( :block, sl, Expr(:while, tc, body))
170+
q = Expr(:for, looprange, body)
171+
# if nisvectorized
172+
# tc = terminatecondition(loop, us, n, loopsym, true, 1)
173+
# body = lower_block(ls, us, n, true, 1)
174+
# push!(q.args, Expr(:if, tc, body))
175+
# end
176+
q
177+
end
148178
# tiledsym(s::Symbol) = Symbol("##outer##", s, "##outer##")
149179
# mangletiledsym(s::Symbol, us::UnrollSpecification, n::Int) = isunrolled2(us, n) ? tiledsym(s) : s
150180
function lower_no_unroll(ls::LoopSet, us::UnrollSpecification, n::Int, inclmask::Bool)
181+
usorig = ls.unrollspecification[]
182+
nisvectorized = isvectorized(us, n)
151183
loopsym = names(ls)[n]
152184
loop = getloop(ls, loopsym)
153-
# loopsym = mangletiledsym(loopsym, us, n)
154-
nisvectorized = isvectorized(us, n)
155-
185+
if VERSION ≥ v"1.4" && !nisvectorized && !inclmask && isone(n) && !ls.loadelimination[] && (us.u₁ > 1) && (usorig.u₁ == us.u₁) && (usorig.u₂ == us.u₂) && length(loop) > 7
186+
return lower_llvm_unroll(ls, us, n, loop)
187+
end
156188
sl = startloop(loop, nisvectorized, loopsym)
157189
tc = terminatecondition(loop, us, n, loopsym, inclmask, 1)
158190
body = lower_block(ls, us, n, inclmask, 1)
@@ -171,7 +203,6 @@ function lower_unrolled_dynamic(ls::LoopSet, us::UnrollSpecification, n::Int, in
171203
order = names(ls)
172204
loopsym = order[n]
173205
loop = getloop(ls, loopsym)
174-
# loopsym = mangletiledsym(loopsym, us, n)
175206
vectorized = order[vectorizedloopnum]
176207
nisunrolled = isunrolled1(us, n)
177208
nisvectorized = isvectorized(us, n)

src/reconstruct_loopset.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -461,7 +461,7 @@ Execute an `@avx` block. The block's code is represented via the arguments:
461461
- `vargs...` holds the encoded pointers of all the arrays (see `VectorizationBase`'s various pointer types).
462462
"""
463463
@generated function _avx_!(::Val{UNROLL}, ::Type{OPS}, ::Type{ARF}, ::Type{AM}, ::Type{LPSYM}, lb::LB, vargs...) where {UNROLL, OPS, ARF, AM, LPSYM, LB}
464-
# 1 + 1 # Irrelevant line you can comment out/in to force recompilation...
464+
1 + 1 # Irrelevant line you can comment out/in to force recompilation...
465465
ls = _avx_loopset(OPS.parameters, ARF.parameters, AM.parameters, LPSYM.parameters, LB.parameters, vargs)
466466
# @show avx_body(ls, UNROLL)
467467
avx_body(ls, UNROLL)

0 commit comments

Comments
 (0)