Skip to content

Commit 5f486c9

Browse files
committed
Fix unrolled stepranges
1 parent 4b15f2a commit 5f486c9

File tree

7 files changed

+825
-768
lines changed

7 files changed

+825
-768
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <[email protected]>"]
4-
version = "0.12.59"
4+
version = "0.12.60"
55

66
[deps]
77
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"

src/codegen/lower_compute.jl

Lines changed: 32 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -62,43 +62,44 @@ function parent_unroll_status(op::Operation, u₁loop::Symbol, u₂loop::Symbol,
6262
end
6363

6464
function _add_loopvalue!(ex::Expr, loopval::Symbol, vloop::Loop, u::Int)
65-
vloopsym = vloop.itersymbol
66-
if loopval === vloopsym
67-
if iszero(u)
68-
push!(ex.args, _MMind(loopval, step(vloop)))
69-
else
70-
mm = _MMind(loopval, step(vloop))
71-
if isone(u)
72-
push!(ex.args, Expr(:call, lv(:vadd_nsw), VECTORWIDTHSYMBOL, mm))
73-
else
74-
push!(ex.args, Expr(:call, lv(:vadd_nsw), Expr(:call, lv(:vmul_nsw), VECTORWIDTHSYMBOL, u), mm))
75-
end
76-
end
77-
elseif u == 0
78-
push!(ex.args, loopval)
65+
vloopsym = vloop.itersymbol
66+
if loopval === vloopsym
67+
if iszero(u)
68+
push!(ex.args, _MMind(loopval, step(vloop)))
7969
else
80-
push!(ex.args, Expr(:call, lv(:vadd_nsw), loopval, staticexpr(u)))
70+
vstep = step(vloop)
71+
mm = _MMind(loopval, vstep)
72+
if isone(u) & isone(vstep)
73+
push!(ex.args, Expr(:call, lv(:vadd_nsw), VECTORWIDTHSYMBOL, mm))
74+
else
75+
push!(ex.args, Expr(:call, lv(:vadd_nsw), mulexpr(VECTORWIDTHSYMBOL, u, vstep), mm))
76+
end
8177
end
78+
elseif u == 0
79+
push!(ex.args, loopval)
80+
else
81+
push!(ex.args, Expr(:call, lv(:vadd_nsw), loopval, staticexpr(u)))
82+
end
8283
end
8384
function add_loopvalue!(instrcall::Expr, loopval, ua::UnrollArgs, u₁::Int)
84-
@unpack u₁loopsym, u₂loopsym, vloopsym, vloop, suffix = ua
85-
if loopval === u₁loopsym #parentsunrolled[n]
86-
if isone(u₁)
87-
_add_loopvalue!(instrcall, loopval, vloop, 0)
88-
else
89-
t = Expr(:tuple)
90-
for u ∈ 0:u₁-1
91-
_add_loopvalue!(t, loopval, vloop, u)
92-
end
93-
push!(instrcall.args, Expr(:call, lv(:VecUnroll), t))
94-
end
95-
elseif suffix > 0 && loopval === u₂loopsym
96-
_add_loopvalue!(instrcall, loopval, vloop, suffix)
97-
elseif loopval === vloopsym
98-
push!(instrcall.args, _MMind(loopval, step(vloop)))
85+
@unpack u₁loopsym, u₂loopsym, vloopsym, vloop, suffix = ua
86+
if loopval === u₁loopsym #parentsunrolled[n]
87+
if isone(u₁)
88+
_add_loopvalue!(instrcall, loopval, vloop, 0)
9989
else
100-
push!(instrcall.args, loopval)
90+
t = Expr(:tuple)
91+
for u ∈ 0:u₁-1
92+
_add_loopvalue!(t, loopval, vloop, u)
93+
end
94+
push!(instrcall.args, Expr(:call, lv(:VecUnroll), t))
10195
end
96+
elseif suffix > 0 && loopval === u₂loopsym
97+
_add_loopvalue!(instrcall, loopval, vloop, suffix)
98+
elseif loopval === vloopsym
99+
push!(instrcall.args, _MMind(loopval, step(vloop)))
100+
else
101+
push!(instrcall.args, loopval)
102+
end
102103
end
103104

104105
vecunrolllen(::Type{VecUnroll{N,W,T,V}}) where {N,W,T,V} = (N::Int + 1)

src/codegen/lowering.jl

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -412,31 +412,31 @@ function unrollremcomparison(ls::LoopSet, loop::Loop, UFt::Int, n::Int, nisvecto
412412
end
413413
end
414414
function loopvarremcomparison(loop::Loop, UFt::Int, nisvectorized::Bool, remfirst::Bool)
415-
loopsym = loop.itersymbol
416-
loopstep = loop.step
417-
if nisvectorized
418-
offset = mulexpr(VECTORWIDTHSYMBOL, UFt, loopstep)
419-
itercount = subexpr(last(loop), offset)
420-
Expr(:call, GlobalRef(Base,:>), loopsym, itercount)
421-
elseif remfirst # requires `isstaticloop(loop)`
422-
Expr(:call, GlobalRef(Base,:<), loopsym, gethint(first(loop)) + UFt*gethint(loopstep) - 1)
423-
elseif isknown(last(loop))
424-
if isknown(loopstep)
425-
Expr(:call, GlobalRef(Base,:>), loopsym, gethint(last(loop)) - UFt*gethint(loopstep))
426-
elseif isone(UFt)
427-
Expr(:call, GlobalRef(Base,:>), loopsym, subexpr(gethint(last(loop)), getsym(loopstep)))
428-
else
429-
Expr(:call, GlobalRef(Base,:>), loopsym, subexpr(gethint(last(loop)), mulexpr(getsym(loopstep), UFt)))
430-
end
415+
loopsym = loop.itersymbol
416+
loopstep = loop.step
417+
if nisvectorized
418+
offset = mulexpr(VECTORWIDTHSYMBOL, UFt, loopstep)
419+
itercount = subexpr(last(loop), offset)
420+
Expr(:call, GlobalRef(Base,:>), loopsym, itercount)
421+
elseif remfirst # requires `isstaticloop(loop)`
422+
Expr(:call, GlobalRef(Base,:<), loopsym, gethint(first(loop)) + UFt*gethint(loopstep) - 1)
423+
elseif isknown(last(loop))
424+
if isknown(loopstep)
425+
Expr(:call, GlobalRef(Base,:>), loopsym, gethint(last(loop)) - UFt*gethint(loopstep))
426+
elseif isone(UFt)
427+
Expr(:call, GlobalRef(Base,:>), loopsym, subexpr(gethint(last(loop)), getsym(loopstep)))
431428
else
432-
if isknown(loopstep)
433-
Expr(:call, GlobalRef(Base,:>), loopsym, Expr(:call, lv(:vsub_nsw), getsym(last(loop)), UFt*gethint(loopstep)))
434-
elseif isone(UFt)
435-
Expr(:call, GlobalRef(Base,:>), loopsym, Expr(:call, lv(:vsub_nsw), getsym(last(loop)), getsym(loopstep)))
436-
else
437-
Expr(:call, GlobalRef(Base,:>), loopsym, Expr(:call, lv(:vsub_nsw), getsym(last(loop)), mulexpr(getsym(loopstep), UFt)))
438-
end
429+
Expr(:call, GlobalRef(Base,:>), loopsym, subexpr(gethint(last(loop)), mulexpr(getsym(loopstep), UFt)))
439430
end
431+
else
432+
if isknown(loopstep)
433+
Expr(:call, GlobalRef(Base,:>), loopsym, Expr(:call, lv(:vsub_nsw), getsym(last(loop)), UFt*gethint(loopstep)))
434+
elseif isone(UFt)
435+
Expr(:call, GlobalRef(Base,:>), loopsym, Expr(:call, lv(:vsub_nsw), getsym(last(loop)), getsym(loopstep)))
436+
else
437+
Expr(:call, GlobalRef(Base,:>), loopsym, Expr(:call, lv(:vsub_nsw), getsym(last(loop)), mulexpr(getsym(loopstep), UFt)))
438+
end
439+
end
440440
end
441441
function pointerremcomparison(ls::LoopSet, termind::Int, UFt::Int, n::Int, nisvectorized::Bool, remfirst::Bool, loop::Loop)
442442
lssm = ls.lssm

0 commit comments

Comments
 (0)