Skip to content

Commit 6f84d62

Browse files
committed
Optimize step checks slightly.
1 parent a232255 commit 6f84d62

File tree

2 files changed

+11
-6
lines changed

2 files changed

+11
-6
lines changed

src/modeling/graphs.jl

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -207,10 +207,13 @@ staticmulincr(ptr, incr) = Expr(:call, lv(:staticmul), Expr(:call, :eltype, ptr)
207207
# @inline cmpend(i::Int, r::AbstractRange) = i ≤ vsub_fast(last(r), step(r))
208208

209209
@inline vcmpend(i::Int, r::CloseOpen, ::StaticInt{W}) where {W} = i ≤ vsub_fast(getfield(r,:upper), W)
210+
@inline vcmpendzs(i::Int, r::CloseOpen, ::StaticInt{W}) where {W} = i ≠ (getfield(r,:upper) & (-W))
210211
@inline vcmpend(i::Int, r::AbstractUnitRange, ::StaticInt{W}) where {W} = i ≤ vsub_fast(last(r), W-1)
212+
@inline vcmpendzs(i::Int, r::AbstractUnitRange, ::StaticInt{W}) where {W} = i ≠ (length(r) & (-W))
211213
# i = 0
212214
# i += 4*3 # i = 12
213215
@inline vcmpend(i::Int, r::AbstractRange, ::StaticInt{W}) where {W} = i ≤ vsub_fast(last(r), vsub_fast(W*step(r), 1))
216+
@inline vcmpendzs(i::Int, r::AbstractRange, ::StaticInt{W}) where {W} = i ≤ vsub_fast(last(r), vsub_fast(W*step(r), 1))
214217
# @inline vcmpend(i::Int, r::AbstractRange, ::StaticInt{W}) where {W} = i ≤ vsub_fast(last(r), W*step(r))
215218
# @inline vcmpend(i::Int, r::AbstractRange, ::StaticInt{W}) where {W} = i ≤ vsub_fast(last(r), W)
216219

@@ -225,17 +228,19 @@ function staticloopexpr(loop::Loop)
225228
end
226229
end
227230
function vec_looprange(loop::Loop, UF::Int, mangledname)
231+
fast = ispow2(UF) && iszero(first(loop))
228232
if loop.rangesym === Symbol("") # means loop is static
229-
vec_looprange(UF, mangledname, staticloopexpr(loop))
233+
vec_looprange(UF, mangledname, staticloopexpr(loop), fast)
230234
else
231-
vec_looprange(UF, mangledname, loop.rangesym)
235+
vec_looprange(UF, mangledname, loop.rangesym, fast)
232236
end
233237
end
234-
function vec_looprange(UF::Int, mangledname, r::Union{Expr,Symbol})
238+
function vec_looprange(UF::Int, mangledname, r::Union{Expr,Symbol}, zerostart::Bool)
239+
cmp = zerostart ? lv(:vcmpendzs) : lv(:vcmpend)
235240
if isone(UF)
236-
Expr(:call, lv(:vcmpend), mangledname, r, VECTORWIDTHSYMBOL)
241+
Expr(:call, cmp, mangledname, r, VECTORWIDTHSYMBOL)
237242
else
238-
Expr(:call, lv(:vcmpend), mangledname, r, mulexpr(VECTORWIDTHSYMBOL, UF))
243+
Expr(:call, cmp, mangledname, r, mulexpr(VECTORWIDTHSYMBOL, UF))
239244
end
240245
end
241246
function looprange(loop::Loop, UF::Int, mangledname)

src/reconstruct_loopset.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -672,7 +672,7 @@ Execute an `@avx` block. The block's code is represented via the arguments:
672672
@aggressive_constprop @generated function _avx_!(
673673
::Val{var"#UNROLL#"}, ::Val{var"#OPS#"}, ::Val{var"#ARF#"}, ::Val{var"#AM#"}, ::Val{var"#LPSYM#"}, var"#lv#tuple#args#"::Tuple{var"#LB#",var"#V#"}
674674
) where {var"#UNROLL#", var"#OPS#", var"#ARF#", var"#AM#", var"#LPSYM#", var"#LB#", var"#V#"}
675-
1 + 1 # Irrelevant line you can comment out/in to force recompilation...
675+
# 1 + 1 # Irrelevant line you can comment out/in to force recompilation...
676676
ls = _avx_loopset(var"#OPS#", var"#ARF#", var"#AM#", var"#LPSYM#", var"#LB#".parameters, var"#V#".parameters, var"#UNROLL#")
677677
# return @show avx_body(ls, var"#UNROLL#")
678678
if last(var"#UNROLL#") > 1

0 commit comments

Comments
 (0)