Skip to content

Commit b5cf11b

Browse files
committed
Use `maybedemotesize` in fully static loop unroll conditions.
1 parent e77b070 commit b5cf11b

File tree

3 files changed

+6
-4
lines changed

3 files changed

+6
-4
lines changed

src/determinestrategy.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -472,7 +472,7 @@ function solve_unroll(
472472
if isstaticloop(u₂loop)
473473
if u₂loopsym !== vectorized && u₂L ≤ 4
474474
u₁ = max(1, solve_unroll_constT(reg_pressure, u₂L))
475-
u₁ = isstaticloop(u₁loop) ? min(u₁, u₁L) : u₁
475+
u₁ = isstaticloop(u₁loop) ? maybedemotesize(u₁, u₁L) : u₁
476476
return u₁, u₂L, unroll_cost(cost_vec, u₁, u₂L, u₁L, u₂L)
477477
end
478478
u₂L = u₂loopsym === vectorized ? cld(u₂L,W) : u₂L
@@ -481,7 +481,7 @@ function solve_unroll(
481481
if isstaticloop(u₁loop)
482482
if u₁loopsym !== vectorized && u₁L ≤ 4
483483
u₂ = max(1, solve_unroll_constU(reg_pressure, u₁L))
484-
u₂ = isstaticloop(u₂loop) ? min(u₂, u₂L) : u₂
484+
u₂ = isstaticloop(u₂loop) ? maybedemotesize(u₂, u₂L) : u₂
485485
return u₁L, u₂, unroll_cost(cost_vec, u₁L, u₂, u₁L, u₂L)
486486
end
487487
u₁L = u₁loopsym === vectorized ? cld(u₁L,W) : u₁L

src/memory_ops_common.jl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,9 @@ function subset_vptr!(ls::LoopSet, vptr::Symbol, indnum::Int, ind, previndices,
7272
if loopindex[i]
7373
append_loop_valdims!(valcall, getloop(ls, previndices[i+offset]))
7474
else
75-
for loopdep ∈ loopdependencies(ls.opdict[previndices[i+offset]])
75+
# assumes all valdims will be of equal length once expanded...
76+
# A[I + J, constindex], I and J may be CartesianIndices. This requires they all be of same number of dims
77+
let loopdep = first(loopdependencies(ls.opdict[previndices[i+offset]]))
7678
append_loop_valdims!(valcall, getloop(ls, loopdep))
7779
end
7880
end

src/reconstruct_loopset.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -492,7 +492,7 @@ Execute an `@avx` block. The block's code is represented via the arguments:
492492
- `vargs...` holds the encoded pointers of all the arrays (see `VectorizationBase`'s various pointer types).
493493
"""
494494
@generated function _avx_!(::Val{UNROLL}, ::Type{OPS}, ::Type{ARF}, ::Type{AM}, ::Type{LPSYM}, lb::LB, vargs...) where {UNROLL, OPS, ARF, AM, LPSYM, LB}
495-
1 + 1 # Irrelevant line you can comment out/in to force recompilation...
495+
# 1 + 1 # Irrelevant line you can comment out/in to force recompilation...
496496
ls = _avx_loopset(OPS.parameters, ARF.parameters, AM.parameters, LPSYM.parameters, LB.parameters, vargs)
497497
# @show avx_body(ls, UNROLL)
498498
# @show UNROLL, OPS, ARF, AM, LPSYM, LB

0 commit comments

Comments
 (0)