Skip to content

Commit fb7bf0e

Browse files
committed
Unroll fix.
1 parent 77aa28b commit fb7bf0e

File tree

2 files changed

+2
-6
lines changed

2 files changed

+2
-6
lines changed

src/determinestrategy.jl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -244,7 +244,7 @@ function solve_tilesize(
244 244   )
245 245   maxT = isstaticloop(ls, tiled) ? looprangehint(ls, tiled) : 8#REGISTER_COUNT
246 246   maxU = isstaticloop(ls, unrolled) ? looprangehint(ls, unrolled) : 4#REGISTER_COUNT
247     - solve_tilesize(cost_vec, reg_pressure, maxT, maxU)
    247 + solve_tilesize(cost_vec, reg_pressure, maxU, maxT)
248 248   end
249 249
250 250   # Just tile outer two loops?

test/runtests.jl

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -43,11 +43,7 @@ gemmq = :(for i ∈ 1:size(A,1), j ∈ 1:size(B,2)
43 43   end)
44 44
45 45   lsgemm = LoopVectorization.LoopSet(gemmq);
46    - U, T = if LoopVectorization.VectorizationBase.REGISTER_COUNT == 16
47    - (3,4)
48    - else
49    - (4,6)
50    - end
   46 + U, T = LoopVectorization.VectorizationBase.REGISTER_COUNT == 16 ? (3,4) : (4, 6)
51 47   @test LoopVectorization.choose_order(lsgemm) == (Symbol[:j,:i,:k], U, T)
52 48
53 49   function mygemm!(C, A, B)

0 commit comments

Comments
 (0)