Skip to content

Commit e5b969f

Browse files
committed
Fixed bug in determining tiling cost.
1 parent d21d018 commit e5b969f

File tree

2 files changed

+6
-1
lines changed

2 files changed

+6
-1
lines changed

src/determinestrategy.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ function evaluate_cost_tile(
239239
# Add to set of defined symbols
240240
push!(nested_loop_syms, itersym)
241241
if n == 1
242-
iter = length(ls, itersym) * length(ls, order[2]) / N
242+
iter = length(ls, itersym) * length(ls, order[2]) / W
243243
elseif n > 2
244244
iter *= Float64(length(ls, itersym))
245245
end
@@ -279,6 +279,7 @@ function evaluate_cost_tile(
279279
end
280280
Tstatic = isstaticloop(ls, tiled)
281281
Ustatic = isstaticloop(ls, unrolled)
282+
# @show order, cost_vec, reg_pressure
282283
if Tstatic
283284
if Ustatic
284285
solve_tilesize(cost_vec, reg_pressure, looprangehint(ls, tiled), looprangehint(ls, unrolled))

test/runtests.jl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,13 @@ else
5454
(5,5)
5555
end
5656
@test LoopVectorization.choose_order(lsgemm) == (Symbol[:j,:i,:k], U, T)
57+
LoopVectorization.choose_order(lsgemm)
5758
LoopVectorization.lower(lsgemm)
5859
lsgemm.operations
5960

61+
LoopVectorization.choose_tile(lsgemm)
62+
LoopVectorization.choose_unroll_order(lsgemm)
63+
6064
ops = LoopVectorization.oporder(lsgemm);
6165
findall(length.(ops) .!= 0)
6266

0 commit comments

Comments
 (0)