236
236
function solve_tilesize (X, R, Umax, Tmax)
237
237
first (R) == 0 && return - 1 ,- 1 ,Inf # solve_smalltilesize(X, R, Umax, Tmax)
238
238
U, T, cost = solve_tilesize (X, R)
239
+ T -= T & 1
240
+ U = min (U, T)
239
241
U_too_large = U > Umax
240
242
T_too_large = T > Tmax
241
243
if U_too_large
@@ -257,7 +259,7 @@ function solve_tilesize(
257
259
cost_vec:: AbstractVector{Float64} = @view (ls. cost_vec[:,1 ]),
258
260
reg_pressure:: AbstractVector{Int} = @view (ls. reg_pres[:,1 ])
259
261
)
260
- maxT = 4
262
+ maxT = 8
261
263
maxU = 8
262
264
if isstaticloop (ls, tiled)
263
265
maxT = min (maxT, looprangehint (ls, tiled))
@@ -436,7 +438,8 @@ function choose_order(ls::LoopSet)
436
438
end
437
439
uorder, uvec, uc = choose_unroll_order (ls, tc)
438
440
if num_loops (ls) > 1 && tc ≤ uc
439
- return torder, tvec, tU, tT
441
+ return torder, tvec, min (tU, tT), tT
442
+ # return torder, tvec, 4, 4#5, 5
440
443
else
441
444
return uorder, uvec, determine_unroll_factor (ls, uorder, first (uorder), uvec), - 1
442
445
end
0 commit comments