Skip to content

Commit 1e35018

Browse files
committed
Limit single-unrolling based on number of available registers
Set reg - const to at least 8 for single unroll no reductions
1 parent 93d2be2 commit 1e35018

File tree

1 file changed

+7
-3
lines changed

1 file changed

+7
-3
lines changed

src/modeling/determinestrategy.jl

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -240,16 +240,18 @@ function unroll_no_reductions(ls, order, vloopsym)
240240
end
241241
# latency not a concern, because no depchains
242242
compute_l = 0.0
243-
# rp = 0
243+
rpp = 0 # register pressure proportional to unrolling
244+
rpc = 0 # register pressure independent of unroll factor
244245
for op ∈ operations(ls)
245246
isu₁unrolled(op) || continue
246247
rt, sl, rpop = cost(ls, op, (unrolled,Symbol("")), vloopsym, Wshift, size_T)
247-
# rp += rpop
248248
if iscompute(op)
249249
compute_rt += rt
250250
compute_l += sl
251+
rpc += rpop # constant loads for special functions reused with unrolling
251252
elseif isload(op)
252253
load_rt += rt
254+
rpp += rpop # loads are proportional to unrolling
253255
elseif isstore(op)
254256
store_rt += rt
255257
end
@@ -277,7 +279,9 @@ function unroll_no_reductions(ls, order, vloopsym)
277279
if unrolled === vloopsym
278280
u = demote_unroll_factor(ls, u, vloopsym)
279281
end
280-
u, unrolled
282+
remaining_reg = max(8, (reg_count(ls) - round(Int,rpc))) # spilling a few consts isn't so bad
283+
reg_constraint = max(1, remaining_reg ÷ round(Int,rpp))
284+
clamp(u, 1, reg_constraint), unrolled
281285
# rt = max(compute_rt, load_rt + store_rt)
282286
# # (iszero(rt) ? 4 : max(1, roundpow2( min( 4, round(Int, 16 / rt) ) ))), unrolled
283287
# (iszero(rt) ? 4 : max(1, VectorizationBase.nextpow2( min( 4, round(Int, 8 / rt) ) ))), unrolled

0 commit comments

Comments
 (0)