@@ -270,6 +270,12 @@ function solve_tilesize(
270
270
solve_tilesize (cost_vec, reg_pressure, maxU, maxT)
271
271
end
272
272
273
"""
    set_for_each_parent!(adal, op, val)

Store `val` in `adal` at position `identifier(opp)` for every `opp` in `parents(op)`.

`adal` is mutated in place and `nothing` is returned.
"""
function set_for_each_parent!(adal::Vector{T}, op::Operation, val::T) where {T}
    for parent in parents(op)
        # Bounds checks are elided for this write. This presumes `identifier(parent)`
        # is always a valid index into `adal` — TODO confirm against callers.
        @inbounds adal[identifier(parent)] = val
    end
    return nothing
end
278
+
273
279
# Just tile outer two loops?
274
280
# But optimal order within tile must still be determined
275
281
# as well as size of the tiles.
@@ -280,7 +286,13 @@ function evaluate_cost_tile(
280
286
@assert N ≥ 2 " Cannot tile merely $N loops!"
281
287
tiled = order[1 ]
282
288
unrolled = order[2 ]
283
- included_vars = fill (false , length (operations (ls)))
289
+ ops = operations (ls)
290
+ nops = length (ops)
291
+ included_vars = fill (false , nops)
292
+ unrolledtiled = fill (false , 2 , nops)
293
+ descendentsininnerloop = fill (false , nops)
294
+ innerloop = last (order)
295
+ iters = fill (- 99.9 , nops)
284
296
nested_loop_syms = Symbol[]# Set{Symbol}()
285
297
iter = 1.0
286
298
# Need to check if fusion is possible
@@ -306,7 +318,7 @@ function evaluate_cost_tile(
306
318
iter *= Float64 (length (ls, itersym))
307
319
end
308
320
# check which vars we can define at this level of loop nest
309
- for (id, op) ∈ enumerate (operations (ls) )
321
+ for (id, op) ∈ enumerate (ops )
310
322
# isconstant(op) && continue
311
323
# @assert id == identifier(op)+1 # testing, for now
312
324
# won't define if already defined...
@@ -318,27 +330,37 @@ function evaluate_cost_tile(
318
330
rd = reduceddependencies (op)
319
331
hasintersection (rd, nested_loop_syms[1 : end - length (rd)]) && return 0 ,0 ,Inf
320
332
included_vars[id] = true
321
- rt, lat, rp = cost (op, vectorized, Wshift, size_T)
333
+ unrolledtiled[1 ,id] = unrolled ∈ loopdependencies (op)
334
+ unrolledtiled[2 ,id] = tiled ∈ loopdependencies (op)
335
+ iters[id] = iter
336
+ innerloop ∈ loopdependencies (op) && set_for_each_parent! (descendentsininnerloop, op, true )
337
+ end
338
+ end
339
+ for (id, op) ∈ enumerate (ops)
340
+ iters[id] == - 99.9 && continue
341
+ descendentsininnerloop[id] || continue
342
+ isunrolled = unrolledtiled[1 ,id]
343
+ istiled = unrolledtiled[2 ,id]
344
+ rt, lat, rp = cost (op, vectorized, Wshift, size_T)
322
345
# @show instruction(op), rt, lat, rp, iter
323
- rt *= iter
324
- isunrolled = unrolled ∈ loopdependencies (op)
325
- istiled = tiled ∈ loopdependencies (op)
346
+ rt *= iters[id]
326
347
# @show isunrolled, istiled
327
- if isunrolled && istiled # no cost decrease; cost must be repeated
328
- cost_vec[1 ] += rt
329
- reg_pressure[1 ] += rp
330
- elseif isunrolled # cost decreased by tiling
331
- cost_vec[2 ] += rt
332
- reg_pressure[2 ] += rp
333
- elseif istiled # cost decreased by unrolling
334
- cost_vec[3 ] += rt
335
- reg_pressure[3 ] += rp
336
- else # neither unrolled or tiled
337
- cost_vec[4 ] += rt
338
- reg_pressure[4 ] += rp
339
- end
348
+ if isunrolled && istiled # no cost decrease; cost must be repeated
349
+ cost_vec[1 ] += rt
350
+ reg_pressure[1 ] += rp
351
+ elseif isunrolled # cost decreased by tiling
352
+ cost_vec[2 ] += rt
353
+ reg_pressure[2 ] += rp
354
+ elseif istiled # cost decreased by unrolling
355
+ cost_vec[3 ] += rt
356
+ reg_pressure[3 ] += rp
357
+ else # neither unrolled or tiled
358
+ cost_vec[4 ] += rt
359
+ reg_pressure[4 ] += rp
340
360
end
341
361
end
362
+ # @show order, vectorized cost_vec reg_pressure
363
+ # @show solve_tilesize(ls, unrolled, tiled, cost_vec, reg_pressure)
342
364
solve_tilesize (ls, unrolled, tiled, cost_vec, reg_pressure)
343
365
end
344
366
0 commit comments