@@ -208,18 +208,18 @@ function lower_no_unroll(ls::LoopSet, us::UnrollSpecification, n::Int, inclmask:
208
208
# end
209
209
end
210
210
function lower_unrolled_dynamic (ls:: LoopSet , us:: UnrollSpecification , n:: Int , inclmask:: Bool )
211
- UF = unrollfactor (us, n)
212
- isone (UF) && return lower_no_unroll (ls, us, n, inclmask)
213
- @unpack u₁loopnum, vloopnum, u₁, u₂ = us
214
- order = names (ls)
215
- loopsym = order[n]
216
- loop = getloop (ls, n)
217
- vectorized = order[vloopnum]
218
- nisunrolled = isunrolled1 (us, n)
219
- nisvectorized = isvectorized (us, n)
220
- W = nisvectorized ? ls. vector_width : 1
221
- UFW = UF * W
222
- looplength = length (loop)
211
+ UF = unrollfactor (us, n)
212
+ isone (UF) && return lower_no_unroll (ls, us, n, inclmask)
213
+ @unpack u₁loopnum, vloopnum, u₁, u₂ = us
214
+ order = names (ls)
215
+ loopsym = order[n]
216
+ loop = getloop (ls, n)
217
+ vectorized = order[vloopnum]
218
+ nisunrolled = isunrolled1 (us, n)
219
+ nisvectorized = isvectorized (us, n)
220
+ W = nisvectorized ? ls. vector_width : 1
221
+ UFW = UF * W
222
+ looplength = length (loop)
223
223
if W ≠ 0 & isknown (first (loop)) & isknown (step (loop))
224
224
loopisstatic = isknown (last (loop))
225
225
# something other than the default hint currently means an UpperBoundedInteger was passed as an argument
@@ -228,159 +228,163 @@ function lower_unrolled_dynamic(ls::LoopSet, us::UnrollSpecification, n::Int, in
228
228
loopisstatic = false
229
229
loopisbounded = false
230
230
end
231
- if loopisstatic & loopisbounded
232
- UFWnew = cld (looplength, cld (looplength, UFW))
233
- UF = cld (UFWnew, W)
234
- UFW = UF * W
235
- us = nisunrolled ? UnrollSpecification (us, UF, u₂) : UnrollSpecification (us, u₁, UF)
236
- end
237
- remmask = inclmask | nisvectorized
238
- Ureduct = (n == num_loops (ls) && (u₂ == - 1 )) ? ureduct (ls) : - 1
239
- sl = startloop (ls, us, n, false )
240
- UFt = loopisstatic ? cld (looplength % UFW, W) : 1
241
- # Don't place remainder first if we're going to have to mask this loop (i.e., if this loop is vectorized)
242
- remfirst = loopisstatic & (! nisvectorized) & (UFt > 0 ) & ! (unsigned (Ureduct) < unsigned (UF))
243
- tc = terminatecondition (ls, us, n, inclmask, remfirst ? 1 : UF)
244
- # usorig = ls.unrollspecification
231
+ Ureduct = (n == num_loops (ls) && (u₂ == - 1 )) ? ureduct (ls) : - 1
232
+ # For now, only reduce UF when the loop is static (`loopisstatic`) or `Ureduct < 0`.
233
+ if loopisbounded & (loopisstatic | (Ureduct < 0 ))
234
+ UFWnew = cld (looplength, cld (looplength, UFW))
235
+ UF = cld (UFWnew, W)
236
+ UFW = UF * W
237
+ us = nisunrolled ? UnrollSpecification (us, UF, u₂) : UnrollSpecification (us, u₁, UF)
238
+ # if (!loopisstatic) & (Ureduct ≥ UF)
239
+ # ls.ureduct = Ureduct = UF >> 1
240
+ # end
241
+ end
242
+ remmask = inclmask | nisvectorized
243
+ sl = startloop (ls, us, n, false )
244
+ UFt = loopisstatic ? cld (looplength % UFW, W) : 1
245
+ # Don't place remainder first if we're going to have to mask this loop (i.e., if this loop is vectorized)
246
+ remfirst = loopisstatic & (! nisvectorized) & (UFt > 0 ) & ! (unsigned (Ureduct) < unsigned (UF))
247
+ tc = terminatecondition (ls, us, n, inclmask, remfirst ? 1 : UF)
248
+ # usorig = ls.unrollspecification
245
249
# tc = (usorig.u₁ == us.u₁) && (usorig.u₂ == us.u₂) && !loopisstatic && !inclmask && !ls.loadelimination ? expect(tc) : tc
246
250
# Don't need to create the body if loop is dynamic and bounded
247
251
dynamicbounded = ((! loopisstatic) & loopisbounded)
248
252
body = dynamicbounded ? tc : lower_block (ls, us, n, inclmask, UF)
249
253
if loopisstatic
250
- iters = length (loop) ÷ UFW
251
- if (iters ≤ 1 ) || (iters* UF ≤ 16 && allinteriorunrolled (ls, us, n))# Let's set a limit on total unrolling
252
- q = Expr (:block )
253
- for _ ∈ 1 : iters
254
- push! (q. args, body)
255
- end
256
- else
257
- q = Expr (:while , tc, body)
258
- end
259
- remblock = Expr (:block )
260
- (nisvectorized && (UFt > 0 ) && isone (num_loops (ls))) && push! (remblock. args, definemask (loop))
261
- unroll_cleanup = true
262
- else
263
- remblock = init_remblock (loop, ls. lssm, n)# loopsym)
264
- # unroll_cleanup = Ureduct > 0 || (nisunrolled ? (u₂ > 1) : (u₁ > 1))
265
- # remblock = unroll_cleanup ? init_remblock(loop, ls.lssm, n)#loopsym) : Expr(:block)
266
- q = if loopisbounded
267
- Expr (:block )
268
- elseif unsigned (Ureduct) < unsigned (UF)
269
- # push!(body.args, Expr(:(||), tc, Expr(:break)))
270
- # Expr(:while, true, body)
271
- termcond = gensym (:maybeterm )
272
- push! (body. args, Expr (:(= ), termcond, tc))
273
- Expr (:block , Expr (:(= ), termcond, true ), Expr (:while , termcond, body))
274
- else
275
- Expr (:while , tc, body)
254
+ iters = length (loop) ÷ UFW
255
+ if (iters ≤ 1 ) || (iters* UF ≤ 16 && allinteriorunrolled (ls, us, n))# Let's set a limit on total unrolling
256
+ q = Expr (:block )
257
+ for _ ∈ 1 : iters
258
+ push! (q. args, body)
276
259
end
260
+ else
261
+ q = Expr (:while , tc, body)
262
+ end
263
+ remblock = Expr (:block )
264
+ (nisvectorized && (UFt > 0 ) && isone (num_loops (ls))) && push! (remblock. args, definemask (loop))
265
+ unroll_cleanup = true
266
+ else
267
+ remblock = init_remblock (loop, ls. lssm, n)# loopsym)
268
+ # unroll_cleanup = Ureduct > 0 || (nisunrolled ? (u₂ > 1) : (u₁ > 1))
269
+ # remblock = unroll_cleanup ? init_remblock(loop, ls.lssm, n)#loopsym) : Expr(:block)
270
+ q = if loopisbounded
271
+ Expr (:block )
272
+ elseif unsigned (Ureduct) < unsigned (UF)
273
+ # push!(body.args, Expr(:(||), tc, Expr(:break)))
274
+ # Expr(:while, true, body)
275
+ termcond = gensym (:maybeterm )
276
+ push! (body. args, Expr (:(= ), termcond, tc))
277
+ Expr (:block , Expr (:(= ), termcond, true ), Expr (:while , termcond, body))
278
+ else
279
+ Expr (:while , tc, body)
280
+ end
277
281
end
278
- q = if unsigned (Ureduct) < unsigned (UF) # unsigned(-1) == typemax(UInt);
279
- add_cleanup = true
280
- if isone (Ureduct)
281
- UF_cleanup = 1
282
- if nisvectorized
283
- blockhead = :while
284
- else
285
- blockhead = if UF == 2
286
- if loopisstatic
287
- add_cleanup = UFt == 1
288
- :block
289
- else
290
- :if
291
- end
292
- else
293
- :while
294
- end
295
- UFt = 0
296
- end
297
- elseif 2 Ureduct < UF
298
- UF_cleanup = 2
299
- blockhead = :while
300
- else
301
- UF_cleanup = UF - Ureduct
302
- blockhead = :if
303
- end
304
- _q = if dynamicbounded
305
- initialize_outer_reductions! (q, ls, Ureduct); q
282
+ q = if unsigned (Ureduct) < unsigned (UF) # unsigned(-1) == typemax(UInt);
283
+ add_cleanup = true
284
+ if isone (Ureduct)
285
+ UF_cleanup = 1
286
+ if nisvectorized
287
+ blockhead = :while
288
+ else
289
+ blockhead = if UF == 2
290
+ if loopisstatic
291
+ add_cleanup = UFt == 1
292
+ :block
306
293
else
307
- Expr (:block , add_upper_outer_reductions (ls, q, Ureduct, UF, loop, nisvectorized))
308
- end
309
- if add_cleanup
310
- cleanup_expr = Expr (blockhead)
311
- blockhead === :block || push! (cleanup_expr. args, terminatecondition (ls, us, n, inclmask, UF_cleanup))
312
- us_cleanup = nisunrolled ? UnrollSpecification (us, UF_cleanup, u₂) : UnrollSpecification (us, u₁, UF_cleanup)
313
- push! (cleanup_expr. args, lower_block (ls, us_cleanup, n, inclmask, UF_cleanup))
314
- push! (_q. args, cleanup_expr)
294
+ :if
315
295
end
316
- UFt > 0 && push! (_q. args, remblock)
317
- _q
318
- elseif remfirst
319
- numiters = length (loop) ÷ UF
320
- if numiters > 2
321
- Expr ( :block , remblock, q )
322
296
else
323
- q = Expr (:block , remblock)
324
- for i ∈ 1 : numiters
325
- push! (q. args, body)
326
- end
327
- q
297
+ :while
328
298
end
329
- elseif iszero (UFt)
330
- Expr ( :block , q )
331
- elseif ! nisvectorized && ! loopisstatic && UF ≥ 10
332
- rem_uf = UF - 1
333
- UF = rem_uf >> 1
334
- UFt = rem_uf - UF
335
- ust = nisunrolled ? UnrollSpecification (us, UFt, u₂) : UnrollSpecification (us, u₁, UFt)
336
- newblock = lower_block (ls, ust, n, remmask, UFt)
337
- # comparison = unrollremcomparison(ls, loop, UFt, n, nisvectorized, remfirst)
338
- comparison = terminatecondition (ls, us, n, inclmask, UFt)
339
- UFt = 1
340
- UF += 1 - iseven (rem_uf)
341
- Expr ( :block , q, Expr (iseven (rem_uf) ? :while : :if , comparison, newblock), remblock )
299
+ UFt = 0
300
+ end
301
+ elseif 2 Ureduct < UF
302
+ UF_cleanup = 2
303
+ blockhead = :while
342
304
else
343
- # if (usorig.u₁ == us.u₁) && (usorig.u₂ == us.u₂) && !isstaticloop(loop) && !inclmask# && !ls.loadelimination
344
- # # Expr(:block, sl, assumeloopiteratesatleastonce(loop), Expr(:while, tc, body))
345
- # Expr(:block, sl, expect(tc), q, remblock)
346
- # else
347
- # Expr(:block, sl, q, remblock)
348
- # end
349
- Expr ( :block , q, remblock )
350
- end
351
- if ! iszero (UFt)
352
- # if unroll_cleanup
353
- iforelseif = :if
354
- while true
355
- ust = nisunrolled ? UnrollSpecification (us, UFt, u₂) : UnrollSpecification (us, u₁, UFt)
356
- newblock = lower_block (ls, ust, n, remmask, UFt)
357
- if (UFt ≥ UF - 1 + nisvectorized) || UFt == Ureduct || loopisstatic
358
- if isone (num_loops (ls)) && isone (UFt) && isone (Ureduct)
359
- newblock = Expr (:block , definemask (loop), newblock)
360
- end
361
- push! (remblock. args, newblock)
362
- break
363
- end
364
- comparison = unrollremcomparison (ls, loop, UFt, n, nisvectorized, remfirst)
365
- if isone (num_loops (ls)) && isone (UFt)
366
- remblocknew = Expr (:if , comparison, newblock)
367
- push! (remblock. args, Expr (:block , Expr (:let , definemask (loop), remblocknew)))
368
- remblock = remblocknew
369
- else
370
- remblocknew = Expr (iforelseif, comparison, newblock)
371
- # remblocknew = Expr(:elseif, comparison, newblock)
372
- push! (remblock. args, remblocknew)
373
- remblock = remblocknew
374
- iforelseif = :elseif
375
- end
376
- UFt += 1
305
+ UF_cleanup = UF - Ureduct
306
+ blockhead = :if
307
+ end
308
+ _q = if dynamicbounded
309
+ initialize_outer_reductions! (q, ls, Ureduct); q
310
+ else
311
+ Expr (:block , add_upper_outer_reductions (ls, q, Ureduct, UF, loop, nisvectorized))
312
+ end
313
+ if add_cleanup
314
+ cleanup_expr = Expr (blockhead)
315
+ blockhead === :block || push! (cleanup_expr. args, terminatecondition (ls, us, n, inclmask, UF_cleanup))
316
+ us_cleanup = nisunrolled ? UnrollSpecification (us, UF_cleanup, u₂) : UnrollSpecification (us, u₁, UF_cleanup)
317
+ push! (cleanup_expr. args, lower_block (ls, us_cleanup, n, inclmask, UF_cleanup))
318
+ push! (_q. args, cleanup_expr)
319
+ end
320
+ UFt > 0 && push! (_q. args, remblock)
321
+ _q
322
+ elseif remfirst
323
+ numiters = length (loop) ÷ UF
324
+ if numiters > 2
325
+ Expr ( :block , remblock, q )
326
+ else
327
+ q = Expr (:block , remblock)
328
+ for i ∈ 1 : numiters
329
+ push! (q. args, body)
330
+ end
331
+ q
332
+ end
333
+ elseif iszero (UFt)
334
+ Expr ( :block , q )
335
+ elseif ! nisvectorized && ! loopisstatic && UF ≥ 10
336
+ rem_uf = UF - 1
337
+ UF = rem_uf >> 1
338
+ UFt = rem_uf - UF
339
+ ust = nisunrolled ? UnrollSpecification (us, UFt, u₂) : UnrollSpecification (us, u₁, UFt)
340
+ newblock = lower_block (ls, ust, n, remmask, UFt)
341
+ # comparison = unrollremcomparison(ls, loop, UFt, n, nisvectorized, remfirst)
342
+ comparison = terminatecondition (ls, us, n, inclmask, UFt)
343
+ UFt = 1
344
+ UF += 1 - iseven (rem_uf)
345
+ Expr ( :block , q, Expr (iseven (rem_uf) ? :while : :if , comparison, newblock), remblock )
346
+ else
347
+ # if (usorig.u₁ == us.u₁) && (usorig.u₂ == us.u₂) && !isstaticloop(loop) && !inclmask# && !ls.loadelimination
348
+ # # Expr(:block, sl, assumeloopiteratesatleastonce(loop), Expr(:while, tc, body))
349
+ # Expr(:block, sl, expect(tc), q, remblock)
350
+ # else
351
+ # Expr(:block, sl, q, remblock)
352
+ # end
353
+ Expr ( :block , q, remblock )
354
+ end
355
+ if ! iszero (UFt)
356
+ # if unroll_cleanup
357
+ iforelseif = :if
358
+ while true
359
+ ust = nisunrolled ? UnrollSpecification (us, UFt, u₂) : UnrollSpecification (us, u₁, UFt)
360
+ newblock = lower_block (ls, ust, n, remmask, UFt)
361
+ if (UFt ≥ UF - 1 + nisvectorized) || UFt == Ureduct || loopisstatic
362
+ if isone (num_loops (ls)) && isone (UFt) && isone (Ureduct)
363
+ newblock = Expr (:block , definemask (loop), newblock)
377
364
end
378
- # else
379
- # ust = nisunrolled ? UnrollSpecification(us, 1, u₂) : UnrollSpecification(us, u₁, 1)
380
- # # newblock = lower_block(ls, ust, n, remmask, 1)
381
- # push!(remblock.args, lower_no_unroll(ls, ust, n, inclmask, false, UF-1))
382
- # end
365
+ push! (remblock. args, newblock)
366
+ break
367
+ end
368
+ comparison = unrollremcomparison (ls, loop, UFt, n, nisvectorized, remfirst)
369
+ if isone (num_loops (ls)) && isone (UFt)
370
+ remblocknew = Expr (:if , comparison, newblock)
371
+ push! (remblock. args, Expr (:block , Expr (:let , definemask (loop), remblocknew)))
372
+ remblock = remblocknew
373
+ else
374
+ remblocknew = Expr (iforelseif, comparison, newblock)
375
+ # remblocknew = Expr(:elseif, comparison, newblock)
376
+ push! (remblock. args, remblocknew)
377
+ remblock = remblocknew
378
+ iforelseif = :elseif
379
+ end
380
+ UFt += 1
383
381
end
382
+ # else
383
+ # ust = nisunrolled ? UnrollSpecification(us, 1, u₂) : UnrollSpecification(us, u₁, 1)
384
+ # # newblock = lower_block(ls, ust, n, remmask, 1)
385
+ # push!(remblock.args, lower_no_unroll(ls, ust, n, inclmask, false, UF-1))
386
+ # end
387
+ end
384
388
if (length (ls. outer_reductions) > 0 ) && (2 ≤ n < length (ls. loops))
385
389
pre, post = reinit_and_update_tiled_outer_reduct! (sl, q, ls, order[u₁loopnum], order[us. u₂loopnum], vectorized)
386
390
Expr (:block , pre, Expr (:let , sl, q), post)
0 commit comments