@@ -151,7 +151,6 @@ function lower_load_no_optranslation!(
151
151
loadexpr = Expr (:call , lv (:_vload ), sptr (op), inds)
152
152
add_memory_mask! (loadexpr, op, td, mask, ls, 0 )
153
153
push! (loadexpr. args, falseexpr, rs) # unaligned load
154
- # @show op loadexpr
155
154
push! (q. args, Expr (:(= ), mvar, loadexpr))
156
155
elseif (u₁ > 1 ) & opu₁
157
156
t = Expr (:tuple )
@@ -417,7 +416,6 @@ function rejectinterleave(ls::LoopSet, op::Operation, vloop::Loop, idsformap::Su
417
416
end
418
417
end
419
418
vloopsym = vloop. itersymbol;
420
- # @show op first(getindices(op)) length(idsformap), first(getstrides(op)), gethint(strd)
421
419
(first (getindices (op)) === vloopsym) && (length (idsformap) ≠ abs (first (getstrides (op)) * gethint (strd)))
422
420
end
423
421
# function lower_load_collection_manual_u₁unroll!(
@@ -436,100 +434,99 @@ end
436
434
# op.mangledvariable = _mvar
437
435
# end
438
436
"""
    lower_load_collection!(q, ls, opidmap, idsformap, ua, mask, inds_calc_by_ptr_offset)

Append to `q.args` the expressions that load a whole collection of operations with
`_vload` calls and then bind each member of the collection to its own variable.

# Arguments
- `q::Expr`: expression whose `args` receive the generated assignments.
- `ls::LoopSet`: loop set; `operations(ls)` is indexed through `opidmap`.
- `opidmap::Vector{Int}`: maps an id taken from `idsformap` to an index into
  `operations(ls)`.
- `idsformap`: view of `(index, offset)` pairs, one per member of the collection.
- `ua::UnrollArgs`: supplies `u₁`, `u₁loop`, `u₁loopsym`, `u₂loopsym`, `vloopsym`,
  `vloop` and `suffix`.
- `mask::Bool`: whether `MASKSYMBOL` may be attached to the generated load.
- `inds_calc_by_ptr_offset::Vector{Bool}`: forwarded unchanged to `mem_offset_u`.

# Returns
`nothing`; the function is called for its effect on `q` (and whatever `sptr!` does
to `q`).
"""
function lower_load_collection!(
    q::Expr, ls::LoopSet, opidmap::Vector{Int},
    idsformap::SubArray{Tuple{Int,Int}, 1, Vector{Tuple{Int,Int}}, Tuple{UnitRange{Int}}, true},
    ua::UnrollArgs, mask::Bool, inds_calc_by_ptr_offset::Vector{Bool}
)
    @unpack u₁, u₁loop, u₁loopsym, u₂loopsym, vloopsym, vloop, suffix = ua

    ops = operations(ls)
    nouter = length(idsformap)
    # `idsformap` contains (index, offset) pairs; the first pair selects the
    # operation used to build the indexing expression for the whole collection.
    op = ops[opidmap[first(first(idsformap))]]
    opindices = getindices(op)
    # Construct a dummy unrolled loop over the first index of `op`.
    offset_dummy_loop = Loop(
        first(opindices), MaybeKnown(1), MaybeKnown(1024), MaybeKnown(1),
        Symbol(""), Symbol("")
    )
    unrollcurl₂ = unrolled_curly(op, nouter, offset_dummy_loop, vloop, mask, 1) # interleave always 1 here
    inds = mem_offset_u(op, ua, inds_calc_by_ptr_offset, false, 0, ls, false)
    falseexpr = Expr(:call, lv(:False))
    rs = staticexpr(reg_size(ls))

    opu₁, opu₂ = isunrolled_sym(op, u₁loopsym, u₂loopsym, vloopsym, ls)
    # `manualunrollu₁` is true when the u₁ unroll has to be emitted as `u₁` separate
    # loads; otherwise the u₁ unroll (if any) is folded into `inds`.
    manualunrollu₁ = if opu₁ && u₁ > 1 # both unrolled
        if isknown(step(u₁loop)) && sum(Base.Fix2(===, u₁loopsym), getindicesonly(op)) == 1
            # The interleave value is fixed at 0; the former `-nouter` special case
            # for a leading vectorized index is intentionally disabled.
            interleaveval = 0
            unrollcurl₁ = unrolled_curly(op, u₁, ua.u₁loop, vloop, mask, interleaveval)
            inds = Expr(:call, unrollcurl₁, inds)
            false
        else
            true # u₁ > 1 already checked to reach here
        end
    else
        false
    end
    uinds = Expr(:call, unrollcurl₂, inds)
    sptrsym = sptr!(q, op)
    loadexpr = Expr(:call, lv(:_vload), sptrsym, uinds)
    # `add_memory_mask!` is not used here; per the original note, `isconditionalmemop`
    # was already checked earlier.
    u₁vectorized = u₁loopsym === vloopsym
    if mask && isvectorized(op)
        # When manually unrolling along the vectorized loop, only the final load is
        # masked (handled in the loop below), so the mask is not attached here.
        if !(manualunrollu₁ & u₁vectorized)
            push!(loadexpr.args, MASKSYMBOL)
        end
    end
    push!(loadexpr.args, falseexpr, rs)
    collectionname = Symbol(
        vptr(op), "##collection##number#", opidmap[first(first(idsformap))],
        "#", suffix, "##size##", nouter, "##u₁##", u₁
    )
    gf = GlobalRef(Core, :getfield)
    if manualunrollu₁
        masklast = mask & u₁vectorized & isvectorized(op)
        # One tuple expression per collection member, filled across the `u₁` loads.
        extractedvs = Vector{Expr}(undef, length(idsformap))
        for i ∈ eachindex(extractedvs)
            extractedvs[i] = Expr(:tuple)
        end
        for u ∈ 0:u₁-1
            collectionname_u = Symbol(collectionname, :_, u)
            if u ≠ 0
                inds = mem_offset_u(op, ua, inds_calc_by_ptr_offset, false, u, ls, false)
                # Copy so earlier loads already pushed into `q` are not mutated.
                loadexpr = copy(loadexpr)
                loadexpr.args[3] = Expr(:call, unrollcurl₂, inds)
                # Insert the mask just before the trailing `falseexpr, rs` pair.
                if ((u + 1) == u₁) & masklast
                    insert!(loadexpr.args, length(loadexpr.args) - 1, MASKSYMBOL)
                end
            end
            # `getfield(…, 1)` extracts the data from the `VecUnroll` object, so we
            # have a tuple.
            push!(q.args, Expr(:(=), collectionname_u, Expr(:call, gf, loadexpr, 1)))
            for (i, (opid, _)) ∈ enumerate(idsformap)
                ext = extractedvs[i]
                if (u + 1) == u₁
                    # `ext` is stored by reference, so the element pushed just below
                    # is still part of the `VecUnroll` call emitted here.
                    _op = ops[opidmap[opid]]
                    mvar = Symbol(variable_name(_op, Core.ifelse(opu₂, suffix, -1)), '_', u₁)
                    push!(q.args, Expr(:(=), mvar, Expr(:call, lv(:VecUnroll), ext)))
                end
                push!(ext.args, Expr(:call, gf, collectionname_u, i, false))
            end
        end
    else
        # `getfield(…, 1)` extracts the data from the `VecUnroll` object, so we have
        # a tuple.
        push!(q.args, Expr(:(=), collectionname, Expr(:call, gf, loadexpr, 1)))
        u = Core.ifelse(opu₁, u₁, 1)
        for (i, (opid, _)) ∈ enumerate(idsformap)
            extractedv = Expr(:call, gf, collectionname, i, false)
            _op = ops[opidmap[opid]]
            mvar = Symbol(variable_name(_op, Core.ifelse(opu₂, suffix, -1)), '_', u)
            push!(q.args, Expr(:(=), mvar, extractedv))
        end
    end
    return nothing
end
0 commit comments