@@ -112,8 +112,10 @@ class SIOptimizeVGPRLiveRange : public MachineFunctionPass {
112
112
SmallVectorImpl<Register> &CandidateRegs) const ;
113
113
114
114
void collectWaterfallCandidateRegisters (
115
- MachineBasicBlock *Loop,
116
- SmallSetVector<Register, 16 > &CandidateRegs) const ;
115
+ MachineBasicBlock *LoopHeader, MachineBasicBlock *LoopEnd,
116
+ SmallSetVector<Register, 16 > &CandidateRegs,
117
+ SmallSetVector<MachineBasicBlock *, 2 > &Blocks,
118
+ SmallVectorImpl<MachineInstr *> &Instructions) const ;
117
119
118
120
void findNonPHIUsesInBlock (Register Reg, MachineBasicBlock *MBB,
119
121
SmallVectorImpl<MachineInstr *> &Uses) const ;
@@ -131,7 +133,10 @@ class SIOptimizeVGPRLiveRange : public MachineFunctionPass {
131
133
MachineBasicBlock *Flow, MachineBasicBlock *Endif,
132
134
SmallSetVector<MachineBasicBlock *, 16 > &ElseBlocks) const ;
133
135
134
- void optimizeWaterfallLiveRange (Register Reg, MachineBasicBlock *If) const ;
136
+ void optimizeWaterfallLiveRange (
137
+ Register Reg, MachineBasicBlock *LoopHeader,
138
+ SmallSetVector<MachineBasicBlock *, 2 > &LoopBlocks,
139
+ SmallVectorImpl<MachineInstr *> &Instructions) const ;
135
140
136
141
SIOptimizeVGPRLiveRange () : MachineFunctionPass(ID) {}
137
142
@@ -323,12 +328,30 @@ void SIOptimizeVGPRLiveRange::collectCandidateRegisters(
323
328
/// Collect the registers used in the waterfall loop block that are defined
324
329
/// before.
325
330
void SIOptimizeVGPRLiveRange::collectWaterfallCandidateRegisters (
326
- MachineBasicBlock *Loop,
327
- SmallSetVector<Register, 16 > &CandidateRegs) const {
331
+ MachineBasicBlock *LoopHeader, MachineBasicBlock *LoopEnd,
332
+ SmallSetVector<Register, 16 > &CandidateRegs,
333
+ SmallSetVector<MachineBasicBlock *, 2 > &Blocks,
334
+ SmallVectorImpl<MachineInstr *> &Instructions) const {
335
+
336
+ // Collect loop instructions, potentially spanning multiple blocks
337
+ auto *MBB = LoopHeader;
338
+ for (;;) {
339
+ Blocks.insert (MBB);
340
+ for (auto &MI : *MBB) {
341
+ if (MI.isDebugInstr ())
342
+ continue ;
343
+ Instructions.push_back (&MI);
344
+ }
345
+ if (MBB == LoopEnd)
346
+ break ;
347
+ assert (MBB->pred_size () == 1 ||
348
+ (MBB == LoopHeader && MBB->pred_size () == 2 ));
349
+ assert (MBB->succ_size () == 1 );
350
+ MBB = *MBB->succ_begin ();
351
+ }
328
352
329
- for (auto &MI : Loop->instrs ()) {
330
- if (MI.isDebugInstr ())
331
- continue ;
353
+ for (auto *I : Instructions) {
354
+ auto &MI = *I;
332
355
333
356
for (auto &MO : MI.operands ()) {
334
357
if (!MO.isReg () || !MO.getReg () || MO.isDef ())
@@ -340,16 +363,17 @@ void SIOptimizeVGPRLiveRange::collectWaterfallCandidateRegisters(
340
363
continue ;
341
364
342
365
if (MO.readsReg ()) {
343
- const MachineBasicBlock *DefMBB = MRI->getVRegDef (MOReg)->getParent ();
366
+ MachineBasicBlock *DefMBB = MRI->getVRegDef (MOReg)->getParent ();
344
367
// Make sure the value is defined before the LOOP block
345
- if (DefMBB != Loop && !CandidateRegs.contains (MOReg)) {
368
+ if (!Blocks. contains (DefMBB) && !CandidateRegs.contains (MOReg)) {
346
369
// If the variable is used after the loop, the register coalescer will
347
370
// merge the newly created register and remove the phi node again.
348
371
// Just do nothing in that case.
349
372
LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo (MOReg);
350
373
bool IsUsed = false ;
351
- for (auto *Succ : Loop->successors ()) {
352
- if (Succ != Loop && OldVarInfo.isLiveIn (*Succ, MOReg, *MRI)) {
374
+ for (auto *Succ : LoopEnd->successors ()) {
375
+ if (!Blocks.contains (Succ) &&
376
+ OldVarInfo.isLiveIn (*Succ, MOReg, *MRI)) {
353
377
IsUsed = true ;
354
378
break ;
355
379
}
@@ -513,7 +537,9 @@ void SIOptimizeVGPRLiveRange::optimizeLiveRange(
513
537
}
514
538
515
539
void SIOptimizeVGPRLiveRange::optimizeWaterfallLiveRange (
516
- Register Reg, MachineBasicBlock *Loop) const {
540
+ Register Reg, MachineBasicBlock *LoopHeader,
541
+ SmallSetVector<MachineBasicBlock *, 2 > &Blocks,
542
+ SmallVectorImpl<MachineInstr *> &Instructions) const {
517
543
// Insert a new PHI, marking the value from the last loop iteration undef.
518
544
LLVM_DEBUG (dbgs () << " Optimizing " << printReg (Reg, TRI) << ' \n ' );
519
545
const auto *RC = MRI->getRegClass (Reg);
@@ -525,15 +551,16 @@ void SIOptimizeVGPRLiveRange::optimizeWaterfallLiveRange(
525
551
for (auto &O : make_early_inc_range (MRI->use_operands (Reg))) {
526
552
auto *UseMI = O.getParent ();
527
553
auto *UseBlock = UseMI->getParent ();
528
- // Replace uses in Loop block
529
- if (UseBlock == Loop )
554
+ // Replace uses in Loop blocks
555
+ if (Blocks. contains ( UseBlock) )
530
556
O.setReg (NewReg);
531
557
}
532
558
533
- MachineInstrBuilder PHI = BuildMI (*Loop, Loop->getFirstNonPHI (), DebugLoc (),
534
- TII->get (TargetOpcode::PHI), NewReg);
535
- for (auto *Pred : Loop->predecessors ()) {
536
- if (Pred == Loop)
559
+ MachineInstrBuilder PHI =
560
+ BuildMI (*LoopHeader, LoopHeader->getFirstNonPHI (), DebugLoc (),
561
+ TII->get (TargetOpcode::PHI), NewReg);
562
+ for (auto *Pred : LoopHeader->predecessors ()) {
563
+ if (Blocks.contains (Pred))
537
564
PHI.addReg (UndefReg, RegState::Undef).addMBB (Pred);
538
565
else
539
566
PHI.addReg (Reg).addMBB (Pred);
@@ -542,21 +569,36 @@ void SIOptimizeVGPRLiveRange::optimizeWaterfallLiveRange(
542
569
LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo (NewReg);
543
570
LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo (Reg);
544
571
545
- // collectWaterfallCandidateRegisters only collects registers that are dead
546
- // after the loop. So we know that the old reg is not live throughout the
547
- // whole block anymore.
548
- OldVarInfo.AliveBlocks .reset (Loop->getNumber ());
549
-
550
- // Mark the last use as kill
551
- for (auto &MI : reverse (Loop->instrs ())) {
552
- if (MI.readsRegister (NewReg, TRI)) {
553
- MI.addRegisterKilled (NewReg, TRI);
554
- NewVarInfo.Kills .push_back (&MI);
572
+ // Find last use and mark as kill
573
+ MachineInstr *Kill = nullptr ;
574
+ for (auto *MI : reverse (Instructions)) {
575
+ if (MI->readsRegister (NewReg, TRI)) {
576
+ MI->addRegisterKilled (NewReg, TRI);
577
+ NewVarInfo.Kills .push_back (MI);
578
+ Kill = MI;
555
579
break ;
556
580
}
557
581
}
558
- assert (!NewVarInfo.Kills .empty () &&
559
- " Failed to find last usage of register in loop" );
582
+ assert (Kill && " Failed to find last usage of register in loop" );
583
+
584
+ MachineBasicBlock *KillBlock = Kill->getParent ();
585
+ bool PostKillBlock = false ;
586
+ for (auto *Block : Blocks) {
587
+ auto BBNum = Block->getNumber ();
588
+
589
+ // collectWaterfallCandidateRegisters only collects registers that are dead
590
+ // after the loop. So we know that the old reg is no longer live throughout
591
+ // the waterfall loop.
592
+ OldVarInfo.AliveBlocks .reset (BBNum);
593
+
594
+ // The new register is live up to (and including) the block that kills it.
595
+ PostKillBlock |= (Block == KillBlock);
596
+ if (PostKillBlock) {
597
+ NewVarInfo.AliveBlocks .reset (BBNum);
598
+ } else if (Block != LoopHeader) {
599
+ NewVarInfo.AliveBlocks .set (BBNum);
600
+ }
601
+ }
560
602
}
561
603
562
604
char SIOptimizeVGPRLiveRange::ID = 0 ;
@@ -620,15 +662,22 @@ bool SIOptimizeVGPRLiveRange::runOnMachineFunction(MachineFunction &MF) {
620
662
for (auto Reg : CandidateRegs)
621
663
optimizeLiveRange (Reg, &MBB, IfTarget, Endif, ElseBlocks);
622
664
} else if (MI.getOpcode () == AMDGPU::SI_WATERFALL_LOOP) {
665
+ auto *LoopHeader = MI.getOperand (0 ).getMBB ();
666
+ auto *LoopEnd = &MBB;
667
+
623
668
LLVM_DEBUG (dbgs () << " Checking Waterfall loop: "
624
- << printMBBReference (MBB ) << ' \n ' );
669
+ << printMBBReference (*LoopHeader ) << ' \n ' );
625
670
626
671
SmallSetVector<Register, 16 > CandidateRegs;
627
- collectWaterfallCandidateRegisters (&MBB, CandidateRegs);
672
+ SmallVector<MachineInstr *, 16 > Instructions;
673
+ SmallSetVector<MachineBasicBlock *, 2 > Blocks;
674
+
675
+ collectWaterfallCandidateRegisters (LoopHeader, LoopEnd, CandidateRegs,
676
+ Blocks, Instructions);
628
677
MadeChange |= !CandidateRegs.empty ();
629
678
// Now we are safe to optimize.
630
679
for (auto Reg : CandidateRegs)
631
- optimizeWaterfallLiveRange (Reg, &MBB );
680
+ optimizeWaterfallLiveRange (Reg, LoopHeader, Blocks, Instructions );
632
681
}
633
682
}
634
683
}
0 commit comments