3030#include " llvm/CodeGen/LiveIntervals.h"
3131#include " llvm/CodeGen/LiveRegMatrix.h"
3232#include " llvm/CodeGen/LiveStacks.h"
33+ #include " llvm/CodeGen/MachineDominators.h"
3334#include " llvm/CodeGen/MachineFrameInfo.h"
3435#include " llvm/CodeGen/MachineFunctionPass.h"
3536#include " llvm/CodeGen/VirtRegMap.h"
@@ -58,6 +59,7 @@ class AMDGPURewriteAGPRCopyMFMAImpl {
5859 LiveIntervals &LIS;
5960 LiveStacks &LSS;
6061 const RegisterClassInfo &RegClassInfo;
62+ MachineDominatorTree &MDT;
6163
6264 bool attemptReassignmentsToAGPR (SmallSetVector<Register, 4 > &InterferingRegs,
6365 MCPhysReg PrefPhysReg) const ;
@@ -66,10 +68,11 @@ class AMDGPURewriteAGPRCopyMFMAImpl {
6668 AMDGPURewriteAGPRCopyMFMAImpl (MachineFunction &MF, VirtRegMap &VRM,
6769 LiveRegMatrix &LRM, LiveIntervals &LIS,
6870 LiveStacks &LSS,
69- const RegisterClassInfo &RegClassInfo)
71+ const RegisterClassInfo &RegClassInfo,
72+ MachineDominatorTree &MDT)
7073 : MF(MF), ST(MF.getSubtarget<GCNSubtarget>()), TII(*ST.getInstrInfo()),
7174 TRI (*ST.getRegisterInfo()), MRI(MF.getRegInfo()), VRM(VRM), LRM(LRM),
72- LIS(LIS), LSS(LSS), RegClassInfo(RegClassInfo) {}
75+ LIS(LIS), LSS(LSS), RegClassInfo(RegClassInfo), MDT(MDT) {}
7376
7477 bool isRewriteCandidate (const MachineInstr &MI) const {
7578 return TII.isMAI (MI) && AMDGPU::getMFMASrcCVDstAGPROp (MI.getOpcode ()) != -1 ;
@@ -515,6 +518,47 @@ void AMDGPURewriteAGPRCopyMFMAImpl::eliminateSpillsOfReassignedVGPRs() const {
515518 if (SpillReferences == SpillSlotReferences.end ())
516519 continue ;
517520
521+ SmallVector<MachineInstr *, 4 > Stores, Loads;
522+ Stores.reserve (SpillReferences->second .size ());
523+ Loads.reserve (SpillReferences->second .size ());
524+ for (MachineInstr *MI : SpillReferences->second ) {
525+ if (MI->mayStore ())
526+ Stores.push_back (MI);
527+ else if (MI->mayLoad ())
528+ Loads.push_back (MI);
529+ }
530+
531+ // Only eliminate spills when a single store dominates all loads.
532+ if (Stores.size () != 1 ) {
533+ LLVM_DEBUG (dbgs () << " Skipping " << printReg (Slot, &TRI)
534+ << " : multiple stores (" << Stores.size () << " )\n " );
535+ continue ;
536+ }
537+
538+ MachineInstr *StoreMI = Stores.front ();
539+ MachineBasicBlock *StoreMBB = StoreMI->getParent ();
540+
541+ // Check that the store dominates all loads.
542+ bool AllDominated = llvm::all_of (Loads, [&](MachineInstr *LoadMI) {
543+ MachineBasicBlock *LoadMBB = LoadMI->getParent ();
544+ if (StoreMBB == LoadMBB) {
545+ for (MachineBasicBlock::iterator I = StoreMI->getIterator (),
546+ E = StoreMBB->end ();
547+ I != E; ++I) {
548+ if (&*I == LoadMI)
549+ return true ;
550+ }
551+ return false ;
552+ }
553+ return MDT.dominates (StoreMBB, LoadMBB);
554+ });
555+
556+ if (!AllDominated) {
557+ LLVM_DEBUG (dbgs () << " Skipping " << printReg (Slot, &TRI)
558+ << " : store does not dominate all loads\n " );
559+ continue ;
560+ }
561+
518562 const TargetRegisterClass *RC = LSS.getIntervalRegClass (Slot);
519563
520564 LLVM_DEBUG (dbgs () << " Trying to eliminate " << printReg (Slot, &TRI)
@@ -603,11 +647,13 @@ class AMDGPURewriteAGPRCopyMFMALegacy : public MachineFunctionPass {
603647 AU.addRequired <VirtRegMapWrapperLegacy>();
604648 AU.addRequired <LiveRegMatrixWrapperLegacy>();
605649 AU.addRequired <LiveStacksWrapperLegacy>();
650+ AU.addRequired <MachineDominatorTreeWrapperPass>();
606651
607652 AU.addPreserved <LiveIntervalsWrapperPass>();
608653 AU.addPreserved <VirtRegMapWrapperLegacy>();
609654 AU.addPreserved <LiveRegMatrixWrapperLegacy>();
610655 AU.addPreserved <LiveStacksWrapperLegacy>();
656+ AU.addPreserved <MachineDominatorTreeWrapperPass>();
611657
612658 AU.setPreservesAll ();
613659 MachineFunctionPass::getAnalysisUsage (AU);
@@ -622,6 +668,7 @@ INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
622668INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy)
623669INITIALIZE_PASS_DEPENDENCY(LiveRegMatrixWrapperLegacy)
624670INITIALIZE_PASS_DEPENDENCY(LiveStacksWrapperLegacy)
671+ INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
625672INITIALIZE_PASS_END(AMDGPURewriteAGPRCopyMFMALegacy, DEBUG_TYPE,
626673 " AMDGPU Rewrite AGPR-Copy-MFMA" , false , false )
627674
@@ -641,7 +688,8 @@ bool AMDGPURewriteAGPRCopyMFMALegacy::runOnMachineFunction(
641688 auto &LRM = getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM ();
642689 auto &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS ();
643690 auto &LSS = getAnalysis<LiveStacksWrapperLegacy>().getLS ();
644- AMDGPURewriteAGPRCopyMFMAImpl Impl (MF, VRM, LRM, LIS, LSS, RegClassInfo);
691+ auto &MDT = getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree ();
692+ AMDGPURewriteAGPRCopyMFMAImpl Impl (MF, VRM, LRM, LIS, LSS, RegClassInfo, MDT);
645693 return Impl.run (MF);
646694}
647695
@@ -652,10 +700,12 @@ AMDGPURewriteAGPRCopyMFMAPass::run(MachineFunction &MF,
652700 LiveRegMatrix &LRM = MFAM.getResult <LiveRegMatrixAnalysis>(MF);
653701 LiveIntervals &LIS = MFAM.getResult <LiveIntervalsAnalysis>(MF);
654702 LiveStacks &LSS = MFAM.getResult <LiveStacksAnalysis>(MF);
703+ MachineDominatorTree &MDT =
704+ MFAM.getResult <MachineDominatorTreeAnalysis>(MF);
655705 RegisterClassInfo RegClassInfo;
656706 RegClassInfo.runOnMachineFunction (MF);
657707
658- AMDGPURewriteAGPRCopyMFMAImpl Impl (MF, VRM, LRM, LIS, LSS, RegClassInfo);
708+ AMDGPURewriteAGPRCopyMFMAImpl Impl (MF, VRM, LRM, LIS, LSS, RegClassInfo, MDT );
659709 if (!Impl.run (MF))
660710 return PreservedAnalyses::all ();
661711 auto PA = getMachineFunctionPassPreservedAnalyses ();
0 commit comments