@@ -1041,22 +1041,20 @@ bool GCNSchedStage::initGCNSchedStage() {
10411041 return true ;
10421042}
10431043
1044- SlotIndex
1045- RewriteScheduleStage::findReachingDefs (MachineOperand &UseMO,
1046- LiveIntervals *LIS,
1047- SmallVectorImpl<SlotIndex> &DefIdxs) {
1044+ void RewriteScheduleStage::findReachingDefs (
1045+ MachineOperand &UseMO, LiveIntervals *LIS,
1046+ SmallVectorImpl<SlotIndex> &DefIdxs) {
10481047 assert (UseMO.isReg ());
10491048 MachineInstr *UseMI = UseMO.getParent ();
10501049 LiveInterval &UseLI = LIS->getInterval (UseMO.getReg ());
1051- auto VNInfo = UseLI.getVNInfoAt (LIS->getInstructionIndex (*UseMI));
1050+ VNInfo *VNI = UseLI.getVNInfoAt (LIS->getInstructionIndex (*UseMI));
10521051
1053- SlotIndex DefMBBStart =
1054- LIS->getMBBStartIdx (LIS->getMBBFromIndex (VNInfo->def ));
1052+ SlotIndex DefMBBStart = LIS->getMBBStartIdx (LIS->getMBBFromIndex (VNI->def ));
10551053
10561054 // If the def is in the block, then it must be the only reaching def.
1057- if (DefMBBStart != VNInfo ->def ) {
1058- DefIdxs.push_back (VNInfo ->def );
1059- return VNInfo-> def ;
1055+ if (DefMBBStart != VNI ->def ) {
1056+ DefIdxs.push_back (VNI ->def );
1057+ return ;
10601058 }
10611059
10621060 SmallPtrSet<MachineBasicBlock *, 8 > Visited;
@@ -1074,15 +1072,15 @@ RewriteScheduleStage::findReachingDefs(MachineOperand &UseMO,
10741072 MachineBasicBlock *CurrMBB = Worklist.pop_back_val ();
10751073
10761074 SlotIndex CurrMBBEnd = LIS->getMBBEndIdx (CurrMBB);
1077- auto VNInfo = UseLI.getVNInfoAt (CurrMBBEnd.getPrevSlot ());
1075+ VNInfo *VNI = UseLI.getVNInfoAt (CurrMBBEnd.getPrevSlot ());
10781076
1079- MachineBasicBlock *DefMBB = LIS->getMBBFromIndex (VNInfo ->def );
1077+ MachineBasicBlock *DefMBB = LIS->getMBBFromIndex (VNI ->def );
10801078 SlotIndex DefMBBStart = LIS->getMBBStartIdx (DefMBB);
10811079
10821080 // If there is a def in this block, then add it to the list. This is the
10831081 // reaching def of this path.
1084- if (DefMBBStart != VNInfo ->def ) {
1085- DefIdxs.push_back (VNInfo ->def );
1082+ if (DefMBBStart != VNI ->def ) {
1083+ DefIdxs.push_back (VNI ->def );
10861084 continue ;
10871085 }
10881086
@@ -1091,8 +1089,6 @@ RewriteScheduleStage::findReachingDefs(MachineOperand &UseMO,
10911089 Worklist.push_back (PredMBB);
10921090 }
10931091 }
1094-
1095- return VNInfo->def ;
10961092}
10971093
10981094void RewriteScheduleStage::findReachingUses (
@@ -1106,9 +1102,9 @@ void RewriteScheduleStage::findReachingUses(
11061102
11071103 // If we find a use that contains this DefMI in its reachingDefs, then it is
11081104 // a reaching use.
1109- if (find_if (ReachingDefIndexes, [DefIdx](SlotIndex RDIdx) {
1105+ if (any_of (ReachingDefIndexes, [DefIdx](SlotIndex RDIdx) {
11101106 return SlotIndex::isSameInstr (RDIdx, DefIdx);
1111- }) != ReachingDefIndexes. end () )
1107+ }))
11121108 ReachingUses.push_back (&UseMO);
11131109 }
11141110}
@@ -1769,27 +1765,29 @@ bool RewriteScheduleStage::initHeuristics(
17691765 // Prepare for the heuristics
17701766 for (auto &MBB : MF) {
17711767 for (auto &MI : MBB) {
1772- if (isRewriteCandidate (&MI)) {
1773- int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp (MI.getOpcode ());
1774- if (ReplacementOp == -1 )
1775- continue ;
1768+ if (!isRewriteCandidate (&MI))
1769+ continue ;
17761770
1777- RewriteCands.push_back ({&MI, MI.getOpcode ()});
1778- MI.setDesc (TII->get (ReplacementOp));
1771+ int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp (MI.getOpcode ());
1772+ if (ReplacementOp == -1 )
1773+ continue ;
17791774
1780- MachineOperand *Src2 = TII->getNamedOperand (MI, AMDGPU::OpName::src2);
1781- if (Src2->isReg ()) {
1782- SmallVector<SlotIndex, 8 > Src2ReachingDefs;
1783- findReachingDefs (*Src2, DAG.LIS , Src2ReachingDefs);
1775+ RewriteCands.push_back ({&MI, MI.getOpcode ()});
1776+ MI.setDesc (TII->get (ReplacementOp));
17841777
1785- // For any definition of the src2 register which is non-MFMA, we
1786- // insert a copy.
1787- for (SlotIndex RDIdx : Src2ReachingDefs) {
1788- MachineInstr *RD = DAG.LIS ->getInstructionFromIndex (RDIdx);
1789- if (!TII->isMAI (*RD))
1790- CopyForDef.insert (RD);
1791- }
1778+ MachineOperand *Src2 = TII->getNamedOperand (MI, AMDGPU::OpName::src2);
1779+ if (Src2->isReg ()) {
1780+ SmallVector<SlotIndex, 8 > Src2ReachingDefs;
1781+ findReachingDefs (*Src2, DAG.LIS , Src2ReachingDefs);
1782+
1783+ // For any definition of the src2 register which is non-MFMA, we
1784+ // insert a copy.
1785+ for (SlotIndex RDIdx : Src2ReachingDefs) {
1786+ MachineInstr *RD = DAG.LIS ->getInstructionFromIndex (RDIdx);
1787+ if (!TII->isMAI (*RD))
1788+ CopyForDef.insert (RD);
17921789 }
1790+ }
17931791
17941792 MachineOperand &Dst = MI.getOperand (0 );
17951793 SmallVector<MachineOperand *, 8 > DstReachingUses;
@@ -1827,36 +1825,39 @@ bool RewriteScheduleStage::initHeuristics(
18271825 DAG.MRI .setRegClass (Dst.getReg (), AGPRRC);
18281826 if (Src2->isReg ())
18291827 DAG.MRI .setRegClass (Src2->getReg (), AGPRRC);
1830- }
18311828 }
18321829 }
18331830
18341831 return true ;
18351832}
18361833
18371834int64_t RewriteScheduleStage::getRewriteCost (
1838- std::vector<std::pair<MachineInstr *, unsigned >> &RewriteCands,
1839- DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
1840- SmallPtrSetImpl<MachineInstr *> &CopyForDef) {
1835+ const std::vector<std::pair<MachineInstr *, unsigned >> &RewriteCands,
1836+ const DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
1837+ const SmallPtrSetImpl<MachineInstr *> &CopyForDef) {
1838+ MachineBranchProbabilityInfo MBPI;
1839+ MachineBlockFrequencyInfo MBFI;
1840+
18411841 MBFI.calculate (MF, MBPI, *DAG.MLI );
18421842 int64_t BestSpillCost = 0 ;
18431843 int64_t Cost = 0 ;
18441844
1845+ uint64_t EntryFreq = MBFI.getEntryFreq ().getFrequency ();
1846+
18451847 for (unsigned Region = 0 ; Region < DAG.Regions .size (); Region++) {
18461848 if (!RegionsWithExcessArchVGPR[Region])
18471849 continue ;
18481850
1849- auto PressureBefore = DAG.Pressure [Region];
1850- unsigned SpillCostBefore = PressureBefore.getVGPRSpills (ST, MF);
1851+ GCNRegPressure & PressureBefore = DAG.Pressure [Region];
1852+ unsigned SpillCostBefore = PressureBefore.getVGPRSpills (MF);
18511853
18521854 // For the cases we care about (i.e. ArchVGPR usage is greater than the
18531855 // addressable limit), rewriting alone should bring pressure to a manageable
18541856 // level. If we find any such region, then the rewrite is potentially
18551857 // beneficial.
1856- auto PressureAfter = DAG.getRealRegPressure (Region);
1857- unsigned SpillCostAfter = PressureAfter.getVGPRSpills (ST, MF);
1858+ GCNRegPressure PressureAfter = DAG.getRealRegPressure (Region);
1859+ unsigned SpillCostAfter = PressureAfter.getVGPRSpills (MF);
18581860
1859- uint64_t EntryFreq = MBFI.getEntryFreq ().getFrequency ();
18601861 uint64_t BlockFreq =
18611862 MBFI.getBlockFreq (DAG.Regions [Region].first ->getParent ())
18621863 .getFrequency ();
@@ -1893,8 +1894,6 @@ int64_t RewriteScheduleStage::getRewriteCost(
18931894
18941895 unsigned CopyCost = 0 ;
18951896
1896- uint64_t EntryFreq = MBFI.getEntryFreq ().getFrequency ();
1897-
18981897 // For each CopyForDef, increase the cost by the register size while
18991898 // accounting for block frequency.
19001899 for (auto *DefMI : CopyForDef) {
@@ -1910,12 +1909,11 @@ int64_t RewriteScheduleStage::getRewriteCost(
19101909 }
19111910
19121911 // Account for CopyForUse copies in each block that the register is used.
1913- for (auto &UseEntry : CopyForUse) {
1912+ for (auto &[UseBlock, UseRegs] : CopyForUse) {
19141913 uint64_t UseFreq =
1915- EntryFreq ? MBFI.getBlockFreq (UseEntry.first ).getFrequency () / EntryFreq
1916- : 1 ;
1914+ EntryFreq ? MBFI.getBlockFreq (UseBlock).getFrequency () / EntryFreq : 1 ;
19171915
1918- for (auto UseReg : UseEntry. second ) {
1916+ for (auto UseReg : UseRegs ) {
19191917 unsigned RegSize =
19201918 DAG.TRI ->getRegSizeInBits (*DAG.MRI .getRegClass (UseReg));
19211919 unsigned NumRegs = std::max (RegSize / 32 , (unsigned )1 );
@@ -1927,9 +1925,7 @@ int64_t RewriteScheduleStage::getRewriteCost(
19271925
19281926 // Reset to the vgpr form. We must do rewriting after copy-insertion, as some
19291927 // defs of the register may require VGPR.
1930- for (auto RI : RewriteCands) {
1931- MachineInstr *MI = RI.first ;
1932-
1928+ for (auto &[MI, OriginalOpcode] : RewriteCands) {
19331929 assert (TII->isMAI (*MI));
19341930 const TargetRegisterClass *AGPRRC =
19351931 DAG.MRI .getRegClass (MI->getOperand (0 ).getReg ());
@@ -1938,18 +1934,17 @@ int64_t RewriteScheduleStage::getRewriteCost(
19381934 MachineOperand *Src2 = TII->getNamedOperand (*MI, AMDGPU::OpName::src2);
19391935 assert (Src2);
19401936
1941- if (Src2->isReg ()) {
1937+ if (Src2->isReg ())
19421938 DAG.MRI .setRegClass (Src2->getReg (), VGPRRC);
1943- }
19441939 DAG.MRI .setRegClass (MI->getOperand (0 ).getReg (), VGPRRC);
1945- MI->setDesc (TII->get (RI. second ));
1940+ MI->setDesc (TII->get (OriginalOpcode ));
19461941 }
19471942
19481943 return Cost;
19491944}
19501945
19511946bool RewriteScheduleStage::rewrite (
1952- std::vector<std::pair<MachineInstr *, unsigned >> &RewriteCands) {
1947+ const std::vector<std::pair<MachineInstr *, unsigned >> &RewriteCands) {
19531948 DenseMap<MachineInstr *, unsigned > FirstMIToRegion;
19541949 DenseMap<MachineInstr *, unsigned > LastMIToRegion;
19551950
@@ -1983,7 +1978,7 @@ bool RewriteScheduleStage::rewrite(
19831978 // want to replace the register it is using with the result of the copy, we
19841979 // must handle case 3. In the third case, we simply insert a copy after each
19851980 // of the reaching defs to connect to the copy of the reaching uses of the dst
1986- // reg. This allows us to avoid inserting copies next to the' MFMAs.
1981+ // reg. This allows us to avoid inserting copies next to the MFMAs.
19871982 //
19881983 // While inserting the copies, we maintain a map of operands which will use
19891984 // different regs (i.e. the result of the copies). For example, a case 1 src2
@@ -1994,14 +1989,14 @@ bool RewriteScheduleStage::rewrite(
19941989 // queries.
19951990 //
19961991 // While inserting the copies, we also maintain a list of registers which we
1997- // will want to reclassify as AGPR. After doing the copy isnertion and the
1992+ // will want to reclassify as AGPR. After doing the copy insertion and the
19981993 // register replacement, we can finally do the reclassification. This uses the
19991994 // redef map, as the registers we are interested in reclassifying may be
20001995 // replaced by the result of a copy. We must do this after the copy analysis
20011996 // and placement as we must have an accurate redef map -- otherwise we may end
20021997 // up creating illegal instructions.
20031998
2004- // The original registers of the MFMA that need to be reclassified as AGPR
1999+ // The original registers of the MFMA that need to be reclassified as AGPR.
20052000 std::set<Register> RewriteRegs;
20062001 // The map of an original register in the MFMA to a new register (result of a
20072002 // copy) that it should be replaced with.
@@ -2015,16 +2010,15 @@ bool RewriteScheduleStage::rewrite(
20152010 DenseMap<unsigned , DenseMap<Register, SmallPtrSet<MachineOperand *, 8 >>>
20162011 ReachingUseTracker;
20172012
2018- for (auto &RI : RewriteCands) {
2019- MachineInstr &MI = *RI.first ;
2013+ for (auto &[MI, OriginalOpcode] : RewriteCands) {
20202014
2021- int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp (MI. getOpcode ());
2015+ int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp (MI-> getOpcode ());
20222016 if (ReplacementOp == -1 )
20232017 continue ;
2024- MI. setDesc (TII->get (ReplacementOp));
2018+ MI-> setDesc (TII->get (ReplacementOp));
20252019
20262020 // Case 1: insert copies for the reaching defs of the Src2Reg.
2027- MachineOperand *Src2 = TII->getNamedOperand (MI, AMDGPU::OpName::src2);
2021+ MachineOperand *Src2 = TII->getNamedOperand (* MI, AMDGPU::OpName::src2);
20282022
20292023 if (Src2->isReg ()) {
20302024 Register Src2Reg = Src2->getReg ();
@@ -2094,7 +2088,7 @@ bool RewriteScheduleStage::rewrite(
20942088 // Case 2 and Case 3: insert copies before the reaching uses of the dsts,
20952089 // and after the reaching defs of the reaching uses of the dsts.
20962090
2097- MachineOperand *Dst = &MI. getOperand (0 );
2091+ MachineOperand *Dst = &MI-> getOperand (0 );
20982092 Register DstReg = Dst->getReg ();
20992093 if (!DstReg.isVirtual ())
21002094 return false ;
@@ -2105,7 +2099,7 @@ bool RewriteScheduleStage::rewrite(
21052099 SmallVector<MachineOperand *, 8 > DstReachingUseCopies;
21062100 SmallVector<MachineInstr *, 8 > DstUseDefsReplace;
21072101
2108- findReachingUses (& MI, DAG.LIS , DstReachingUses);
2102+ findReachingUses (MI, DAG.LIS , DstReachingUses);
21092103
21102104 for (MachineOperand *RUOp : DstReachingUses) {
21112105 if (TII->isMAI (*RUOp->getParent ()))
@@ -2169,7 +2163,7 @@ bool RewriteScheduleStage::rewrite(
21692163 MachineBasicBlock *RUBlock = RU->getParent ()->getParent ();
21702164 // Just keep track of the reaching use of this register by block. After we
21712165 // have scanned all the MFMAs we can find optimal insert pts.
2172- if (RUBlock != MI. getParent ()) {
2166+ if (RUBlock != MI-> getParent ()) {
21732167 ReachingUseTracker[RUBlock->getNumber ()][DstReg].insert (RU);
21742168 continue ;
21752169 }
0 commit comments