@@ -1223,22 +1223,20 @@ bool GCNSchedStage::initGCNSchedStage() {
12231223 return true ;
12241224}
12251225
1226- SlotIndex
1227- RewriteScheduleStage::findReachingDefs (MachineOperand &UseMO,
1228- LiveIntervals *LIS,
1229- SmallVectorImpl<SlotIndex> &DefIdxs) {
1226+ void RewriteScheduleStage::findReachingDefs (
1227+ MachineOperand &UseMO, LiveIntervals *LIS,
1228+ SmallVectorImpl<SlotIndex> &DefIdxs) {
12301229 assert (UseMO.isReg ());
12311230 MachineInstr *UseMI = UseMO.getParent ();
12321231 LiveInterval &UseLI = LIS->getInterval (UseMO.getReg ());
1233- auto VNInfo = UseLI.getVNInfoAt (LIS->getInstructionIndex (*UseMI));
1232+ VNInfo *VNI = UseLI.getVNInfoAt (LIS->getInstructionIndex (*UseMI));
12341233
1235- SlotIndex DefMBBStart =
1236- LIS->getMBBStartIdx (LIS->getMBBFromIndex (VNInfo->def ));
1234+ SlotIndex DefMBBStart = LIS->getMBBStartIdx (LIS->getMBBFromIndex (VNI->def ));
12371235
12381236 // If the def is in the block, then it must be the only reaching def.
1239- if (DefMBBStart != VNInfo ->def ) {
1240- DefIdxs.push_back (VNInfo ->def );
1241- return VNInfo-> def ;
1237+ if (DefMBBStart != VNI ->def ) {
1238+ DefIdxs.push_back (VNI ->def );
1239+ return ;
12421240 }
12431241
12441242 SmallPtrSet<MachineBasicBlock *, 8 > Visited;
@@ -1256,15 +1254,15 @@ RewriteScheduleStage::findReachingDefs(MachineOperand &UseMO,
12561254 MachineBasicBlock *CurrMBB = Worklist.pop_back_val ();
12571255
12581256 SlotIndex CurrMBBEnd = LIS->getMBBEndIdx (CurrMBB);
1259- auto VNInfo = UseLI.getVNInfoAt (CurrMBBEnd.getPrevSlot ());
1257+ VNInfo *VNI = UseLI.getVNInfoAt (CurrMBBEnd.getPrevSlot ());
12601258
1261- MachineBasicBlock *DefMBB = LIS->getMBBFromIndex (VNInfo ->def );
1259+ MachineBasicBlock *DefMBB = LIS->getMBBFromIndex (VNI ->def );
12621260 SlotIndex DefMBBStart = LIS->getMBBStartIdx (DefMBB);
12631261
12641262 // If there is a def in this block, then add it to the list. This is the
12651263 // reaching def of this path.
1266- if (DefMBBStart != VNInfo ->def ) {
1267- DefIdxs.push_back (VNInfo ->def );
1264+ if (DefMBBStart != VNI ->def ) {
1265+ DefIdxs.push_back (VNI ->def );
12681266 continue ;
12691267 }
12701268
@@ -1273,8 +1271,6 @@ RewriteScheduleStage::findReachingDefs(MachineOperand &UseMO,
12731271 Worklist.push_back (PredMBB);
12741272 }
12751273 }
1276-
1277- return VNInfo->def ;
12781274}
12791275
12801276void RewriteScheduleStage::findReachingUses (
@@ -1288,9 +1284,9 @@ void RewriteScheduleStage::findReachingUses(
12881284
12891285 // If we find a use that contains this DefMI in its reachingDefs, then it is
12901286 // a reaching use.
1291- if (find_if (ReachingDefIndexes, [DefIdx](SlotIndex RDIdx) {
1287+ if (any_of (ReachingDefIndexes, [DefIdx](SlotIndex RDIdx) {
12921288 return SlotIndex::isSameInstr (RDIdx, DefIdx);
1293- }) != ReachingDefIndexes. end () )
1289+ }))
12941290 ReachingUses.push_back (&UseMO);
12951291 }
12961292}
@@ -1966,27 +1962,29 @@ bool RewriteScheduleStage::initHeuristics(
19661962 // Prepare for the heuristics
19671963 for (auto &MBB : MF) {
19681964 for (auto &MI : MBB) {
1969- if (isRewriteCandidate (&MI)) {
1970- int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp (MI.getOpcode ());
1971- if (ReplacementOp == -1 )
1972- continue ;
1965+ if (!isRewriteCandidate (&MI))
1966+ continue ;
19731967
1974- RewriteCands.push_back ({&MI, MI.getOpcode ()});
1975- MI.setDesc (TII->get (ReplacementOp));
1968+ int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp (MI.getOpcode ());
1969+ if (ReplacementOp == -1 )
1970+ continue ;
19761971
1977- MachineOperand *Src2 = TII->getNamedOperand (MI, AMDGPU::OpName::src2);
1978- if (Src2->isReg ()) {
1979- SmallVector<SlotIndex, 8 > Src2ReachingDefs;
1980- findReachingDefs (*Src2, DAG.LIS , Src2ReachingDefs);
1972+ RewriteCands.push_back ({&MI, MI.getOpcode ()});
1973+ MI.setDesc (TII->get (ReplacementOp));
19811974
1982- // For any definition of the src2 register which is non-MFMA, we
1983- // insert a copy.
1984- for (SlotIndex RDIdx : Src2ReachingDefs) {
1985- MachineInstr *RD = DAG.LIS ->getInstructionFromIndex (RDIdx);
1986- if (!TII->isMAI (*RD))
1987- CopyForDef.insert (RD);
1988- }
1975+ MachineOperand *Src2 = TII->getNamedOperand (MI, AMDGPU::OpName::src2);
1976+ if (Src2->isReg ()) {
1977+ SmallVector<SlotIndex, 8 > Src2ReachingDefs;
1978+ findReachingDefs (*Src2, DAG.LIS , Src2ReachingDefs);
1979+
1980+ // For any definition of the src2 register which is non-MFMA, we
1981+ // insert a copy.
1982+ for (SlotIndex RDIdx : Src2ReachingDefs) {
1983+ MachineInstr *RD = DAG.LIS ->getInstructionFromIndex (RDIdx);
1984+ if (!TII->isMAI (*RD))
1985+ CopyForDef.insert (RD);
19891986 }
1987+ }
19901988
19911989 MachineOperand &Dst = MI.getOperand (0 );
19921990 SmallVector<MachineOperand *, 8 > DstReachingUses;
@@ -2024,36 +2022,39 @@ bool RewriteScheduleStage::initHeuristics(
20242022 DAG.MRI .setRegClass (Dst.getReg (), AGPRRC);
20252023 if (Src2->isReg ())
20262024 DAG.MRI .setRegClass (Src2->getReg (), AGPRRC);
2027- }
20282025 }
20292026 }
20302027
20312028 return true ;
20322029}
20332030
20342031int64_t RewriteScheduleStage::getRewriteCost (
2035- std::vector<std::pair<MachineInstr *, unsigned >> &RewriteCands,
2036- DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
2037- SmallPtrSetImpl<MachineInstr *> &CopyForDef) {
2032+ const std::vector<std::pair<MachineInstr *, unsigned >> &RewriteCands,
2033+ const DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
2034+ const SmallPtrSetImpl<MachineInstr *> &CopyForDef) {
2035+ MachineBranchProbabilityInfo MBPI;
2036+ MachineBlockFrequencyInfo MBFI;
2037+
20382038 MBFI.calculate (MF, MBPI, *DAG.MLI );
20392039 int64_t BestSpillCost = 0 ;
20402040 int64_t Cost = 0 ;
20412041
2042+ uint64_t EntryFreq = MBFI.getEntryFreq ().getFrequency ();
2043+
20422044 for (unsigned Region = 0 ; Region < DAG.Regions .size (); Region++) {
20432045 if (!RegionsWithExcessArchVGPR[Region])
20442046 continue ;
20452047
2046- auto PressureBefore = DAG.Pressure [Region];
2047- unsigned SpillCostBefore = PressureBefore.getVGPRSpills (ST, MF);
2048+ GCNRegPressure & PressureBefore = DAG.Pressure [Region];
2049+ unsigned SpillCostBefore = PressureBefore.getVGPRSpills (MF);
20482050
20492051 // For the cases we care about (i.e. ArchVGPR usage is greater than the
20502052 // addressable limit), rewriting alone should bring pressure to manageable
20512053 // level. If we find any such region, then the rewrite is potentially
20522054 // beneficial.
2053- auto PressureAfter = DAG.getRealRegPressure (Region);
2054- unsigned SpillCostAfter = PressureAfter.getVGPRSpills (ST, MF);
2055+ GCNRegPressure PressureAfter = DAG.getRealRegPressure (Region);
2056+ unsigned SpillCostAfter = PressureAfter.getVGPRSpills (MF);
20552057
2056- uint64_t EntryFreq = MBFI.getEntryFreq ().getFrequency ();
20572058 uint64_t BlockFreq =
20582059 MBFI.getBlockFreq (DAG.Regions [Region].first ->getParent ())
20592060 .getFrequency ();
@@ -2090,8 +2091,6 @@ int64_t RewriteScheduleStage::getRewriteCost(
20902091
20912092 unsigned CopyCost = 0 ;
20922093
2093- uint64_t EntryFreq = MBFI.getEntryFreq ().getFrequency ();
2094-
20952094 // For each CopyForDef, increase the cost by the register size while
20962095 // accounting for block frequency.
20972096 for (auto *DefMI : CopyForDef) {
@@ -2107,12 +2106,11 @@ int64_t RewriteScheduleStage::getRewriteCost(
21072106 }
21082107
21092108 // Account for CopyForUse copies in each block that the register is used.
2110- for (auto &UseEntry : CopyForUse) {
2109+ for (auto &[UseBlock, UseRegs] : CopyForUse) {
21112110 uint64_t UseFreq =
2112- EntryFreq ? MBFI.getBlockFreq (UseEntry.first ).getFrequency () / EntryFreq
2113- : 1 ;
2111+ EntryFreq ? MBFI.getBlockFreq (UseBlock).getFrequency () / EntryFreq : 1 ;
21142112
2115- for (auto UseReg : UseEntry. second ) {
2113+ for (auto UseReg : UseRegs ) {
21162114 unsigned RegSize =
21172115 DAG.TRI ->getRegSizeInBits (*DAG.MRI .getRegClass (UseReg));
21182116 unsigned NumRegs = std::max (RegSize / 32 , (unsigned )1 );
@@ -2124,9 +2122,7 @@ int64_t RewriteScheduleStage::getRewriteCost(
21242122
21252123 // Reset to the vgpr form. We must do rewriting after copy-insertion, as some
21262124 // defs of the register may require VGPR.
2127- for (auto RI : RewriteCands) {
2128- MachineInstr *MI = RI.first ;
2129-
2125+ for (auto &[MI, OriginalOpcode] : RewriteCands) {
21302126 assert (TII->isMAI (*MI));
21312127 const TargetRegisterClass *AGPRRC =
21322128 DAG.MRI .getRegClass (MI->getOperand (0 ).getReg ());
@@ -2135,18 +2131,17 @@ int64_t RewriteScheduleStage::getRewriteCost(
21352131 MachineOperand *Src2 = TII->getNamedOperand (*MI, AMDGPU::OpName::src2);
21362132 assert (Src2);
21372133
2138- if (Src2->isReg ()) {
2134+ if (Src2->isReg ())
21392135 DAG.MRI .setRegClass (Src2->getReg (), VGPRRC);
2140- }
21412136 DAG.MRI .setRegClass (MI->getOperand (0 ).getReg (), VGPRRC);
2142- MI->setDesc (TII->get (RI. second ));
2137+ MI->setDesc (TII->get (OriginalOpcode ));
21432138 }
21442139
21452140 return Cost;
21462141}
21472142
21482143bool RewriteScheduleStage::rewrite (
2149- std::vector<std::pair<MachineInstr *, unsigned >> &RewriteCands) {
2144+ const std::vector<std::pair<MachineInstr *, unsigned >> &RewriteCands) {
21502145 DenseMap<MachineInstr *, unsigned > FirstMIToRegion;
21512146 DenseMap<MachineInstr *, unsigned > LastMIToRegion;
21522147
@@ -2180,7 +2175,7 @@ bool RewriteScheduleStage::rewrite(
21802175 // want to replace the register it is using with the result of the copy, we
21812176 // must handle case 3. In the third case, we simply insert a copy after each
21822177 // of the reaching defs to connect to the copy of the reaching uses of the dst
2183- // reg. This allows us to avoid inserting copies next to the' MFMAs.
2178+ // reg. This allows us to avoid inserting copies next to the MFMAs.
21842179 //
21852180 // While inserting the copies, we maintain a map of operands which will use
21862181 // different regs (i.e. the result of the copies). For example, a case 1 src2
@@ -2191,14 +2186,14 @@ bool RewriteScheduleStage::rewrite(
21912186 // queries.
21922187 //
21932188 // While inserting the copies, we also maintain a list of registers which we
2194- // will want to reclassify as AGPR. After doing the copy isnertion and the
2189+ // will want to reclassify as AGPR. After doing the copy insertion and the
21952190 // register replacement, we can finally do the reclassification. This uses the
21962191 // redef map, as the registers we are interested in reclassifying may be
21972192 // replaced by the result of a copy. We must do this after the copy analysis
21982193 // and placement as we must have an accurate redef map -- otherwise we may end
21992194 // up creating illegal instructions.
22002195
2201- // The original registers of the MFMA that need to be reclassified as AGPR
2196+ // The original registers of the MFMA that need to be reclassified as AGPR.
22022197 std::set<Register> RewriteRegs;
22032198 // The map of an original register in the MFMA to a new register (result of a
22042199 // copy) that it should be replaced with.
@@ -2212,16 +2207,15 @@ bool RewriteScheduleStage::rewrite(
22122207 DenseMap<unsigned , DenseMap<Register, SmallPtrSet<MachineOperand *, 8 >>>
22132208 ReachingUseTracker;
22142209
2215- for (auto &RI : RewriteCands) {
2216- MachineInstr &MI = *RI.first ;
2210+ for (auto &[MI, OriginalOpcode] : RewriteCands) {
22172211
2218- int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp (MI. getOpcode ());
2212+ int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp (MI-> getOpcode ());
22192213 if (ReplacementOp == -1 )
22202214 continue ;
2221- MI. setDesc (TII->get (ReplacementOp));
2215+ MI-> setDesc (TII->get (ReplacementOp));
22222216
22232217 // Case 1: insert copies for the reaching defs of the Src2Reg.
2224- MachineOperand *Src2 = TII->getNamedOperand (MI, AMDGPU::OpName::src2);
2218+ MachineOperand *Src2 = TII->getNamedOperand (* MI, AMDGPU::OpName::src2);
22252219
22262220 if (Src2->isReg ()) {
22272221 Register Src2Reg = Src2->getReg ();
@@ -2291,7 +2285,7 @@ bool RewriteScheduleStage::rewrite(
22912285 // Case 2 and Case 3: insert copies before the reaching uses of the dsts,
22922286 // and after the reaching defs of the reaching uses of the dsts.
22932287
2294- MachineOperand *Dst = &MI. getOperand (0 );
2288+ MachineOperand *Dst = &MI-> getOperand (0 );
22952289 Register DstReg = Dst->getReg ();
22962290 if (!DstReg.isVirtual ())
22972291 return false ;
@@ -2302,7 +2296,7 @@ bool RewriteScheduleStage::rewrite(
23022296 SmallVector<MachineOperand *, 8 > DstReachingUseCopies;
23032297 SmallVector<MachineInstr *, 8 > DstUseDefsReplace;
23042298
2305- findReachingUses (& MI, DAG.LIS , DstReachingUses);
2299+ findReachingUses (MI, DAG.LIS , DstReachingUses);
23062300
23072301 for (MachineOperand *RUOp : DstReachingUses) {
23082302 if (TII->isMAI (*RUOp->getParent ()))
@@ -2366,7 +2360,7 @@ bool RewriteScheduleStage::rewrite(
23662360 MachineBasicBlock *RUBlock = RU->getParent ()->getParent ();
23672361 // Just keep track of the reaching use of this register by block. After we
23682362 // have scanned all the MFMAs we can find optimal insert pts.
2369- if (RUBlock != MI. getParent ()) {
2363+ if (RUBlock != MI-> getParent ()) {
23702364 ReachingUseTracker[RUBlock->getNumber ()][DstReg].insert (RU);
23712365 continue ;
23722366 }
0 commit comments