@@ -1082,7 +1082,8 @@ bool ClusteredLowOccStage::initGCNSchedStage() {
10821082}
10831083
/// Tag printed in front of this stage's debug output so that it can be
/// filtered easily. Exposed separately from REMAT_DEBUG so that multi-line
/// debug blocks can re-emit the tag on each additional line they print.
#define REMAT_PREFIX " [PreRARemat] "
/// Emits REMAT_PREFIX to dbgs() and then executes the statement(s) \p X, all
/// under LLVM_DEBUG. Note that there must be no whitespace between the macro
/// name and its parameter list, otherwise the macro becomes object-like.
#define REMAT_DEBUG(X) LLVM_DEBUG(dbgs() << REMAT_PREFIX; X;)
10861087
10871088bool PreRARematStage::initGCNSchedStage () {
10881089 // FIXME: This pass will invalidate cached BBLiveInMap and MBBLiveIns for
@@ -1112,10 +1113,15 @@ bool PreRARematStage::initGCNSchedStage() {
11121113 rematerialize ();
11131114 if (GCNTrackers)
11141115 DAG.RegionLiveOuts .buildLiveRegMap ();
1115- REMAT_DEBUG (
1116- dbgs () << " Retrying function scheduling with new min. occupancy of "
1117- << AchievedOcc << " from rematerializing (original was "
1118- << DAG.MinOccupancy << " , target was " << TargetOcc << " )\n " );
1116+ REMAT_DEBUG ({
1117+ dbgs () << " Retrying function scheduling with new min. occupancy of "
1118+ << AchievedOcc << " from rematerializing (original was "
1119+ << DAG.MinOccupancy ;
1120+ if (TargetOcc)
1121+ dbgs () << " , target was " << *TargetOcc;
1122+ dbgs () << " )\n " ;
1123+ });
1124+
11191125 if (AchievedOcc > DAG.MinOccupancy ) {
11201126 DAG.MinOccupancy = AchievedOcc;
11211127 SIMachineFunctionInfo &MFI = *MF.getInfo <SIMachineFunctionInfo>();
@@ -1546,8 +1552,7 @@ bool ClusteredLowOccStage::shouldRevertScheduling(unsigned WavesAfter) {
15461552
15471553bool PreRARematStage::shouldRevertScheduling (unsigned WavesAfter) {
15481554 return GCNSchedStage::shouldRevertScheduling (WavesAfter) ||
1549- mayCauseSpilling (WavesAfter) ||
1550- (IncreaseOccupancy && WavesAfter < TargetOcc);
1555+ mayCauseSpilling (WavesAfter) || (TargetOcc && WavesAfter < TargetOcc);
15511556}
15521557
15531558bool ILPInitialScheduleStage::shouldRevertScheduling (unsigned WavesAfter) {
@@ -1696,78 +1701,63 @@ bool PreRARematStage::allUsesAvailableAt(const MachineInstr *InstToRemat,
16961701}
16971702
16981703bool PreRARematStage::canIncreaseOccupancyOrReduceSpill () {
1699- REMAT_DEBUG ({
1700- dbgs () << " Collecting rematerializable instructions in " ;
1701- MF.getFunction ().printAsOperand (dbgs (), false );
1702- dbgs () << ' \n ' ;
1703- });
1704+ const Function &F = MF.getFunction ();
17041705
17051706 // Maps optimizable regions (i.e., regions at minimum and register-limited
17061707 // occupancy, or regions with spilling) to the target RP we would like to
17071708 // reach.
17081709 DenseMap<unsigned , GCNRPTarget> OptRegions;
1709- const Function &F = MF.getFunction ();
1710- unsigned DynamicVGPRBlockSize =
1711- MF.getInfo <SIMachineFunctionInfo>()->getDynamicVGPRBlockSize ();
1712-
1713- std::pair<unsigned , unsigned > WavesPerEU = ST.getWavesPerEU (F);
1714- const unsigned MaxSGPRsNoSpill = ST.getMaxNumSGPRs (F);
1715- const unsigned MaxVGPRsNoSpill = ST.getMaxNumVGPRs (F);
1716- const unsigned MaxSGPRsIncOcc =
1717- ST.getMaxNumSGPRs (DAG.MinOccupancy + 1 , false );
1718- const unsigned MaxVGPRsIncOcc =
1719- ST.getMaxNumVGPRs (DAG.MinOccupancy + 1 , DynamicVGPRBlockSize);
1720- IncreaseOccupancy = WavesPerEU.second > DAG.MinOccupancy ;
1721-
1722- // Collect optimizable regions. If there is spilling in any region we will
1723- // just try to reduce spilling. Otherwise we will try to increase occupancy by
1724- // one in the whole function.
1725- for (unsigned I = 0 , E = DAG.Regions .size (); I != E; ++I) {
1726- GCNRegPressure &RP = DAG.Pressure [I];
1727- // We allow ArchVGPR or AGPR savings to count as savings of the other kind
1728- // of VGPR only when trying to eliminate spilling. We cannot do this when
1729- // trying to increase occupancy since VGPR class swaps only occur later in
1730- // the register allocator i.e., the scheduler will not be able to reason
1731- // about these savings and will not report an increase in the achievable
1732- // occupancy, triggering rollbacks.
1733- GCNRPTarget Target (MaxSGPRsNoSpill, MaxVGPRsNoSpill, MF, RP,
1734- /* CombineVGPRSavings=*/ true );
1735- if (!Target.satisfied () && IncreaseOccupancy) {
1736- // There is spilling in the region and we were so far trying to increase
1737- // occupancy. Strop trying that and focus on reducing spilling.
1738- IncreaseOccupancy = false ;
1739- OptRegions.clear ();
1740- } else if (IncreaseOccupancy) {
1741- // There is no spilling in the region, try to increase occupancy.
1742- Target = GCNRPTarget (MaxSGPRsIncOcc, MaxVGPRsIncOcc, MF, RP,
1743- /* CombineVGPRSavings=*/ false );
1710+ unsigned MaxSGPRs = ST.getMaxNumSGPRs (F);
1711+ unsigned MaxVGPRs = ST.getMaxNumVGPRs (F);
1712+ auto ResetTargetRegions = [&]() {
1713+ OptRegions.clear ();
1714+ for (unsigned I = 0 , E = DAG.Regions .size (); I != E; ++I) {
1715+ const GCNRegPressure &RP = DAG.Pressure [I];
1716+ GCNRPTarget Target (MaxSGPRs, MaxVGPRs, MF, RP);
1717+ if (!Target.satisfied ())
1718+ OptRegions.insert ({I, Target});
17441719 }
1745- if (!Target.satisfied ())
1746- OptRegions.insert ({I, Target});
1747- }
1748- if (OptRegions.empty ())
1749- return false ;
1720+ };
17501721
1751- #ifndef NDEBUG
1752- if (IncreaseOccupancy) {
1753- REMAT_DEBUG (dbgs () << " Occupancy minimal (" << DAG.MinOccupancy
1754- << " ) in regions:\n " );
1722+ ResetTargetRegions ();
1723+ if (!OptRegions.empty () || DAG.MinOccupancy >= MFI.getMaxWavesPerEU ()) {
1724+ // In addition to register usage being above addressable limits, occupancy
1725+ // below the minimum is considered like "spilling" as well.
1726+ TargetOcc = std::nullopt ;
17551727 } else {
1756- REMAT_DEBUG (dbgs () << " Spilling w.r.t. minimum target occupancy ("
1757- << WavesPerEU.first << " ) in regions:\n " );
1758- }
1759- for (unsigned I = 0 , E = DAG.Regions .size (); I != E; ++I) {
1760- if (auto OptIt = OptRegions.find (I); OptIt != OptRegions.end ())
1761- REMAT_DEBUG (dbgs () << " [" << I << " ] " << OptIt->getSecond () << ' \n ' );
1728+ // There is no spilling and room to improve occupancy; set up "increased
1729+ // occupancy targets" for all regions.
1730+ TargetOcc = DAG.MinOccupancy + 1 ;
1731+ unsigned VGPRBlockSize =
1732+ MF.getInfo <SIMachineFunctionInfo>()->getDynamicVGPRBlockSize ();
1733+ MaxSGPRs = ST.getMaxNumSGPRs (*TargetOcc, false );
1734+ MaxVGPRs = ST.getMaxNumVGPRs (*TargetOcc, VGPRBlockSize);
1735+ ResetTargetRegions ();
17621736 }
1763- #endif
1764-
1765- // When we are reducing spilling, the target is the minimum target number of
1766- // waves/EU determined by the subtarget. In cases where either one of
1767- // "amdgpu-num-sgpr" or "amdgpu-num-vgpr" are set on the function, the current
1768- // minimum region occupancy may be higher than the latter.
1769- TargetOcc = IncreaseOccupancy ? DAG.MinOccupancy + 1
1770- : std::max (DAG.MinOccupancy , WavesPerEU.first );
1737+ REMAT_DEBUG ({
1738+ dbgs () << " Analyzing " ;
1739+ MF.getFunction ().printAsOperand (dbgs (), false );
1740+ dbgs () << " : " ;
1741+ if (OptRegions.empty ()) {
1742+ dbgs () << " no objective to achieve, occupancy is maximal at "
1743+ << MFI.getMaxWavesPerEU ();
1744+ } else if (!TargetOcc) {
1745+ dbgs () << " reduce spilling (minimum target occupancy is "
1746+ << MFI.getMinWavesPerEU () << ' )' ;
1747+ } else {
1748+ dbgs () << " increase occupancy from " << DAG.MinOccupancy << " to "
1749+ << TargetOcc;
1750+ }
1751+ dbgs () << ' \n ' ;
1752+ for (unsigned I = 0 , E = DAG.Regions .size (); I != E; ++I) {
1753+ if (auto OptIt = OptRegions.find (I); OptIt != OptRegions.end ()) {
1754+ dbgs () << REMAT_PREFIX << " [" << I << " ] " << OptIt->getSecond ()
1755+ << ' \n ' ;
1756+ }
1757+ }
1758+ });
1759+ if (OptRegions.empty ())
1760+ return false ;
17711761
17721762 // Accounts for a reduction in RP in an optimizable region. Returns whether we
17731763 // estimate that we have identified enough rematerialization opportunities to
@@ -1776,7 +1766,7 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
17761766 auto ReduceRPInRegion = [&](auto OptIt, Register Reg, LaneBitmask Mask,
17771767 bool &Progress) -> bool {
17781768 GCNRPTarget &Target = OptIt->getSecond ();
1779- if (!Target.isSaveBeneficial (Reg, DAG. MRI ))
1769+ if (!Target.isSaveBeneficial (Reg))
17801770 return false ;
17811771 Progress = true ;
17821772 Target.saveReg (Reg, Mask, DAG.MRI );
@@ -1885,7 +1875,7 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
18851875 }
18861876 }
18871877
1888- if (IncreaseOccupancy ) {
1878+ if (TargetOcc ) {
18891879 // We were trying to increase occupancy but failed, abort the stage.
18901880 REMAT_DEBUG (dbgs () << " Cannot increase occupancy\n " );
18911881 Rematerializations.clear ();
@@ -1988,7 +1978,9 @@ void PreRARematStage::rematerialize() {
19881978 // All regions impacted by at least one rematerialization must be rescheduled.
19891979 // Maximum pressure must also be recomputed for all regions where it changed
19901980 // non-predictably and checked against the target occupancy.
1991- AchievedOcc = TargetOcc;
1981+ unsigned DynamicVGPRBlockSize =
1982+ MF.getInfo <SIMachineFunctionInfo>()->getDynamicVGPRBlockSize ();
1983+ AchievedOcc = MFI.getMaxWavesPerEU ();
19921984 for (auto &[I, OriginalRP] : ImpactedRegions) {
19931985 bool IsEmptyRegion = DAG.Regions [I].first == DAG.Regions [I].second ;
19941986 RescheduleRegions[I] = !IsEmptyRegion;
@@ -2012,9 +2004,8 @@ void PreRARematStage::rematerialize() {
20122004 }
20132005 }
20142006 DAG.Pressure [I] = RP;
2015- AchievedOcc = std::min (
2016- AchievedOcc, RP.getOccupancy (ST, MF.getInfo <SIMachineFunctionInfo>()
2017- ->getDynamicVGPRBlockSize ()));
2007+ AchievedOcc =
2008+ std::min (AchievedOcc, RP.getOccupancy (ST, DynamicVGPRBlockSize));
20182009 }
20192010 REMAT_DEBUG (dbgs () << " Achieved occupancy " << AchievedOcc << " \n " );
20202011}
@@ -2044,7 +2035,7 @@ void PreRARematStage::finalizeGCNSchedStage() {
20442035 // which case we do not want to rollback either (the rescheduling was already
20452036 // reverted in PreRARematStage::shouldRevertScheduling in such cases).
20462037 unsigned MaxOcc = std::max (AchievedOcc, DAG.MinOccupancy );
2047- if (!IncreaseOccupancy || MaxOcc >= TargetOcc)
2038+ if (!TargetOcc || MaxOcc >= * TargetOcc)
20482039 return ;
20492040
20502041 REMAT_DEBUG (dbgs () << " Rolling back all rematerializations\n " );
0 commit comments