@@ -1086,7 +1086,8 @@ bool ClusteredLowOccStage::initGCNSchedStage() {
10861086}
10871087
10881088/// Allows easy filtering of this stage's debug output.
1089- #define REMAT_DEBUG (X ) LLVM_DEBUG(dbgs() << " [PreRARemat] " ; X;)
1089+ #define REMAT_PREFIX " [PreRARemat] "
1090+ #define REMAT_DEBUG (X ) LLVM_DEBUG(dbgs() << REMAT_PREFIX; X;)
10901091
10911092bool PreRARematStage::initGCNSchedStage () {
10921093 // FIXME: This pass will invalidate cached BBLiveInMap and MBBLiveIns for
@@ -1115,10 +1116,15 @@ bool PreRARematStage::initGCNSchedStage() {
11151116 rematerialize ();
11161117 if (GCNTrackers)
11171118 DAG.RegionLiveOuts .buildLiveRegMap ();
1118- REMAT_DEBUG (
1119- dbgs () << " Retrying function scheduling with new min. occupancy of "
1120- << AchievedOcc << " from rematerializing (original was "
1121- << DAG.MinOccupancy << " , target was " << TargetOcc << " )\n " );
1119+ REMAT_DEBUG ({
1120+ dbgs () << " Retrying function scheduling with new min. occupancy of "
1121+ << AchievedOcc << " from rematerializing (original was "
1122+ << DAG.MinOccupancy ;
1123+ if (TargetOcc)
1124+ dbgs () << " , target was " << *TargetOcc;
1125+ dbgs () << " )\n " ;
1126+ });
1127+
11221128 if (AchievedOcc > DAG.MinOccupancy ) {
11231129 DAG.MinOccupancy = AchievedOcc;
11241130 SIMachineFunctionInfo &MFI = *MF.getInfo <SIMachineFunctionInfo>();
@@ -1540,8 +1546,7 @@ bool ClusteredLowOccStage::shouldRevertScheduling(unsigned WavesAfter) {
15401546
15411547bool PreRARematStage::shouldRevertScheduling (unsigned WavesAfter) {
15421548 return GCNSchedStage::shouldRevertScheduling (WavesAfter) ||
1543- mayCauseSpilling (WavesAfter) ||
1544- (IncreaseOccupancy && WavesAfter < TargetOcc);
1549+ mayCauseSpilling (WavesAfter) || (TargetOcc && WavesAfter < *TargetOcc); // TargetOcc is unset when the stage is not trying to increase occupancy, so there is then no occupancy goal to miss.
15451550}
15461551
15471552bool ILPInitialScheduleStage::shouldRevertScheduling (unsigned WavesAfter) {
@@ -1687,78 +1692,63 @@ bool PreRARematStage::allUsesAvailableAt(const MachineInstr *InstToRemat,
16871692}
16881693
16891694bool PreRARematStage::canIncreaseOccupancyOrReduceSpill () {
1690- REMAT_DEBUG ({
1691- dbgs () << " Collecting rematerializable instructions in " ;
1692- MF.getFunction ().printAsOperand (dbgs (), false );
1693- dbgs () << ' \n ' ;
1694- });
1695+ const Function &F = MF.getFunction ();
16951696
16961697 // Maps optimizable regions (i.e., regions at minimum and register-limited
16971698 // occupancy, or regions with spilling) to the target RP we would like to
16981699 // reach.
16991700 DenseMap<unsigned , GCNRPTarget> OptRegions;
1700- const Function &F = MF.getFunction ();
1701- unsigned DynamicVGPRBlockSize =
1702- MF.getInfo <SIMachineFunctionInfo>()->getDynamicVGPRBlockSize ();
1703-
1704- std::pair<unsigned , unsigned > WavesPerEU = ST.getWavesPerEU (F);
1705- const unsigned MaxSGPRsNoSpill = ST.getMaxNumSGPRs (F);
1706- const unsigned MaxVGPRsNoSpill = ST.getMaxNumVGPRs (F);
1707- const unsigned MaxSGPRsIncOcc =
1708- ST.getMaxNumSGPRs (DAG.MinOccupancy + 1 , false );
1709- const unsigned MaxVGPRsIncOcc =
1710- ST.getMaxNumVGPRs (DAG.MinOccupancy + 1 , DynamicVGPRBlockSize);
1711- IncreaseOccupancy = WavesPerEU.second > DAG.MinOccupancy ;
1712-
1713- // Collect optimizable regions. If there is spilling in any region we will
1714- // just try to reduce spilling. Otherwise we will try to increase occupancy by
1715- // one in the whole function.
1716- for (unsigned I = 0 , E = DAG.Regions .size (); I != E; ++I) {
1717- GCNRegPressure &RP = DAG.Pressure [I];
1718- // We allow ArchVGPR or AGPR savings to count as savings of the other kind
1719- // of VGPR only when trying to eliminate spilling. We cannot do this when
1720- // trying to increase occupancy since VGPR class swaps only occur later in
1721- // the register allocator i.e., the scheduler will not be able to reason
1722- // about these savings and will not report an increase in the achievable
1723- // occupancy, triggering rollbacks.
1724- GCNRPTarget Target (MaxSGPRsNoSpill, MaxVGPRsNoSpill, MF, RP,
1725- /* CombineVGPRSavings=*/ true );
1726- if (!Target.satisfied () && IncreaseOccupancy) {
1727- // There is spilling in the region and we were so far trying to increase
1728- // occupancy. Strop trying that and focus on reducing spilling.
1729- IncreaseOccupancy = false ;
1730- OptRegions.clear ();
1731- } else if (IncreaseOccupancy) {
1732- // There is no spilling in the region, try to increase occupancy.
1733- Target = GCNRPTarget (MaxSGPRsIncOcc, MaxVGPRsIncOcc, MF, RP,
1734- /* CombineVGPRSavings=*/ false );
1701+ unsigned MaxSGPRs = ST.getMaxNumSGPRs (F);
1702+ unsigned MaxVGPRs = ST.getMaxNumVGPRs (F);
1703+ auto ResetTargetRegions = [&]() {
1704+ OptRegions.clear ();
1705+ for (unsigned I = 0 , E = DAG.Regions .size (); I != E; ++I) {
1706+ const GCNRegPressure &RP = DAG.Pressure [I];
1707+ GCNRPTarget Target (MaxSGPRs, MaxVGPRs, MF, RP);
1708+ if (!Target.satisfied ())
1709+ OptRegions.insert ({I, Target});
17351710 }
1736- if (!Target.satisfied ())
1737- OptRegions.insert ({I, Target});
1738- }
1739- if (OptRegions.empty ())
1740- return false ;
1711+ };
17411712
1742- #ifndef NDEBUG
1743- if (IncreaseOccupancy) {
1744- REMAT_DEBUG (dbgs () << " Occupancy minimal (" << DAG.MinOccupancy
1745- << " ) in regions:\n " );
1713+ ResetTargetRegions ();
1714+ if (!OptRegions.empty () || DAG.MinOccupancy >= MFI.getMaxWavesPerEU ()) {
1715+ // In addition to register usage being above addressable limits, occupancy
1716+ // below the minimum is considered like "spilling" as well.
1717+ TargetOcc = std::nullopt ;
17461718 } else {
1747- REMAT_DEBUG (dbgs () << " Spilling w.r.t. minimum target occupancy ("
1748- << WavesPerEU.first << " ) in regions:\n " );
1749- }
1750- for (unsigned I = 0 , E = DAG.Regions .size (); I != E; ++I) {
1751- if (auto OptIt = OptRegions.find (I); OptIt != OptRegions.end ())
1752- REMAT_DEBUG (dbgs () << " [" << I << " ] " << OptIt->getSecond () << ' \n ' );
1719+ // There is no spilling and room to improve occupancy; set up "increased
1720+ // occupancy targets" for all regions.
1721+ TargetOcc = DAG.MinOccupancy + 1 ;
1722+ unsigned VGPRBlockSize =
1723+ MF.getInfo <SIMachineFunctionInfo>()->getDynamicVGPRBlockSize ();
1724+ MaxSGPRs = ST.getMaxNumSGPRs (*TargetOcc, false );
1725+ MaxVGPRs = ST.getMaxNumVGPRs (*TargetOcc, VGPRBlockSize);
1726+ ResetTargetRegions ();
17531727 }
1754- #endif
1755-
1756- // When we are reducing spilling, the target is the minimum target number of
1757- // waves/EU determined by the subtarget. In cases where either one of
1758- // "amdgpu-num-sgpr" or "amdgpu-num-vgpr" are set on the function, the current
1759- // minimum region occupancy may be higher than the latter.
1760- TargetOcc = IncreaseOccupancy ? DAG.MinOccupancy + 1
1761- : std::max (DAG.MinOccupancy , WavesPerEU.first );
1728+ REMAT_DEBUG ({
1729+ dbgs () << " Analyzing " ;
1730+ MF.getFunction ().printAsOperand (dbgs (), false );
1731+ dbgs () << " : " ;
1732+ if (OptRegions.empty ()) {
1733+ dbgs () << " no objective to achieve, occupancy is maximal at "
1734+ << MFI.getMaxWavesPerEU ();
1735+ } else if (!TargetOcc) {
1736+ dbgs () << " reduce spilling (minimum target occupancy is "
1737+ << MFI.getMinWavesPerEU () << ' )' ;
1738+ } else {
1739+ dbgs () << " increase occupancy from " << DAG.MinOccupancy << " to "
1740+ << TargetOcc;
1741+ }
1742+ dbgs () << ' \n ' ;
1743+ for (unsigned I = 0 , E = DAG.Regions .size (); I != E; ++I) {
1744+ if (auto OptIt = OptRegions.find (I); OptIt != OptRegions.end ()) {
1745+ dbgs () << REMAT_PREFIX << " [" << I << " ] " << OptIt->getSecond ()
1746+ << ' \n ' ;
1747+ }
1748+ }
1749+ });
1750+ if (OptRegions.empty ())
1751+ return false ;
17621752
17631753 // Accounts for a reduction in RP in an optimizable region. Returns whether we
17641754 // estimate that we have identified enough rematerialization opportunities to
@@ -1767,7 +1757,7 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
17671757 auto ReduceRPInRegion = [&](auto OptIt, Register Reg, LaneBitmask Mask,
17681758 bool &Progress) -> bool {
17691759 GCNRPTarget &Target = OptIt->getSecond ();
1770- if (!Target.isSaveBeneficial (Reg, DAG. MRI ))
1760+ if (!Target.isSaveBeneficial (Reg))
17711761 return false ;
17721762 Progress = true ;
17731763 Target.saveReg (Reg, Mask, DAG.MRI );
@@ -1876,7 +1866,7 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
18761866 }
18771867 }
18781868
1879- if (IncreaseOccupancy ) {
1869+ if (TargetOcc ) {
18801870 // We were trying to increase occupancy but failed, abort the stage.
18811871 REMAT_DEBUG (dbgs () << " Cannot increase occupancy\n " );
18821872 Rematerializations.clear ();
@@ -1979,7 +1969,9 @@ void PreRARematStage::rematerialize() {
19791969 // All regions impacted by at least one rematerialization must be rescheduled.
19801970 // Maximum pressure must also be recomputed for all regions where it changed
19811971 // non-predictably and checked against the target occupancy.
1982- AchievedOcc = TargetOcc;
1972+ unsigned DynamicVGPRBlockSize =
1973+ MF.getInfo <SIMachineFunctionInfo>()->getDynamicVGPRBlockSize ();
1974+ AchievedOcc = MFI.getMaxWavesPerEU ();
19831975 for (auto &[I, OriginalRP] : ImpactedRegions) {
19841976 bool IsEmptyRegion = DAG.Regions [I].first == DAG.Regions [I].second ;
19851977 RescheduleRegions[I] = !IsEmptyRegion;
@@ -2003,9 +1995,8 @@ void PreRARematStage::rematerialize() {
20031995 }
20041996 }
20051997 DAG.Pressure [I] = RP;
2006- AchievedOcc = std::min (
2007- AchievedOcc, RP.getOccupancy (ST, MF.getInfo <SIMachineFunctionInfo>()
2008- ->getDynamicVGPRBlockSize ()));
1998+ AchievedOcc =
1999+ std::min (AchievedOcc, RP.getOccupancy (ST, DynamicVGPRBlockSize));
20092000 }
20102001 REMAT_DEBUG (dbgs () << " Achieved occupancy " << AchievedOcc << " \n " );
20112002}
@@ -2035,7 +2026,7 @@ void PreRARematStage::finalizeGCNSchedStage() {
20352026 // which case we do not want to rollback either (the rescheduling was already
20362027 // reverted in PreRARematStage::shouldRevertScheduling in such cases).
20372028 unsigned MaxOcc = std::max (AchievedOcc, DAG.MinOccupancy );
2038- if (!IncreaseOccupancy || MaxOcc >= TargetOcc)
2029+ if (!TargetOcc || MaxOcc >= * TargetOcc)
20392030 return ;
20402031
20412032 REMAT_DEBUG (dbgs () << " Rolling back all rematerializations\n " );
0 commit comments