@@ -1703,11 +1703,13 @@ namespace {
17031703// / Models excess register pressure in a region and tracks our progress as we
17041704// / identify rematerialization opportunities.
17051705struct ExcessRP {
1706+ // / Number of excess SGPRs.
1707+ unsigned SGPRs = 0 ;
17061708 // / Number of excess ArchVGPRs.
17071709 unsigned ArchVGPRs = 0 ;
17081710 // / Number of excess AGPRs.
17091711 unsigned AGPRs = 0 ;
1710- // / For unified register files, number of excess VGPRs.
1712+ // / For unified register files, number of excess VGPRs. 0 otherwise.
17111713 unsigned VGPRs = 0 ;
17121714 // / For unified register files with AGPR usage, number of excess ArchVGPRs to
17131715 // / save before we are able to save a whole allocation granule.
@@ -1716,28 +1718,37 @@ struct ExcessRP {
17161718 bool HasAGPRs = false ;
17171719 // / Whether the subtarget has a unified RF.
17181720 bool UnifiedRF;
1721+ // / Whether we consider that the register allocator will be able to swap
1722+ // / between ArchVGPRs and AGPRs by copying them to a super register class.
1723+ // / Concretely, this allows savings of one kind of VGPR to help toward savings
1724+ // / the other kind of VGPR.
1725+ bool CombineVGPRSavings;
17191726
17201727 // / Constructs the excess RP model; determines the excess pressure w.r.t. a
1721- // / maximum number of allowed VGPRs.
1722- ExcessRP (const GCNSubtarget &ST, const GCNRegPressure &RP, unsigned MaxVGPRs);
1728+ // / maximum number of allowed SGPRs/VGPRs.
1729+ ExcessRP (const GCNSubtarget &ST, const GCNRegPressure &RP, unsigned MaxSGPRs,
1730+ unsigned MaxVGPRs, bool CombineVGPRSavings);
17231731
1724- // / Accounts for \p NumRegs saved ArchVGPRs in the model. If \p
1725- // / UseArchVGPRForAGPRSpill is true, saved ArchVGPRs are used to save excess
1726- // / AGPRs once excess ArchVGPR pressure has been eliminated. Returns whether
1727- // / saving these ArchVGPRs helped reduce excess pressure.
1728- bool saveArchVGPRs (unsigned NumRegs, bool UseArchVGPRForAGPRSpill);
1732+ // / Accounts for \p NumRegs saved SGPRs in the model. Returns whether saving
1733+ // / these SGPRs helped reduce excess pressure.
1734+ bool saveSGPRs (unsigned NumRegs) { return saveRegs (SGPRs, NumRegs); }
17291735
1730- // / Accounts for \p NumRegs saved AGPRS in the model. Returns whether saving
1731- // / these ArchVGPRs helped reduce excess pressure.
1736+ // / Accounts for \p NumRegs saved ArchVGPRs in the model. Returns whether
1737+ // / saving these ArchVGPRs helped reduce excess pressure.
1738+ bool saveArchVGPRs (unsigned NumRegs);
1739+
1740+ // / Accounts for \p NumRegs saved AGPRs in the model. Returns whether saving
1741+ // / these AGPRs helped reduce excess pressure.
17321742 bool saveAGPRs (unsigned NumRegs);
17331743
17341744 // / Returns whether there is any excess register pressure.
1735- operator bool () const { return ArchVGPRs != 0 || AGPRs != 0 || VGPRs != 0 ; }
1745+ operator bool () const { return SGPRs || ArchVGPRs || AGPRs || VGPRs; }
17361746
17371747#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
17381748 friend raw_ostream &operator <<(raw_ostream &OS, const ExcessRP &Excess) {
1739- OS << Excess.ArchVGPRs << " ArchVGPRs, " << Excess.AGPRs << " AGPRs, and "
1740- << Excess.VGPRs << " VGPRs (next ArchVGPR aligment in "
1749+ OS << Excess.SGPRs << " SGPRs, " << Excess.ArchVGPRs << " ArchVGPRs, and "
1750+ << Excess.AGPRs << " AGPRs, (" << Excess.VGPRs
1751+ << " VGPRs in total, next ArchVGPR aligment in "
17411752 << Excess.ArchVGPRsToAlignment << " registers)\n " ;
17421753 return OS;
17431754 }
@@ -1754,12 +1765,18 @@ struct ExcessRP {
17541765} // namespace
17551766
17561767ExcessRP::ExcessRP (const GCNSubtarget &ST, const GCNRegPressure &RP,
1757- unsigned MaxVGPRs)
1758- : UnifiedRF(ST.hasGFX90AInsts()) {
1768+ unsigned MaxSGPRs, unsigned MaxVGPRs,
1769+ bool CombineVGPRSavings)
1770+ : UnifiedRF(ST.hasGFX90AInsts()), CombineVGPRSavings(CombineVGPRSavings) {
1771+ // Compute excess SGPR pressure.
1772+ unsigned NumSGPRs = RP.getSGPRNum ();
1773+ if (NumSGPRs > MaxSGPRs)
1774+ SGPRs = NumSGPRs - MaxSGPRs;
1775+
1776+ // Compute excess ArchVGPR/AGPR pressure.
17591777 unsigned NumArchVGPRs = RP.getArchVGPRNum ();
17601778 unsigned NumAGPRs = RP.getAGPRNum ();
17611779 HasAGPRs = NumAGPRs;
1762-
17631780 if (!UnifiedRF) {
17641781 // Non-unified RF. Account for excess pressure for ArchVGPRs and AGPRs
17651782 // independently.
@@ -1795,15 +1812,15 @@ ExcessRP::ExcessRP(const GCNSubtarget &ST, const GCNRegPressure &RP,
17951812 }
17961813}
17971814
1798- bool ExcessRP::saveArchVGPRs (unsigned NumRegs, bool UseArchVGPRForAGPRSpill ) {
1815+ bool ExcessRP::saveArchVGPRs (unsigned NumRegs) {
17991816 bool Progress = saveRegs (ArchVGPRs, NumRegs);
18001817 if (!NumRegs)
18011818 return Progress;
18021819
18031820 if (!UnifiedRF) {
1804- if (UseArchVGPRForAGPRSpill )
1821+ if (CombineVGPRSavings )
18051822 Progress |= saveRegs (AGPRs, NumRegs);
1806- } else if (HasAGPRs && (VGPRs || (UseArchVGPRForAGPRSpill && AGPRs))) {
1823+ } else if (HasAGPRs && (VGPRs || (CombineVGPRSavings && AGPRs))) {
18071824 // There is progress as long as there are VGPRs left to save, even if the
18081825 // save induced by this particular call does not cross an ArchVGPR alignment
18091826 // barrier.
@@ -1827,21 +1844,25 @@ bool ExcessRP::saveArchVGPRs(unsigned NumRegs, bool UseArchVGPRForAGPRSpill) {
18271844 ArchVGPRsToAlignment -= NumRegs;
18281845 }
18291846
1830- // Prioritize saving generic VGPRs, then AGPRs if we allow AGPR-to-ArchVGPR
1831- // spilling and have some free ArchVGPR slots .
1847+ // Prioritize saving generic VGPRs, then AGPRs if we consider that the
1848+ // register allocator will be able to replace an AGPR with an ArchVGPR .
18321849 saveRegs (VGPRs, NumSavedRegs);
1833- if (UseArchVGPRForAGPRSpill )
1850+ if (CombineVGPRSavings )
18341851 saveRegs (AGPRs, NumSavedRegs);
18351852 } else {
18361853 // No AGPR usage in the region i.e., no allocation granule to worry about.
18371854 Progress |= saveRegs (VGPRs, NumRegs);
18381855 }
1839-
18401856 return Progress;
18411857}
18421858
18431859bool ExcessRP::saveAGPRs (unsigned NumRegs) {
1844- return saveRegs (AGPRs, NumRegs) || saveRegs (VGPRs, NumRegs);
1860+ bool Progress = saveRegs (AGPRs, NumRegs);
1861+ if (UnifiedRF)
1862+ Progress |= saveRegs (VGPRs, NumRegs);
1863+ if (CombineVGPRSavings)
1864+ Progress |= saveRegs (ArchVGPRs, NumRegs);
1865+ return Progress;
18451866}
18461867
18471868bool PreRARematStage::canIncreaseOccupancyOrReduceSpill () {
@@ -1869,46 +1890,28 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
18691890 ST.getMaxNumVGPRs (DAG.MinOccupancy + 1 , DynamicVGPRBlockSize);
18701891 IncreaseOccupancy = WavesPerEU.second > DAG.MinOccupancy ;
18711892
1872- auto ClearOptRegionsIf = [&](bool Cond) -> bool {
1873- if (Cond) {
1874- // We won't try to increase occupancy.
1875- IncreaseOccupancy = false ;
1876- OptRegions.clear ();
1877- }
1878- return Cond;
1879- };
1880-
18811893 // Collect optimizable regions. If there is spilling in any region we will
1882- // just try to reduce ArchVGPR spilling. Otherwise we will try to increase
1883- // occupancy by one in the whole function.
1894+ // just try to reduce spilling. Otherwise we will try to increase occupancy by
1895+ // one in the whole function.
18841896 for (unsigned I = 0 , E = DAG.Regions .size (); I != E; ++I) {
18851897 GCNRegPressure &RP = DAG.Pressure [I];
1886-
1887- // Check whether SGPR pressures prevents us from eliminating spilling.
1888- unsigned NumSGPRs = RP.getSGPRNum ();
1889- if (NumSGPRs > MaxSGPRsNoSpill)
1890- ClearOptRegionsIf (IncreaseOccupancy);
1891-
1892- ExcessRP Excess (ST, RP, MaxVGPRsNoSpill);
1893- if (Excess) {
1894- ClearOptRegionsIf (IncreaseOccupancy);
1898+ // We allow ArchVGPR or AGPR savings to count as savings of the other kind
1899+ // of VGPR only when trying to eliminate spilling. We cannot do this when
1900+ // trying to increase occupancy since VGPR class swaps only occur later in
1901+ // the register allocator i.e., the scheduler will not be able to reason
1902+ // about these savings and will not report an increase in the achievable
1903+ // occupancy, triggering rollbacks.
1904+ ExcessRP Excess (ST, RP, MaxSGPRsNoSpill, MaxVGPRsNoSpill,
1905+ /* CombineVGPRSavings=*/ true );
1906+ if (Excess && IncreaseOccupancy) {
1907+ // There is spilling in the region and we were so far trying to increase
1908+ // occupancy. Stop trying that and focus on reducing spilling.
1909+ IncreaseOccupancy = false ;
1910+ OptRegions.clear ();
18951911 } else if (IncreaseOccupancy) {
1896- // Check whether SGPR pressure prevents us from increasing occupancy.
1897- if (ClearOptRegionsIf (NumSGPRs > MaxSGPRsIncOcc)) {
1898- if (DAG.MinOccupancy >= WavesPerEU.first )
1899- return false ;
1900- continue ;
1901- }
1902- if ((Excess = ExcessRP (ST, RP, MaxVGPRsIncOcc))) {
1903- // We can only rematerialize ArchVGPRs at this point.
1904- unsigned NumArchVGPRsToRemat = Excess.ArchVGPRs + Excess.VGPRs ;
1905- bool NotEnoughArchVGPRs = NumArchVGPRsToRemat > RP.getArchVGPRNum ();
1906- if (ClearOptRegionsIf (Excess.AGPRs || NotEnoughArchVGPRs)) {
1907- if (DAG.MinOccupancy >= WavesPerEU.first )
1908- return false ;
1909- continue ;
1910- }
1911- }
1912+ // There is no spilling in the region, try to increase occupancy.
1913+ Excess = ExcessRP (ST, RP, MaxSGPRsIncOcc, MaxVGPRsIncOcc,
1914+ /* CombineVGPRSavings=*/ false );
19121915 }
19131916 if (Excess)
19141917 OptRegions.insert ({I, Excess});
@@ -1928,23 +1931,27 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
19281931#endif
19291932
19301933 // When we are reducing spilling, the target is the minimum target number of
1931- // waves/EU determined by the subtarget.
1932- TargetOcc = IncreaseOccupancy ? DAG.MinOccupancy + 1 : WavesPerEU.first ;
1934+ // waves/EU determined by the subtarget. In cases where either one of
1935+ // "amdgpu-num-sgpr" or "amdgpu-num-vgpr" is set on the function, the current
1936+ // minimum region occupancy may be higher than the latter.
1937+ TargetOcc = IncreaseOccupancy ? DAG.MinOccupancy + 1
1938+ : std::max (DAG.MinOccupancy , WavesPerEU.first );
19331939
19341940 // Accounts for a reduction in RP in an optimizable region. Returns whether we
19351941 // estimate that we have identified enough rematerialization opportunities to
19361942 // achieve our goal, and sets Progress to true when this particular reduction
19371943 // in pressure was helpful toward that goal.
19381944 auto ReduceRPInRegion = [&](auto OptIt, LaneBitmask Mask,
1945+ const TargetRegisterClass *RC,
19391946 bool &Progress) -> bool {
19401947 ExcessRP &Excess = OptIt->getSecond ();
1941- // We allow saved ArchVGPRs to be considered as free spill slots for AGPRs
1942- // only when we are just trying to eliminate spilling to memory. At this
1943- // point we err on the conservative side and do not increase
1944- // register-to-register spilling for the sake of increasing occupancy.
1945- Progress |=
1946- Excess. saveArchVGPRs ( SIRegisterInfo::getNumCoveredRegs (Mask),
1947- /* UseArchVGPRForAGPRSpill= */ !IncreaseOccupancy );
1948+ unsigned NumRegs = SIRegisterInfo::getNumCoveredRegs (Mask);
1949+ if (SRI-> isSGPRClass (RC))
1950+ Progress |= Excess. saveSGPRs (NumRegs);
1951+ else if (SRI-> isAGPRClass (RC))
1952+ Progress |= Excess. saveAGPRs (NumRegs);
1953+ else
1954+ Progress |= Excess. saveArchVGPRs (NumRegs );
19481955 if (!Excess)
19491956 OptRegions.erase (OptIt->getFirst ());
19501957 return OptRegions.empty ();
@@ -1966,10 +1973,9 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
19661973 if (!isTriviallyReMaterializable (DefMI))
19671974 continue ;
19681975
1969- // We only support rematerializing virtual VGPRs with one definition.
1976+ // We only support rematerializing virtual registers with one definition.
19701977 Register Reg = DefMI.getOperand (0 ).getReg ();
1971- if (!Reg.isVirtual () || !SRI->isVGPRClass (DAG.MRI .getRegClass (Reg)) ||
1972- !DAG.MRI .hasOneDef (Reg))
1978+ if (!Reg.isVirtual () || !DAG.MRI .hasOneDef (Reg))
19731979 continue ;
19741980
19751981 // We only care to rematerialize the instruction if it has a single
@@ -2007,14 +2013,15 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
20072013 Rematerializations.try_emplace (&DefMI, UseMI).first ->second ;
20082014
20092015 bool RematUseful = false ;
2016+ const TargetRegisterClass *RC = DAG.MRI .getRegClass (Reg);
20102017 if (auto It = OptRegions.find (I); It != OptRegions.end ()) {
20112018 // Optimistically consider that moving the instruction out of its
20122019 // defining region will reduce RP in the latter; this assumes that
20132020 // maximum RP in the region is reached somewhere between the defining
20142021 // instruction and the end of the region.
20152022 REMAT_DEBUG (dbgs () << " Defining region is optimizable\n " );
20162023 LaneBitmask Mask = DAG.RegionLiveOuts .getLiveRegsForRegionIdx (I)[Reg];
2017- if (ReduceRPInRegion (It, Mask, RematUseful))
2024+ if (ReduceRPInRegion (It, Mask, RC, RematUseful))
20182025 return true ;
20192026 }
20202027
@@ -2034,7 +2041,7 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
20342041 // instruction's use.
20352042 if (auto It = OptRegions.find (LIRegion); It != OptRegions.end ()) {
20362043 REMAT_DEBUG (dbgs () << " Live-in in region " << LIRegion << ' \n ' );
2037- if (ReduceRPInRegion (It, DAG.LiveIns [LIRegion][Reg], RematUseful))
2044+ if (ReduceRPInRegion (It, DAG.LiveIns [LIRegion][Reg], RC, RematUseful))
20382045 return true ;
20392046 }
20402047 }
0 commit comments