@@ -99,21 +99,21 @@ void GCNRegPressure::inc(unsigned Reg,
9999bool GCNRegPressure::less (const MachineFunction &MF, const GCNRegPressure &O,
100100 unsigned MaxOccupancy) const {
101101 const GCNSubtarget &ST = MF.getSubtarget <GCNSubtarget>();
102- unsigned MaxArchVGPRs = ST.getAddressableNumArchVGPRs ( );
102+ unsigned ArchVGPRThreshold = ST.getArchVGPRAllocationThreshold (MF );
103103 unsigned DynamicVGPRBlockSize =
104104 MF.getInfo <SIMachineFunctionInfo>()->getDynamicVGPRBlockSize ();
105105
106106 const auto SGPROcc = std::min (MaxOccupancy,
107107 ST.getOccupancyWithNumSGPRs (getSGPRNum ()));
108108 const auto VGPROcc = std::min (
109- MaxOccupancy,
110- ST. getOccupancyWithNumVGPRs ( getVGPRNum (ST.hasGFX90AInsts (), MaxArchVGPRs ),
111- DynamicVGPRBlockSize));
109+ MaxOccupancy, ST. getOccupancyWithNumVGPRs (
110+ getVGPRNum (ST.hasGFX90AInsts (), ArchVGPRThreshold ),
111+ DynamicVGPRBlockSize));
112112 const auto OtherSGPROcc = std::min (MaxOccupancy,
113113 ST.getOccupancyWithNumSGPRs (O.getSGPRNum ()));
114114 const auto OtherVGPROcc = std::min (
115115 MaxOccupancy, ST.getOccupancyWithNumVGPRs (
116- O.getVGPRNum (ST.hasGFX90AInsts (), MaxArchVGPRs ),
116+ O.getVGPRNum (ST.hasGFX90AInsts (), ArchVGPRThreshold ),
117117 DynamicVGPRBlockSize));
118118
119119 const auto Occ = std::min (SGPROcc, VGPROcc);
@@ -139,34 +139,37 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
139139
140140 // Unified excess pressure conditions, accounting for VGPRs used for SGPR
141141 // spills
142- unsigned ExcessVGPR =
143- std::max ( static_cast <int >(getVGPRNum (ST.hasGFX90AInsts (), MaxArchVGPRs ) +
144- VGPRForSGPRSpills - MaxVGPRs),
145- 0 );
142+ unsigned ExcessVGPR = std::max (
143+ static_cast <int >(getVGPRNum (ST.hasGFX90AInsts (), ArchVGPRThreshold ) +
144+ VGPRForSGPRSpills - MaxVGPRs),
145+ 0 );
146146 unsigned OtherExcessVGPR = std::max (
147- static_cast <int >(O.getVGPRNum (ST.hasGFX90AInsts (), MaxArchVGPRs ) +
147+ static_cast <int >(O.getVGPRNum (ST.hasGFX90AInsts (), ArchVGPRThreshold ) +
148148 OtherVGPRForSGPRSpills - MaxVGPRs),
149149 0 );
150150 // Arch VGPR excess pressure conditions, accounting for VGPRs used for SGPR
151151 // spills
152+ unsigned AddressableArchVGPRs = ST.getAddressableNumArchVGPRs ();
152153 unsigned ExcessArchVGPR =
153- std::max (static_cast <int >(getVGPRNum (false , MaxArchVGPRs ) +
154- VGPRForSGPRSpills - MaxArchVGPRs ),
154+ std::max (static_cast <int >(getVGPRNum (false , ArchVGPRThreshold ) +
155+ VGPRForSGPRSpills - AddressableArchVGPRs ),
155156 0 );
156157 unsigned OtherExcessArchVGPR =
157- std::max (static_cast <int >(O.getVGPRNum (false , MaxArchVGPRs ) +
158- OtherVGPRForSGPRSpills - MaxArchVGPRs ),
158+ std::max (static_cast <int >(O.getVGPRNum (false , ArchVGPRThreshold ) +
159+ OtherVGPRForSGPRSpills - AddressableArchVGPRs ),
159160 0 );
160161 // AGPR excess pressure conditions
161162 unsigned ExcessAGPR =
162- std::max (static_cast <int >(ST.hasGFX90AInsts ()
163- ? (getAGPRNum (MaxArchVGPRs) - MaxArchVGPRs)
164- : (getAGPRNum (MaxArchVGPRs) - MaxVGPRs)),
163+ std::max (static_cast <int >(
164+ ST.hasGFX90AInsts ()
165+ ? (getAGPRNum (ArchVGPRThreshold) - AddressableArchVGPRs)
166+ : (getAGPRNum (ArchVGPRThreshold) - MaxVGPRs)),
165167 0 );
166168 unsigned OtherExcessAGPR = std::max (
167- static_cast <int >(ST.hasGFX90AInsts ()
168- ? (O.getAGPRNum (MaxArchVGPRs) - MaxArchVGPRs)
169- : (O.getAGPRNum (MaxArchVGPRs) - MaxVGPRs)),
169+ static_cast <int >(
170+ ST.hasGFX90AInsts ()
171+ ? (O.getAGPRNum (ArchVGPRThreshold) - AddressableArchVGPRs)
172+ : (O.getAGPRNum (ArchVGPRThreshold) - MaxVGPRs)),
170173 0 );
171174
172175 bool ExcessRP = ExcessSGPR || ExcessVGPR || ExcessArchVGPR || ExcessAGPR;
@@ -187,20 +190,20 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
187190 return VGPRDiff > 0 ;
188191 if (SGPRDiff != 0 ) {
189192 unsigned PureExcessVGPR =
190- std::max (
191- static_cast < int >( getVGPRNum (ST.hasGFX90AInsts (), MaxArchVGPRs ) -
192- MaxVGPRs),
193- 0 ) +
194- std::max (
195- static_cast < int >( getVGPRNum ( false , MaxArchVGPRs) - MaxArchVGPRs ),
196- 0 );
193+ std::max (static_cast < int >(
194+ getVGPRNum (ST.hasGFX90AInsts (), ArchVGPRThreshold ) -
195+ MaxVGPRs),
196+ 0 ) +
197+ std::max (static_cast < int >( getVGPRNum ( false , ArchVGPRThreshold) -
198+ AddressableArchVGPRs ),
199+ 0 );
197200 unsigned OtherPureExcessVGPR =
198- std::max (
199- static_cast < int >( O.getVGPRNum (ST.hasGFX90AInsts (), MaxArchVGPRs ) -
200- MaxVGPRs),
201- 0 ) +
202- std::max (static_cast <int >(O.getVGPRNum (false , MaxArchVGPRs ) -
203- MaxArchVGPRs ),
201+ std::max (static_cast < int >(
202+ O.getVGPRNum (ST.hasGFX90AInsts (), ArchVGPRThreshold ) -
203+ MaxVGPRs),
204+ 0 ) +
205+ std::max (static_cast <int >(O.getVGPRNum (false , ArchVGPRThreshold ) -
206+ AddressableArchVGPRs ),
204207 0 );
205208
206209 // If we have a special case where there is a tie in excess VGPR, but one
@@ -231,41 +234,42 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
231234 if (SW != OtherSW)
232235 return SW < OtherSW;
233236 } else {
234- auto VW = getVGPRTuplesWeight (MaxArchVGPRs );
235- auto OtherVW = O.getVGPRTuplesWeight (MaxArchVGPRs );
237+ auto VW = getVGPRTuplesWeight (ArchVGPRThreshold );
238+ auto OtherVW = O.getVGPRTuplesWeight (ArchVGPRThreshold );
236239 if (VW != OtherVW)
237240 return VW < OtherVW;
238241 }
239242 }
240243
241244 // Give final precedence to lower general RP.
242245 return SGPRImportant ? (getSGPRNum () < O.getSGPRNum ())
243- : (getVGPRNum (ST.hasGFX90AInsts (), MaxArchVGPRs ) <
244- O.getVGPRNum (ST.hasGFX90AInsts (), MaxArchVGPRs ));
246+ : (getVGPRNum (ST.hasGFX90AInsts (), ArchVGPRThreshold ) <
247+ O.getVGPRNum (ST.hasGFX90AInsts (), ArchVGPRThreshold ));
245248}
246249
// Diff hunk for llvm::print. Lines marked with - are the previous body and
// lines marked with + are its replacement; unmarked lines are shared context.
//
// The function returns a Printable whose callback streams a one-line summary
// of RP to a raw_ostream: the arch VGPR and AGPR counts, the SGPR count, the
// VGPR and SGPR tuple weights, and, inside the if (ST) guards, the figures
// returned by getOccupancyWithNumVGPRs, getOccupancyWithNumSGPRs and
// RP.getOccupancy.
//
// What the + lines change:
//   * a new parameter, const MachineFunction *MF, is appended to the signature;
//   * ST->getArchVGPRAllocationThreshold(*MF) is evaluated once, before the
//     Printable is built, and the result is captured by value in the lambda;
//   * that value replaces every ST->getAddressableNumArchVGPRs() argument that
//     the - lines passed to getArchVGPRNum, getAGPRNum, getVGPRNum and
//     getVGPRTuplesWeight;
//   * RP.getOccupancy additionally receives *MF.
247250Printable llvm::print (const GCNRegPressure &RP, const GCNSubtarget *ST,
248- unsigned DynamicVGPRBlockSize) {
249- return Printable ([&RP, ST, DynamicVGPRBlockSize](raw_ostream &OS) {
250- OS << " VGPRs: " << RP.getArchVGPRNum (ST->getAddressableNumArchVGPRs ())
251- << ' ' << " AGPRs: " << RP.getAGPRNum (ST->getAddressableNumArchVGPRs ());
252- if (ST)
253- OS << " (O"
254- << ST->getOccupancyWithNumVGPRs (
255- RP.getVGPRNum (ST->hasGFX90AInsts (),
256- ST->getAddressableNumArchVGPRs ()),
257- DynamicVGPRBlockSize)
258- << ' )' ;
259- OS << " , SGPRs: " << RP.getSGPRNum ();
260- if (ST)
261- OS << " (O" << ST->getOccupancyWithNumSGPRs (RP.getSGPRNum ()) << ' )' ;
262- OS << " , LVGPR WT: "
263- << RP.getVGPRTuplesWeight (ST->getAddressableNumArchVGPRs ())
264- << " , LSGPR WT: " << RP.getSGPRTuplesWeight ();
265- if (ST)
266- OS << " -> Occ: " << RP.getOccupancy (*ST, DynamicVGPRBlockSize);
267- OS << ' \n ' ;
268- });
251+ unsigned DynamicVGPRBlockSize,
252+ const MachineFunction *MF) {
// NOTE(review): the next statement dereferences both ST and MF with no null
// check, and it runs eagerly when print() is called rather than when the
// Printable is streamed. The if (ST) guards inside the lambda therefore cannot
// protect against a null ST. The - lines had the same shape, since they called
// ST->getAddressableNumArchVGPRs() outside any guard. Whether a null ST or MF
// can reach this function is not visible in this hunk; confirm against the
// declaration and the call sites, then either assert non-null here or drop the
// guards.
253+ unsigned ArchVGPRThreshold = ST->getArchVGPRAllocationThreshold (*MF);
254+ return Printable (
// RP is captured by reference while ST, MF, DynamicVGPRBlockSize and
// ArchVGPRThreshold are captured by value, so the returned Printable must be
// streamed while the referenced GCNRegPressure is still alive.
255+ [&RP, ST, DynamicVGPRBlockSize, ArchVGPRThreshold, MF](raw_ostream &OS) {
256+ OS << " VGPRs: " << RP.getArchVGPRNum (ArchVGPRThreshold) << ' '
257+ << " AGPRs: " << RP.getAGPRNum (ArchVGPRThreshold);
258+ if (ST)
259+ OS << " (O"
260+ << ST->getOccupancyWithNumVGPRs (
261+ RP.getVGPRNum (ST->hasGFX90AInsts (), ArchVGPRThreshold),
262+ DynamicVGPRBlockSize)
263+ << ' )' ;
264+ OS << " , SGPRs: " << RP.getSGPRNum ();
265+ if (ST)
266+ OS << " (O" << ST->getOccupancyWithNumSGPRs (RP.getSGPRNum ()) << ' )' ;
267+ OS << " , LVGPR WT: " << RP.getVGPRTuplesWeight (ArchVGPRThreshold)
268+ << " , LSGPR WT: " << RP.getSGPRTuplesWeight ();
269+ if (ST)
270+ OS << " -> Occ: " << RP.getOccupancy (*ST, DynamicVGPRBlockSize, *MF);
271+ OS << ' \n ' ;
272+ });
269273}
270274
271275static LaneBitmask getDefRegMask (const MachineOperand &MO,
@@ -899,9 +903,10 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
// Diff hunk inside GCNRegPressurePrinter::runOnMachineFunction. printRP is a
// local helper that wraps a GCNRegPressure in a Printable; when streamed it
// writes RP.getSGPRNum() followed by RP.getVGPRNum(false, ...), each through a
// %-5d format, with PFX prepended to the first format string.
// The + lines change only the second argument of getVGPRNum: it was the
// subtarget value getAddressableNumArchVGPRs() and becomes
// getArchVGPRAllocationThreshold(MF).
// Both RP and MF are captured by reference, so the Printable has to be
// streamed while the referenced objects are still alive.
899903 auto printRP = [&MF](const GCNRegPressure &RP) {
900904 return Printable ([&RP, &MF](raw_ostream &OS) {
901905 OS << format (PFX " %-5d" , RP.getSGPRNum ())
902- << format (" %-5d" ,
903- RP.getVGPRNum (false , MF.getSubtarget <GCNSubtarget>()
904- .getAddressableNumArchVGPRs ()));
906+ << format (
907+ " %-5d" ,
908+ RP.getVGPRNum (false , MF.getSubtarget <GCNSubtarget>()
909+ .getArchVGPRAllocationThreshold (MF)));
905910 });
906911 };
907912
0 commit comments