@@ -99,20 +99,22 @@ void GCNRegPressure::inc(unsigned Reg,
9999bool GCNRegPressure::less (const MachineFunction &MF, const GCNRegPressure &O,
100100 unsigned MaxOccupancy) const {
101101 const GCNSubtarget &ST = MF.getSubtarget <GCNSubtarget>();
102+ unsigned MaxArchVGPRs = ST.getAddressableNumArchVGPRs ();
102103 unsigned DynamicVGPRBlockSize =
103104 MF.getInfo <SIMachineFunctionInfo>()->getDynamicVGPRBlockSize ();
104105
105106 const auto SGPROcc = std::min (MaxOccupancy,
106107 ST.getOccupancyWithNumSGPRs (getSGPRNum ()));
107108 const auto VGPROcc = std::min (
108- MaxOccupancy, ST.getOccupancyWithNumVGPRs (getVGPRNum (ST.hasGFX90AInsts ()),
109- DynamicVGPRBlockSize));
109+ MaxOccupancy,
110+ ST.getOccupancyWithNumVGPRs (getVGPRNum (ST.hasGFX90AInsts (), MaxArchVGPRs),
111+ DynamicVGPRBlockSize));
110112 const auto OtherSGPROcc = std::min (MaxOccupancy,
111113 ST.getOccupancyWithNumSGPRs (O.getSGPRNum ()));
112- const auto OtherVGPROcc =
113- std::min ( MaxOccupancy,
114- ST. getOccupancyWithNumVGPRs ( O.getVGPRNum (ST.hasGFX90AInsts ()),
115- DynamicVGPRBlockSize));
114+ const auto OtherVGPROcc = std::min (
115+ MaxOccupancy, ST. getOccupancyWithNumVGPRs (
116+ O.getVGPRNum (ST.hasGFX90AInsts (), MaxArchVGPRs ),
117+ DynamicVGPRBlockSize));
116118
117119 const auto Occ = std::min (SGPROcc, VGPROcc);
118120 const auto OtherOcc = std::min (OtherSGPROcc, OtherVGPROcc);
@@ -135,35 +137,36 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
135137 unsigned OtherVGPRForSGPRSpills =
136138 (OtherExcessSGPR + (WaveSize - 1 )) / WaveSize;
137139
138- unsigned MaxArchVGPRs = ST.getAddressableNumArchVGPRs ();
139-
140140 // Unified excess pressure conditions, accounting for VGPRs used for SGPR
141141 // spills
142142 unsigned ExcessVGPR =
143- std::max (static_cast <int >(getVGPRNum (ST.hasGFX90AInsts ()) +
143+ std::max (static_cast <int >(getVGPRNum (ST.hasGFX90AInsts (), MaxArchVGPRs ) +
144144 VGPRForSGPRSpills - MaxVGPRs),
145145 0 );
146- unsigned OtherExcessVGPR =
147- std::max ( static_cast <int >(O.getVGPRNum (ST.hasGFX90AInsts ()) +
148- OtherVGPRForSGPRSpills - MaxVGPRs),
149- 0 );
146+ unsigned OtherExcessVGPR = std::max (
147+ static_cast <int >(O.getVGPRNum (ST.hasGFX90AInsts (), MaxArchVGPRs ) +
148+ OtherVGPRForSGPRSpills - MaxVGPRs),
149+ 0 );
150150 // Arch VGPR excess pressure conditions, accounting for VGPRs used for SGPR
151151 // spills
152- unsigned ExcessArchVGPR = std::max (
153- static_cast <int >(getVGPRNum (false ) + VGPRForSGPRSpills - MaxArchVGPRs),
154- 0 );
152+ unsigned ExcessArchVGPR =
153+ std::max (static_cast <int >(getVGPRNum (false , MaxArchVGPRs) +
154+ VGPRForSGPRSpills - MaxArchVGPRs),
155+ 0 );
155156 unsigned OtherExcessArchVGPR =
156- std::max (static_cast <int >(O.getVGPRNum (false ) + OtherVGPRForSGPRSpills -
157- MaxArchVGPRs),
157+ std::max (static_cast <int >(O.getVGPRNum (false , MaxArchVGPRs ) +
158+ OtherVGPRForSGPRSpills - MaxArchVGPRs),
158159 0 );
159160 // AGPR excess pressure conditions
160- unsigned ExcessAGPR = std::max (
161- static_cast <int >(ST.hasGFX90AInsts () ? (getAGPRNum () - MaxArchVGPRs)
162- : (getAGPRNum () - MaxVGPRs)),
163- 0 );
161+ unsigned ExcessAGPR =
162+ std::max (static_cast <int >(ST.hasGFX90AInsts ()
163+ ? (getAGPRNum (MaxArchVGPRs) - MaxArchVGPRs)
164+ : (getAGPRNum (MaxArchVGPRs) - MaxVGPRs)),
165+ 0 );
164166 unsigned OtherExcessAGPR = std::max (
165- static_cast <int >(ST.hasGFX90AInsts () ? (O.getAGPRNum () - MaxArchVGPRs)
166- : (O.getAGPRNum () - MaxVGPRs)),
167+ static_cast <int >(ST.hasGFX90AInsts ()
168+ ? (O.getAGPRNum (MaxArchVGPRs) - MaxArchVGPRs)
169+ : (O.getAGPRNum (MaxArchVGPRs) - MaxVGPRs)),
167170 0 );
168171
169172 bool ExcessRP = ExcessSGPR || ExcessVGPR || ExcessArchVGPR || ExcessAGPR;
@@ -184,14 +187,21 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
184187 return VGPRDiff > 0 ;
185188 if (SGPRDiff != 0 ) {
186189 unsigned PureExcessVGPR =
187- std::max (static_cast <int >(getVGPRNum (ST.hasGFX90AInsts ()) - MaxVGPRs),
188- 0 ) +
189- std::max (static_cast <int >(getVGPRNum (false ) - MaxArchVGPRs), 0 );
190+ std::max (
191+ static_cast <int >(getVGPRNum (ST.hasGFX90AInsts (), MaxArchVGPRs) -
192+ MaxVGPRs),
193+ 0 ) +
194+ std::max (
195+ static_cast <int >(getVGPRNum (false , MaxArchVGPRs) - MaxArchVGPRs),
196+ 0 );
190197 unsigned OtherPureExcessVGPR =
191198 std::max (
192- static_cast <int >(O.getVGPRNum (ST.hasGFX90AInsts ()) - MaxVGPRs),
199+ static_cast <int >(O.getVGPRNum (ST.hasGFX90AInsts (), MaxArchVGPRs) -
200+ MaxVGPRs),
193201 0 ) +
194- std::max (static_cast <int >(O.getVGPRNum (false ) - MaxArchVGPRs), 0 );
202+ std::max (static_cast <int >(O.getVGPRNum (false , MaxArchVGPRs) -
203+ MaxArchVGPRs),
204+ 0 );
195205
196206 // If we have a special case where there is a tie in excess VGPR, but one
197207 // of the pressures has VGPR usage from SGPR spills, prefer the pressure
@@ -221,33 +231,36 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
221231 if (SW != OtherSW)
222232 return SW < OtherSW;
223233 } else {
224- auto VW = getVGPRTuplesWeight ();
225- auto OtherVW = O.getVGPRTuplesWeight ();
234+ auto VW = getVGPRTuplesWeight (MaxArchVGPRs );
235+ auto OtherVW = O.getVGPRTuplesWeight (MaxArchVGPRs );
226236 if (VW != OtherVW)
227237 return VW < OtherVW;
228238 }
229239 }
230240
231241 // Give final precedence to lower general RP.
232- return SGPRImportant ? (getSGPRNum () < O.getSGPRNum ()):
233- (getVGPRNum (ST.hasGFX90AInsts ()) <
234- O.getVGPRNum (ST.hasGFX90AInsts ()));
242+ return SGPRImportant ? (getSGPRNum () < O.getSGPRNum ())
243+ : (getVGPRNum (ST.hasGFX90AInsts (), MaxArchVGPRs ) <
244+ O.getVGPRNum (ST.hasGFX90AInsts (), MaxArchVGPRs ));
235245}
236246
237247Printable llvm::print (const GCNRegPressure &RP, const GCNSubtarget *ST,
238248 unsigned DynamicVGPRBlockSize) {
239249 return Printable ([&RP, ST, DynamicVGPRBlockSize](raw_ostream &OS) {
240- OS << " VGPRs: " << RP.getArchVGPRNum () << ' '
241- << " AGPRs: " << RP.getAGPRNum ();
250+ OS << " VGPRs: " << RP.getArchVGPRNum (ST-> getAddressableNumArchVGPRs ())
251+ << ' ' << " AGPRs: " << RP.getAGPRNum (ST-> getAddressableNumArchVGPRs () );
242252 if (ST)
243253 OS << " (O"
244- << ST->getOccupancyWithNumVGPRs (RP.getVGPRNum (ST->hasGFX90AInsts ()),
245- DynamicVGPRBlockSize)
254+ << ST->getOccupancyWithNumVGPRs (
255+ RP.getVGPRNum (ST->hasGFX90AInsts (),
256+ ST->getAddressableNumArchVGPRs ()),
257+ DynamicVGPRBlockSize)
246258 << ' )' ;
247259 OS << " , SGPRs: " << RP.getSGPRNum ();
248260 if (ST)
249261 OS << " (O" << ST->getOccupancyWithNumSGPRs (RP.getSGPRNum ()) << ' )' ;
250- OS << " , LVGPR WT: " << RP.getVGPRTuplesWeight ()
262+ OS << " , LVGPR WT: "
263+ << RP.getVGPRTuplesWeight (ST->getAddressableNumArchVGPRs ())
251264 << " , LSGPR WT: " << RP.getSGPRTuplesWeight ();
252265 if (ST)
253266 OS << " -> Occ: " << RP.getOccupancy (*ST, DynamicVGPRBlockSize);
@@ -398,8 +411,9 @@ void GCNRPTarget::setRegLimits(unsigned NumSGPRs, unsigned NumVGPRs,
398411 const GCNSubtarget &ST = MF.getSubtarget <GCNSubtarget>();
399412 unsigned DynamicVGPRBlockSize =
400413 MF.getInfo <SIMachineFunctionInfo>()->getDynamicVGPRBlockSize ();
414+ AddressableNumArchVGPRs = ST.getAddressableNumArchVGPRs ();
401415 MaxSGPRs = std::min (ST.getAddressableNumSGPRs (), NumSGPRs);
402- MaxVGPRs = std::min (ST. getAddressableNumArchVGPRs () , NumVGPRs);
416+ MaxVGPRs = std::min (AddressableNumArchVGPRs , NumVGPRs);
403417 MaxUnifiedVGPRs =
404418 ST.hasGFX90AInsts ()
405419 ? std::min (ST.getAddressableNumVGPRs (DynamicVGPRBlockSize), NumVGPRs)
@@ -414,15 +428,21 @@ bool GCNRPTarget::isSaveBeneficial(Register Reg,
414428
415429 if (SRI->isSGPRClass (RC))
416430 return RP.getSGPRNum () > MaxSGPRs;
417- unsigned NumVGPRs =
418- SRI->isAGPRClass (RC) ? RP.getAGPRNum () : RP.getArchVGPRNum ();
431+
432+ bool ShouldUseAGPR =
433+ SRI->isAGPRClass (RC) ||
434+ (SRI->isVectorSuperClass (RC) &&
435+ RP.getArchVGPRNum (AddressableNumArchVGPRs) >= AddressableNumArchVGPRs);
436+ unsigned NumVGPRs = ShouldUseAGPR
437+ ? RP.getAGPRNum (AddressableNumArchVGPRs)
438+ : RP.getArchVGPRNum (AddressableNumArchVGPRs);
419439 return isVGPRBankSaveBeneficial (NumVGPRs);
420440}
421441
422442bool GCNRPTarget::satisfied () const {
423443 if (RP.getSGPRNum () > MaxSGPRs)
424444 return false ;
425- if (RP.getVGPRNum (false ) > MaxVGPRs &&
445+ if (RP.getVGPRNum (false , AddressableNumArchVGPRs ) > MaxVGPRs &&
426446 (!CombineVGPRSavings || !satisifiesVGPRBanksTarget ()))
427447 return false ;
428448 return satisfiesUnifiedTarget ();
@@ -876,10 +896,12 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
876896
877897 OS << " ---\n name: " << MF.getName () << " \n body: |\n " ;
878898
879- auto printRP = [](const GCNRegPressure &RP) {
880- return Printable ([&RP](raw_ostream &OS) {
899+ auto printRP = [&MF ](const GCNRegPressure &RP) {
900+ return Printable ([&RP, &MF ](raw_ostream &OS) {
881901 OS << format (PFX " %-5d" , RP.getSGPRNum ())
882- << format (" %-5d" , RP.getVGPRNum (false ));
902+ << format (" %-5d" ,
903+ RP.getVGPRNum (false , MF.getSubtarget <GCNSubtarget>()
904+ .getAddressableNumArchVGPRs ()));
883905 });
884906 };
885907
0 commit comments