Skip to content

Commit a38a924

Browse files
lucas-rami authored and
jrbyrnes committed
[AMDGPU] Add GCNRPTarget to track register pressure against a target (llvm#145765)
This adds the `GCNRPTarget` class, which models a register pressure target (i.e., a maximum number of SGPRs/VGPRs) that one can track register savings against. The only current use of this class is in the scheduler's rematerialization stage. It replaces the more ad-hoc (and now deleted) `ExcessRP` class, which used to serve the same purpose. This is only NFC~ish because `GCNRPTarget` tracks VGPR usage more accurately than `ExcessRP` used to. To estimate the required combined VGPR savings, we now additionally take into account the number of available VGPRs in both banks (ArchVGPR and AGPR) at the time the RP target is created, whereas we used to consider only explicit savings made from the starting RP. This makes VGPR savings estimates more accurate in cases where we allow savings in one VGPR bank to help reduce pressure in the other VGPR bank (see `GCNRPTarget::CombineVGPRSavings`). This is the cause of the unit test changes.
1 parent 77f3909 commit a38a924

File tree

4 files changed

+266
-277
lines changed

4 files changed

+266
-277
lines changed

llvm/lib/Target/AMDGPU/GCNRegPressure.cpp

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,69 @@ static LaneBitmask findUseBetween(unsigned Reg, LaneBitmask LastUseMask,
365365
return LastUseMask;
366366
}
367367

368+
////////////////////////////////////////////////////////////////////////////////
369+
// GCNRPTarget
370+
371+
// Builds a target for the function of \p MF, starting from pressure \p RP.
// The requested limits are the subtarget's per-function maximums
// (ST.getMaxNumSGPRs(F) / ST.getMaxNumVGPRs(F)); setRegLimits stores them
// after clamping to the addressable register counts.
GCNRPTarget::GCNRPTarget(const MachineFunction &MF, const GCNRegPressure &RP,
372+
bool CombineVGPRSavings)
373+
: RP(RP), CombineVGPRSavings(CombineVGPRSavings) {
374+
const Function &F = MF.getFunction();
375+
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
376+
setRegLimits(ST.getMaxNumSGPRs(F), ST.getMaxNumVGPRs(F), MF);
377+
}
378+
379+
// Builds a target from explicit register counts, starting from pressure
// \p RP. \p NumSGPRs and \p NumVGPRs are forwarded unchanged to setRegLimits,
// which clamps them to the addressable register counts of \p MF's subtarget.
GCNRPTarget::GCNRPTarget(unsigned NumSGPRs, unsigned NumVGPRs,
380+
const MachineFunction &MF, const GCNRegPressure &RP,
381+
bool CombineVGPRSavings)
382+
: RP(RP), CombineVGPRSavings(CombineVGPRSavings) {
383+
setRegLimits(NumSGPRs, NumVGPRs, MF);
384+
}
385+
386+
// Builds a target from an occupancy, starting from pressure \p RP. The
// requested limits are the subtarget's maximum SGPR and VGPR counts for
// \p Occupancy; setRegLimits clamps them to the addressable register counts.
GCNRPTarget::GCNRPTarget(unsigned Occupancy, const MachineFunction &MF,
387+
const GCNRegPressure &RP, bool CombineVGPRSavings)
388+
: RP(RP), CombineVGPRSavings(CombineVGPRSavings) {
389+
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
390+
// The VGPR query also depends on the function's dynamic VGPR block size.
unsigned DynamicVGPRBlockSize =
391+
MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
392+
// The SGPR limit is queried with Addressable=false.
setRegLimits(ST.getMaxNumSGPRs(Occupancy, /*Addressable=*/false),
393+
ST.getMaxNumVGPRs(Occupancy, DynamicVGPRBlockSize), MF);
394+
}
395+
396+
// Initializes MaxSGPRs, MaxVGPRs and MaxUnifiedVGPRs from the requested
// \p NumSGPRs / \p NumVGPRs. Each limit is the minimum of the requested count
// and the corresponding addressable register count of \p MF's subtarget.
void GCNRPTarget::setRegLimits(unsigned NumSGPRs, unsigned NumVGPRs,
397+
const MachineFunction &MF) {
398+
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
399+
unsigned DynamicVGPRBlockSize =
400+
MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
401+
MaxSGPRs = std::min(ST.getAddressableNumSGPRs(), NumSGPRs);
402+
// The per-bank VGPR limit is bounded by the addressable ArchVGPR count.
MaxVGPRs = std::min(ST.getAddressableNumArchVGPRs(), NumVGPRs);
403+
// The unified limit is only set when ST.hasGFX90AInsts() holds; otherwise it
// is 0, which satisfiesUnifiedTarget() treats as "no unified constraint".
MaxUnifiedVGPRs =
404+
ST.hasGFX90AInsts()
405+
? std::min(ST.getAddressableNumVGPRs(DynamicVGPRBlockSize), NumVGPRs)
406+
: 0;
407+
}
408+
409+
// Returns whether saving register \p Reg would help reach the target, based
// on the register class \p MRI reports for it.
bool GCNRPTarget::isSaveBeneficial(Register Reg,
410+
const MachineRegisterInfo &MRI) const {
411+
const TargetRegisterClass *RC = MRI.getRegClass(Reg);
412+
const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
413+
// NOTE(review): unchecked downcast; assumes the register info is always an
// SIRegisterInfo here.
const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(TRI);
414+
415+
// An SGPR save only helps while SGPR pressure exceeds its limit.
if (SRI->isSGPRClass(RC))
416+
return RP.getSGPRNum() > MaxSGPRs;
417+
// Otherwise use the pressure of the bank the class belongs to (AGPR or
// ArchVGPR) and defer to the VGPR bank check.
unsigned NumVGPRs =
418+
SRI->isAGPRClass(RC) ? RP.getAGPRNum() : RP.getArchVGPRNum();
419+
return isVGPRBankSaveBeneficial(NumVGPRs);
420+
}
421+
422+
// Returns true when the tracked pressure meets every limit of the target:
// the SGPR limit, the VGPR limit (possibly relaxed by combined savings) and
// the unified VGPR limit.
bool GCNRPTarget::satisfied() const {
423+
if (RP.getSGPRNum() > MaxSGPRs)
424+
return false;
425+
// Exceeding MaxVGPRs fails the target unless combined savings are enabled
// and the summed ArchVGPR + AGPR usage fits satisifiesVGPRBanksTarget().
// The !CombineVGPRSavings test short-circuits first, so that helper's
// assertion on CombineVGPRSavings cannot fire here.
if (RP.getVGPRNum(false) > MaxVGPRs &&
426+
(!CombineVGPRSavings || !satisifiesVGPRBanksTarget()))
427+
return false;
428+
// Trivially true when MaxUnifiedVGPRs is 0.
return satisfiesUnifiedTarget();
429+
}
430+
368431
///////////////////////////////////////////////////////////////////////////////
369432
// GCNRPTracker
370433

llvm/lib/Target/AMDGPU/GCNRegPressure.h

Lines changed: 96 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,101 @@ inline GCNRegPressure operator-(const GCNRegPressure &P1,
176176
return Diff;
177177
}
178178

179+
////////////////////////////////////////////////////////////////////////////////
180+
// GCNRPTarget
181+
182+
/// Models a register pressure target, allowing to evaluate and track register
183+
/// savings against that target from a starting \ref GCNRegPressure.
184+
class GCNRPTarget {
185+
public:
186+
/// Sets up the target such that the register pressure starting at \p RP does
187+
/// not show register spilling on function \p MF (w.r.t. the function's
188+
/// minimum target occupancy).
189+
GCNRPTarget(const MachineFunction &MF, const GCNRegPressure &RP,
190+
bool CombineVGPRSavings = false);
191+
192+
/// Sets up the target such that the register pressure starting at \p RP does
193+
/// not use more than \p NumSGPRs SGPRs and \p NumVGPRs VGPRs on function \p
194+
/// MF.
195+
GCNRPTarget(unsigned NumSGPRs, unsigned NumVGPRs, const MachineFunction &MF,
196+
const GCNRegPressure &RP, bool CombineVGPRSavings = false);
197+
198+
/// Sets up the target such that the register pressure starting at \p RP does
199+
/// not prevent achieving an occupancy of at least \p Occupancy on function
200+
/// \p MF.
201+
GCNRPTarget(unsigned Occupancy, const MachineFunction &MF,
202+
const GCNRegPressure &RP, bool CombineVGPRSavings = false);
203+
204+
/// Returns the register pressure currently tracked against the target.
const GCNRegPressure &getCurrentRP() const { return RP; }
205+
206+
/// Replaces the tracked register pressure with \p NewRP; the target limits
/// are left unchanged.
void setRP(const GCNRegPressure &NewRP) { RP = NewRP; }
207+
208+
/// Determines whether saving virtual register \p Reg will be beneficial
209+
/// towards achieving the RP target.
210+
bool isSaveBeneficial(Register Reg, const MachineRegisterInfo &MRI) const;
211+
212+
/// Saves virtual register \p Reg with lanemask \p Mask. The tracked RP is
/// updated through RP.inc, called with \p Mask and an empty lane mask.
213+
void saveReg(Register Reg, LaneBitmask Mask, const MachineRegisterInfo &MRI) {
214+
RP.inc(Reg, Mask, LaneBitmask::getNone(), MRI);
215+
}
216+
217+
/// Whether the current RP is at or below the defined pressure target.
218+
bool satisfied() const;
219+
220+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
221+
/// Prints current versus target register counts. ArchVGPRs and AGPRs are
/// both reported against MaxVGPRs. A unified figure is appended when
/// MaxUnifiedVGPRs is non-zero; otherwise a combined figure (against
/// 2 * MaxVGPRs) is appended when CombineVGPRSavings is set.
friend raw_ostream &operator<<(raw_ostream &OS, const GCNRPTarget &Target) {
222+
OS << "Actual/Target: " << Target.RP.getSGPRNum() << '/' << Target.MaxSGPRs
223+
<< " SGPRs, " << Target.RP.getArchVGPRNum() << '/' << Target.MaxVGPRs
224+
<< " ArchVGPRs, " << Target.RP.getAGPRNum() << '/' << Target.MaxVGPRs
225+
<< " AGPRs";
226+
227+
if (Target.MaxUnifiedVGPRs) {
228+
OS << ", " << Target.RP.getVGPRNum(true) << '/' << Target.MaxUnifiedVGPRs
229+
<< " VGPRs (unified)";
230+
} else if (Target.CombineVGPRSavings) {
231+
OS << ", " << Target.RP.getArchVGPRNum() + Target.RP.getAGPRNum() << '/'
232+
<< 2 * Target.MaxVGPRs << " VGPRs (combined target)";
233+
}
234+
return OS;
235+
}
236+
#endif
237+
238+
private:
239+
/// Current register pressure.
240+
GCNRegPressure RP;
241+
242+
/// Target number of SGPRs.
243+
unsigned MaxSGPRs;
244+
/// Target number of ArchVGPRs and AGPRs.
245+
unsigned MaxVGPRs;
246+
/// Target number of overall VGPRs for subtargets with unified RFs. Always 0
247+
/// for subtargets with non-unified RFs.
248+
unsigned MaxUnifiedVGPRs;
249+
/// Whether we consider that the register allocator will be able to swap
250+
/// between ArchVGPRs and AGPRs by copying them to a super register class.
251+
/// Concretely, this allows savings in one of the VGPR banks to help toward
252+
/// savings in the other VGPR bank.
253+
bool CombineVGPRSavings;
254+
255+
/// Whether the sum of ArchVGPR and AGPR pressure fits within twice the
/// per-bank limit. Must only be called when CombineVGPRSavings is set.
inline bool satisifiesVGPRBanksTarget() const {
256+
assert(CombineVGPRSavings && "only makes sense with combined savings");
257+
return RP.getArchVGPRNum() + RP.getAGPRNum() <= 2 * MaxVGPRs;
258+
}
259+
260+
/// Always satisfied when the subtarget doesn't have a unified RF.
261+
inline bool satisfiesUnifiedTarget() const {
262+
return !MaxUnifiedVGPRs || RP.getVGPRNum(true) <= MaxUnifiedVGPRs;
263+
}
264+
265+
/// Whether saving a register from a VGPR bank currently holding \p NumVGPRs
/// registers helps: the bank exceeds MaxVGPRs, the unified target is not met,
/// or (with combined savings) the combined banks target is not met.
inline bool isVGPRBankSaveBeneficial(unsigned NumVGPRs) const {
266+
return NumVGPRs > MaxVGPRs || !satisfiesUnifiedTarget() ||
267+
(CombineVGPRSavings && !satisifiesVGPRBanksTarget());
268+
}
269+
270+
/// Initializes the target limits from the requested SGPR and VGPR counts for
/// function \p MF. Note that the parameter names in this declaration shadow
/// the data members of the same name.
void setRegLimits(unsigned MaxSGPRs, unsigned MaxVGPRs,
271+
const MachineFunction &MF);
272+
};
273+
179274
///////////////////////////////////////////////////////////////////////////////
180275
// GCNRPTracker
181276

@@ -384,7 +479,7 @@ getLiveRegMap(Range &&R, bool After, LiveIntervals &LIS) {
384479
if (!LI.hasSubRanges()) {
385480
for (auto SI : LiveIdxs)
386481
LiveRegMap[SII.getInstructionFromIndex(SI)][Reg] =
387-
MRI.getMaxLaneMaskForVReg(Reg);
482+
MRI.getMaxLaneMaskForVReg(Reg);
388483
} else
389484
for (const auto &S : LI.subranges()) {
390485
// constrain search for subranges by indexes live at main range

0 commit comments

Comments
 (0)