Skip to content

Commit 640025a

Browse files
committed
Use occupancy calculation taking into account VGPR combined savings
1 parent fde9ee1 commit 640025a

11 files changed

+682
-179
lines changed

llvm/lib/Target/AMDGPU/GCNRegPressure.cpp

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "GCNRegPressure.h"
1515
#include "AMDGPU.h"
1616
#include "SIMachineFunctionInfo.h"
17+
#include "Utils/AMDGPUBaseInfo.h"
1718
#include "llvm/CodeGen/RegisterPressure.h"
1819

1920
using namespace llvm;
@@ -45,6 +46,18 @@ unsigned GCNRegPressure::getRegKind(const TargetRegisterClass *RC,
4546
: (STI->isVectorSuperClass(RC) ? AVGPR : VGPR));
4647
}
4748

49+
// Returns the occupancy achievable with this register pressure: the minimum
// of the occupancy allowed by the SGPR count and the occupancy allowed by the
// VGPR count. \p BalanceVGPRUsage only changes the VGPR count used when the
// subtarget does not report GFX90A instructions.
unsigned GCNRegPressure::getOccupancy(const GCNSubtarget &ST,
50+
unsigned DynamicVGPRBlockSize,
51+
bool BalanceVGPRUsage) const {
52+
// Subtargets with GFX90A instructions are treated as having a unified RF.
const bool UnifiedRF = ST.hasGFX90AInsts();
53+
// In the balanced, non-unified case the VGPR count is the rounded-up average
// of ArchVGPR and AGPR usage, i.e. usage is assumed to be split evenly between
// the two banks. Otherwise the regular VGPR count is used.
unsigned NumVGPRs = (!UnifiedRF && BalanceVGPRUsage)
54+
? divideCeil(getArchVGPRNum() + getAGPRNum(), 2)
55+
: getVGPRNum(UnifiedRF);
56+
57+
// The more restrictive of the SGPR and VGPR limits determines the result.
return std::min(ST.getOccupancyWithNumSGPRs(getSGPRNum()),
58+
ST.getOccupancyWithNumVGPRs(NumVGPRs, DynamicVGPRBlockSize));
59+
}
60+
4861
void GCNRegPressure::inc(unsigned Reg,
4962
LaneBitmask PrevMask,
5063
LaneBitmask NewMask,
@@ -370,31 +383,33 @@ static LaneBitmask findUseBetween(unsigned Reg, LaneBitmask LastUseMask,
370383

371384
GCNRPTarget::GCNRPTarget(const MachineFunction &MF, const GCNRegPressure &RP,
372385
bool CombineVGPRSavings)
373-
: RP(RP), CombineVGPRSavings(CombineVGPRSavings) {
386+
: MF(MF), RP(RP) {
374387
const Function &F = MF.getFunction();
375388
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
376-
setRegLimits(ST.getMaxNumSGPRs(F), ST.getMaxNumVGPRs(F), MF);
389+
setTarget(ST.getMaxNumSGPRs(F), ST.getMaxNumVGPRs(F), CombineVGPRSavings);
377390
}
378391

379392
GCNRPTarget::GCNRPTarget(unsigned NumSGPRs, unsigned NumVGPRs,
380393
const MachineFunction &MF, const GCNRegPressure &RP,
381394
bool CombineVGPRSavings)
382-
: RP(RP), CombineVGPRSavings(CombineVGPRSavings) {
383-
setRegLimits(NumSGPRs, NumVGPRs, MF);
395+
: MF(MF), RP(RP) {
396+
setTarget(NumSGPRs, NumVGPRs, CombineVGPRSavings);
384397
}
385398

386399
GCNRPTarget::GCNRPTarget(unsigned Occupancy, const MachineFunction &MF,
387400
const GCNRegPressure &RP, bool CombineVGPRSavings)
388-
: RP(RP), CombineVGPRSavings(CombineVGPRSavings) {
401+
: MF(MF), RP(RP) {
389402
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
390403
unsigned DynamicVGPRBlockSize =
391404
MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
392-
setRegLimits(ST.getMaxNumSGPRs(Occupancy, /*Addressable=*/false),
393-
ST.getMaxNumVGPRs(Occupancy, DynamicVGPRBlockSize), MF);
405+
setTarget(ST.getMaxNumSGPRs(Occupancy, /*Addressable=*/false),
406+
ST.getMaxNumVGPRs(Occupancy, DynamicVGPRBlockSize),
407+
CombineVGPRSavings);
394408
}
395409

396-
void GCNRPTarget::setRegLimits(unsigned NumSGPRs, unsigned NumVGPRs,
397-
const MachineFunction &MF) {
410+
void GCNRPTarget::setTarget(unsigned NumSGPRs, unsigned NumVGPRs,
411+
bool CombineVGPRSavings) {
412+
this->CombineVGPRSavings = CombineVGPRSavings;
398413
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
399414
unsigned DynamicVGPRBlockSize =
400415
MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
@@ -406,8 +421,8 @@ void GCNRPTarget::setRegLimits(unsigned NumSGPRs, unsigned NumVGPRs,
406421
: 0;
407422
}
408423

409-
bool GCNRPTarget::isSaveBeneficial(Register Reg,
410-
const MachineRegisterInfo &MRI) const {
424+
bool GCNRPTarget::isSaveBeneficial(Register Reg) const {
425+
const MachineRegisterInfo &MRI = MF.getRegInfo();
411426
const TargetRegisterClass *RC = MRI.getRegClass(Reg);
412427
const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
413428
const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(TRI);

llvm/lib/Target/AMDGPU/GCNRegPressure.h

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
#define LLVM_LIB_TARGET_AMDGPU_GCNREGPRESSURE_H
1919

2020
#include "GCNSubtarget.h"
21+
#include "SIMachineFunctionInfo.h"
22+
#include "Utils/AMDGPUBaseInfo.h"
2123
#include "llvm/CodeGen/LiveIntervals.h"
2224
#include "llvm/CodeGen/RegisterPressure.h"
2325
#include <algorithm>
@@ -83,12 +85,12 @@ struct GCNRegPressure {
8385
}
8486
unsigned getSGPRTuplesWeight() const { return Value[TOTAL_KINDS + SGPR]; }
8587

86-
unsigned getOccupancy(const GCNSubtarget &ST,
87-
unsigned DynamicVGPRBlockSize) const {
88-
return std::min(ST.getOccupancyWithNumSGPRs(getSGPRNum()),
89-
ST.getOccupancyWithNumVGPRs(getVGPRNum(ST.hasGFX90AInsts()),
90-
DynamicVGPRBlockSize));
91-
}
88+
/// Determines the occupancy achievable with the current RP. When \p
89+
/// BalanceVGPRUsage is true on subtargets with non-unified RFs, the
90+
/// occupancy w.r.t. the number of VGPRs is computed as if we will later be
91+
/// able to evenly balance out VGPR usage among ArchVGPR and AGPR banks.
92+
unsigned getOccupancy(const GCNSubtarget &ST, unsigned DynamicVGPRBlockSize,
93+
bool BalanceVGPRUsage = false) const;
9294

9395
void inc(unsigned Reg,
9496
LaneBitmask PrevMask,
@@ -201,22 +203,34 @@ class GCNRPTarget {
201203
GCNRPTarget(unsigned Occupancy, const MachineFunction &MF,
202204
const GCNRegPressure &RP, bool CombineVGPRSavings = false);
203205

206+
/// Changes the target (same semantics as the register-count constructor).
207+
void setTarget(unsigned NumSGPRs, unsigned NumVGPRs,
208+
bool CombineVGPRSavings = false);
209+
204210
const GCNRegPressure &getCurrentRP() const { return RP; }
205211

206212
void setRP(const GCNRegPressure &NewRP) { RP = NewRP; }
207213

208214
/// Determines whether saving virtual register \p Reg will be beneficial
209215
/// towards achieving the RP target.
210-
bool isSaveBeneficial(Register Reg, const MachineRegisterInfo &MRI) const;
216+
bool isSaveBeneficial(Register Reg) const;
211217

212218
/// Saves virtual register \p Reg with lanemask \p Mask.
213-
void saveReg(Register Reg, LaneBitmask Mask, const MachineRegisterInfo &MRI) {
214-
RP.inc(Reg, Mask, LaneBitmask::getNone(), MRI);
219+
void saveReg(Register Reg, LaneBitmask Mask) {
220+
RP.inc(Reg, Mask, LaneBitmask::getNone(), MF.getRegInfo());
215221
}
216222

217223
/// Whether the current RP is at or below the defined pressure target.
218224
bool satisfied() const;
219225

226+
/// Computes achievable occupancy with the currently tracked register pressure.
227+
unsigned getOccupancy() const {
228+
// Forwards to GCNRegPressure::getOccupancy with the subtarget and dynamic
// VGPR block size taken from MF; VGPR balancing is requested exactly when
// CombineVGPRSavings is set.
return RP.getOccupancy(
229+
MF.getSubtarget<GCNSubtarget>(),
230+
MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize(),
231+
/*BalanceVGPRUsage=*/CombineVGPRSavings);
232+
}
233+
220234
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
221235
friend raw_ostream &operator<<(raw_ostream &OS, const GCNRPTarget &Target) {
222236
OS << "Actual/Target: " << Target.RP.getSGPRNum() << '/' << Target.MaxSGPRs
@@ -236,6 +250,8 @@ class GCNRPTarget {
236250
#endif
237251

238252
private:
253+
const MachineFunction &MF;
254+
239255
/// Current register pressure.
240256
GCNRegPressure RP;
241257

@@ -248,7 +264,7 @@ class GCNRPTarget {
248264
unsigned MaxUnifiedVGPRs;
249265
/// Whether we consider that the register allocator will be able to swap
250266
/// between ArchVGPRs and AGPRs by copying them to a super register class.
251-
/// Concretely, this allows savings in one of the VGPR banks to help toward
267+
/// Concretely, this allows free registers in one VGPR bank to help toward
252268
/// savings in the other VGPR bank.
253269
bool CombineVGPRSavings;
254270

@@ -266,9 +282,6 @@ class GCNRPTarget {
266282
return NumVGPRs > MaxVGPRs || !satisfiesUnifiedTarget() ||
267283
(CombineVGPRSavings && !satisifiesVGPRBanksTarget());
268284
}
269-
270-
void setRegLimits(unsigned MaxSGPRs, unsigned MaxVGPRs,
271-
const MachineFunction &MF);
272285
};
273286

274287
///////////////////////////////////////////////////////////////////////////////

0 commit comments

Comments
 (0)