Skip to content

Commit 52cbccd

Browse files
committed
[AMDGPU] More accurately account for AVGPR pressure
Change-Id: I6f129c2723b52a391a96178e390f60535164ac9b
1 parent 83dfdd8 commit 52cbccd

File tree

4 files changed

+152
-87
lines changed

4 files changed

+152
-87
lines changed

llvm/lib/Target/AMDGPU/GCNRegPressure.cpp

Lines changed: 68 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -99,20 +99,22 @@ void GCNRegPressure::inc(unsigned Reg,
9999
bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
100100
unsigned MaxOccupancy) const {
101101
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
102+
unsigned MaxArchVGPRs = ST.getAddressableNumArchVGPRs();
102103
unsigned DynamicVGPRBlockSize =
103104
MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
104105

105106
const auto SGPROcc = std::min(MaxOccupancy,
106107
ST.getOccupancyWithNumSGPRs(getSGPRNum()));
107108
const auto VGPROcc = std::min(
108-
MaxOccupancy, ST.getOccupancyWithNumVGPRs(getVGPRNum(ST.hasGFX90AInsts()),
109-
DynamicVGPRBlockSize));
109+
MaxOccupancy,
110+
ST.getOccupancyWithNumVGPRs(getVGPRNum(ST.hasGFX90AInsts(), MaxArchVGPRs),
111+
DynamicVGPRBlockSize));
110112
const auto OtherSGPROcc = std::min(MaxOccupancy,
111113
ST.getOccupancyWithNumSGPRs(O.getSGPRNum()));
112-
const auto OtherVGPROcc =
113-
std::min(MaxOccupancy,
114-
ST.getOccupancyWithNumVGPRs(O.getVGPRNum(ST.hasGFX90AInsts()),
115-
DynamicVGPRBlockSize));
114+
const auto OtherVGPROcc = std::min(
115+
MaxOccupancy, ST.getOccupancyWithNumVGPRs(
116+
O.getVGPRNum(ST.hasGFX90AInsts(), MaxArchVGPRs),
117+
DynamicVGPRBlockSize));
116118

117119
const auto Occ = std::min(SGPROcc, VGPROcc);
118120
const auto OtherOcc = std::min(OtherSGPROcc, OtherVGPROcc);
@@ -135,35 +137,36 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
135137
unsigned OtherVGPRForSGPRSpills =
136138
(OtherExcessSGPR + (WaveSize - 1)) / WaveSize;
137139

138-
unsigned MaxArchVGPRs = ST.getAddressableNumArchVGPRs();
139-
140140
// Unified excess pressure conditions, accounting for VGPRs used for SGPR
141141
// spills
142142
unsigned ExcessVGPR =
143-
std::max(static_cast<int>(getVGPRNum(ST.hasGFX90AInsts()) +
143+
std::max(static_cast<int>(getVGPRNum(ST.hasGFX90AInsts(), MaxArchVGPRs) +
144144
VGPRForSGPRSpills - MaxVGPRs),
145145
0);
146-
unsigned OtherExcessVGPR =
147-
std::max(static_cast<int>(O.getVGPRNum(ST.hasGFX90AInsts()) +
148-
OtherVGPRForSGPRSpills - MaxVGPRs),
149-
0);
146+
unsigned OtherExcessVGPR = std::max(
147+
static_cast<int>(O.getVGPRNum(ST.hasGFX90AInsts(), MaxArchVGPRs) +
148+
OtherVGPRForSGPRSpills - MaxVGPRs),
149+
0);
150150
// Arch VGPR excess pressure conditions, accounting for VGPRs used for SGPR
151151
// spills
152-
unsigned ExcessArchVGPR = std::max(
153-
static_cast<int>(getVGPRNum(false) + VGPRForSGPRSpills - MaxArchVGPRs),
154-
0);
152+
unsigned ExcessArchVGPR =
153+
std::max(static_cast<int>(getVGPRNum(false, MaxArchVGPRs) +
154+
VGPRForSGPRSpills - MaxArchVGPRs),
155+
0);
155156
unsigned OtherExcessArchVGPR =
156-
std::max(static_cast<int>(O.getVGPRNum(false) + OtherVGPRForSGPRSpills -
157-
MaxArchVGPRs),
157+
std::max(static_cast<int>(O.getVGPRNum(false, MaxArchVGPRs) +
158+
OtherVGPRForSGPRSpills - MaxArchVGPRs),
158159
0);
159160
// AGPR excess pressure conditions
160-
unsigned ExcessAGPR = std::max(
161-
static_cast<int>(ST.hasGFX90AInsts() ? (getAGPRNum() - MaxArchVGPRs)
162-
: (getAGPRNum() - MaxVGPRs)),
163-
0);
161+
unsigned ExcessAGPR =
162+
std::max(static_cast<int>(ST.hasGFX90AInsts()
163+
? (getAGPRNum(MaxArchVGPRs) - MaxArchVGPRs)
164+
: (getAGPRNum(MaxArchVGPRs) - MaxVGPRs)),
165+
0);
164166
unsigned OtherExcessAGPR = std::max(
165-
static_cast<int>(ST.hasGFX90AInsts() ? (O.getAGPRNum() - MaxArchVGPRs)
166-
: (O.getAGPRNum() - MaxVGPRs)),
167+
static_cast<int>(ST.hasGFX90AInsts()
168+
? (O.getAGPRNum(MaxArchVGPRs) - MaxArchVGPRs)
169+
: (O.getAGPRNum(MaxArchVGPRs) - MaxVGPRs)),
167170
0);
168171

169172
bool ExcessRP = ExcessSGPR || ExcessVGPR || ExcessArchVGPR || ExcessAGPR;
@@ -184,14 +187,21 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
184187
return VGPRDiff > 0;
185188
if (SGPRDiff != 0) {
186189
unsigned PureExcessVGPR =
187-
std::max(static_cast<int>(getVGPRNum(ST.hasGFX90AInsts()) - MaxVGPRs),
188-
0) +
189-
std::max(static_cast<int>(getVGPRNum(false) - MaxArchVGPRs), 0);
190+
std::max(
191+
static_cast<int>(getVGPRNum(ST.hasGFX90AInsts(), MaxArchVGPRs) -
192+
MaxVGPRs),
193+
0) +
194+
std::max(
195+
static_cast<int>(getVGPRNum(false, MaxArchVGPRs) - MaxArchVGPRs),
196+
0);
190197
unsigned OtherPureExcessVGPR =
191198
std::max(
192-
static_cast<int>(O.getVGPRNum(ST.hasGFX90AInsts()) - MaxVGPRs),
199+
static_cast<int>(O.getVGPRNum(ST.hasGFX90AInsts(), MaxArchVGPRs) -
200+
MaxVGPRs),
193201
0) +
194-
std::max(static_cast<int>(O.getVGPRNum(false) - MaxArchVGPRs), 0);
202+
std::max(static_cast<int>(O.getVGPRNum(false, MaxArchVGPRs) -
203+
MaxArchVGPRs),
204+
0);
195205

196206
// If we have a special case where there is a tie in excess VGPR, but one
197207
// of the pressures has VGPR usage from SGPR spills, prefer the pressure
@@ -221,33 +231,36 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
221231
if (SW != OtherSW)
222232
return SW < OtherSW;
223233
} else {
224-
auto VW = getVGPRTuplesWeight();
225-
auto OtherVW = O.getVGPRTuplesWeight();
234+
auto VW = getVGPRTuplesWeight(MaxArchVGPRs);
235+
auto OtherVW = O.getVGPRTuplesWeight(MaxArchVGPRs);
226236
if (VW != OtherVW)
227237
return VW < OtherVW;
228238
}
229239
}
230240

231241
// Give final precedence to lower general RP.
232-
return SGPRImportant ? (getSGPRNum() < O.getSGPRNum()):
233-
(getVGPRNum(ST.hasGFX90AInsts()) <
234-
O.getVGPRNum(ST.hasGFX90AInsts()));
242+
return SGPRImportant ? (getSGPRNum() < O.getSGPRNum())
243+
: (getVGPRNum(ST.hasGFX90AInsts(), MaxArchVGPRs) <
244+
O.getVGPRNum(ST.hasGFX90AInsts(), MaxArchVGPRs));
235245
}
236246

237247
Printable llvm::print(const GCNRegPressure &RP, const GCNSubtarget *ST,
238248
unsigned DynamicVGPRBlockSize) {
239249
return Printable([&RP, ST, DynamicVGPRBlockSize](raw_ostream &OS) {
240-
OS << "VGPRs: " << RP.getArchVGPRNum() << ' '
241-
<< "AGPRs: " << RP.getAGPRNum();
250+
OS << "VGPRs: " << RP.getArchVGPRNum(ST->getAddressableNumArchVGPRs())
251+
<< ' ' << "AGPRs: " << RP.getAGPRNum(ST->getAddressableNumArchVGPRs());
242252
if (ST)
243253
OS << "(O"
244-
<< ST->getOccupancyWithNumVGPRs(RP.getVGPRNum(ST->hasGFX90AInsts()),
245-
DynamicVGPRBlockSize)
254+
<< ST->getOccupancyWithNumVGPRs(
255+
RP.getVGPRNum(ST->hasGFX90AInsts(),
256+
ST->getAddressableNumArchVGPRs()),
257+
DynamicVGPRBlockSize)
246258
<< ')';
247259
OS << ", SGPRs: " << RP.getSGPRNum();
248260
if (ST)
249261
OS << "(O" << ST->getOccupancyWithNumSGPRs(RP.getSGPRNum()) << ')';
250-
OS << ", LVGPR WT: " << RP.getVGPRTuplesWeight()
262+
OS << ", LVGPR WT: "
263+
<< RP.getVGPRTuplesWeight(ST->getAddressableNumArchVGPRs())
251264
<< ", LSGPR WT: " << RP.getSGPRTuplesWeight();
252265
if (ST)
253266
OS << " -> Occ: " << RP.getOccupancy(*ST, DynamicVGPRBlockSize);
@@ -398,8 +411,9 @@ void GCNRPTarget::setRegLimits(unsigned NumSGPRs, unsigned NumVGPRs,
398411
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
399412
unsigned DynamicVGPRBlockSize =
400413
MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
414+
AddressableNumArchVGPRs = ST.getAddressableNumArchVGPRs();
401415
MaxSGPRs = std::min(ST.getAddressableNumSGPRs(), NumSGPRs);
402-
MaxVGPRs = std::min(ST.getAddressableNumArchVGPRs(), NumVGPRs);
416+
MaxVGPRs = std::min(AddressableNumArchVGPRs, NumVGPRs);
403417
MaxUnifiedVGPRs =
404418
ST.hasGFX90AInsts()
405419
? std::min(ST.getAddressableNumVGPRs(DynamicVGPRBlockSize), NumVGPRs)
@@ -414,15 +428,21 @@ bool GCNRPTarget::isSaveBeneficial(Register Reg,
414428

415429
if (SRI->isSGPRClass(RC))
416430
return RP.getSGPRNum() > MaxSGPRs;
417-
unsigned NumVGPRs =
418-
SRI->isAGPRClass(RC) ? RP.getAGPRNum() : RP.getArchVGPRNum();
431+
432+
bool ShouldUseAGPR =
433+
SRI->isAGPRClass(RC) ||
434+
(SRI->isVectorSuperClass(RC) &&
435+
RP.getArchVGPRNum(AddressableNumArchVGPRs) >= AddressableNumArchVGPRs);
436+
unsigned NumVGPRs = ShouldUseAGPR
437+
? RP.getAGPRNum(AddressableNumArchVGPRs)
438+
: RP.getArchVGPRNum(AddressableNumArchVGPRs);
419439
return isVGPRBankSaveBeneficial(NumVGPRs);
420440
}
421441

422442
bool GCNRPTarget::satisfied() const {
423443
if (RP.getSGPRNum() > MaxSGPRs)
424444
return false;
425-
if (RP.getVGPRNum(false) > MaxVGPRs &&
445+
if (RP.getVGPRNum(false, AddressableNumArchVGPRs) > MaxVGPRs &&
426446
(!CombineVGPRSavings || !satisifiesVGPRBanksTarget()))
427447
return false;
428448
return satisfiesUnifiedTarget();
@@ -876,10 +896,12 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
876896

877897
OS << "---\nname: " << MF.getName() << "\nbody: |\n";
878898

879-
auto printRP = [](const GCNRegPressure &RP) {
880-
return Printable([&RP](raw_ostream &OS) {
899+
auto printRP = [&MF](const GCNRegPressure &RP) {
900+
return Printable([&RP, &MF](raw_ostream &OS) {
881901
OS << format(PFX " %-5d", RP.getSGPRNum())
882-
<< format(" %-5d", RP.getVGPRNum(false));
902+
<< format(" %-5d",
903+
RP.getVGPRNum(false, MF.getSubtarget<GCNSubtarget>()
904+
.getAddressableNumArchVGPRs()));
883905
});
884906
};
885907

llvm/lib/Target/AMDGPU/GCNRegPressure.h

Lines changed: 64 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,13 @@ struct GCNRegPressure {
4343

4444
/// \returns the SGPR32 pressure
4545
unsigned getSGPRNum() const { return Value[SGPR]; }
46-
/// \returns the aggregated ArchVGPR32, AccVGPR32, and Pseudo AVGPR pressure
47-
/// dependent upon \p UnifiedVGPRFile
48-
unsigned getVGPRNum(bool UnifiedVGPRFile) const {
46+
unsigned getVGPRNum(bool UnifiedVGPRFile,
47+
unsigned AddressableArchVGPR) const {
4948
if (UnifiedVGPRFile) {
50-
return Value[AGPR]
51-
? getUnifiedVGPRNum(Value[VGPR], Value[AGPR], Value[AVGPR])
52-
: Value[VGPR] + Value[AVGPR];
49+
return Value[AGPR] || Value[AVGPR]
50+
? getUnifiedVGPRNum(Value[VGPR], Value[AGPR], Value[AVGPR],
51+
AddressableArchVGPR)
52+
: Value[VGPR];
5353
}
5454
// AVGPR assignment priority is based on the width of the register. Account
5555
// AVGPR pressure as VGPR.
@@ -61,33 +61,60 @@ struct GCNRegPressure {
6161
/// VGPR file.
6262
inline static unsigned getUnifiedVGPRNum(unsigned NumArchVGPRs,
6363
unsigned NumAGPRs,
64-
unsigned NumAVGPRs) {
65-
66-
// Assume AVGPRs will be assigned as VGPRs.
67-
return alignTo(NumArchVGPRs + NumAVGPRs,
64+
unsigned NumAVGPRs,
65+
unsigned AddressableArchVGPR) {
66+
67+
// Until we hit AddressableArchVGPR, we will assign AV as VGPR. After that
68+
// point, we will assign as AGPR.
69+
unsigned AVGPRsAsVGPRs =
70+
NumArchVGPRs < AddressableArchVGPR
71+
? std::min((AddressableArchVGPR - NumArchVGPRs), NumAVGPRs)
72+
: 0;
73+
unsigned AVGPRsAsAGPRs =
74+
NumAVGPRs > AVGPRsAsVGPRs ? NumAVGPRs - AVGPRsAsVGPRs : 0;
75+
return alignTo(NumArchVGPRs + AVGPRsAsVGPRs,
6876
AMDGPU::IsaInfo::getArchVGPRAllocGranule()) +
69-
NumAGPRs;
77+
NumAGPRs + AVGPRsAsAGPRs;
7078
}
7179

7280
/// \returns the ArchVGPR32 pressure, plus the AVGPRS which we assume will be
7381
/// allocated as VGPR, capped at \p AddressableArchVGPR
74-
unsigned getArchVGPRNum() const { return Value[VGPR] + Value[AVGPR]; }
82+
unsigned getArchVGPRNum(unsigned AddressableArchVGPR) const {
83+
return std::min(Value[VGPR] + Value[AVGPR], AddressableArchVGPR);
84+
}
7585
/// \returns the AccVGPR32 pressure, plus any combined ArchVGPR32 and AVGPR pressure in excess of \p AddressableArchVGPR
76-
unsigned getAGPRNum() const { return Value[AGPR]; }
86+
unsigned getAGPRNum(unsigned AddressableArchVGPR) const {
87+
unsigned VGPRsForAGPRs =
88+
Value[VGPR] + Value[AVGPR] > AddressableArchVGPR
89+
? (Value[VGPR] + Value[AVGPR] - AddressableArchVGPR)
90+
: 0;
91+
return Value[AGPR] + VGPRsForAGPRs;
92+
}
7793
/// \returns the AVGPR32 pressure
7894
unsigned getAVGPRNum() const { return Value[AVGPR]; }
7995

80-
unsigned getVGPRTuplesWeight() const {
81-
return std::max(Value[TOTAL_KINDS + VGPR] + Value[TOTAL_KINDS + AVGPR],
82-
Value[TOTAL_KINDS + AGPR]);
96+
unsigned getVGPRTuplesWeight(unsigned AddressableArchVGPR) const {
97+
unsigned AVGPRsAsVGPRs =
98+
Value[TOTAL_KINDS + VGPR] < AddressableArchVGPR
99+
? std::min(AddressableArchVGPR - Value[TOTAL_KINDS + VGPR],
100+
Value[TOTAL_KINDS + AVGPR])
101+
: 0;
102+
unsigned AVGPRsAsAGPRs = Value[TOTAL_KINDS + AVGPR] > AVGPRsAsVGPRs
103+
? Value[TOTAL_KINDS + AVGPR] - AVGPRsAsVGPRs
104+
: 0;
105+
106+
return std::max(Value[TOTAL_KINDS + VGPR] + AVGPRsAsVGPRs,
107+
Value[TOTAL_KINDS + AGPR] + AVGPRsAsAGPRs);
83108
}
84109
unsigned getSGPRTuplesWeight() const { return Value[TOTAL_KINDS + SGPR]; }
85110

86111
unsigned getOccupancy(const GCNSubtarget &ST,
87112
unsigned DynamicVGPRBlockSize) const {
88-
return std::min(ST.getOccupancyWithNumSGPRs(getSGPRNum()),
89-
ST.getOccupancyWithNumVGPRs(getVGPRNum(ST.hasGFX90AInsts()),
90-
DynamicVGPRBlockSize));
113+
return std::min(
114+
ST.getOccupancyWithNumSGPRs(getSGPRNum()),
115+
ST.getOccupancyWithNumVGPRs(
116+
getVGPRNum(ST.hasGFX90AInsts(), ST.getAddressableNumArchVGPRs()),
117+
DynamicVGPRBlockSize));
91118
}
92119

93120
void inc(unsigned Reg,
@@ -151,7 +178,7 @@ struct GCNRegPressure {
151178
friend GCNRegPressure max(const GCNRegPressure &P1,
152179
const GCNRegPressure &P2);
153180

154-
friend Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST,
181+
friend Printable print(const GCNRegPressure &RP,
155182
unsigned DynamicVGPRBlockSize);
156183
};
157184

@@ -220,16 +247,19 @@ class GCNRPTarget {
220247
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
221248
friend raw_ostream &operator<<(raw_ostream &OS, const GCNRPTarget &Target) {
222249
OS << "Actual/Target: " << Target.RP.getSGPRNum() << '/' << Target.MaxSGPRs
223-
<< " SGPRs, " << Target.RP.getArchVGPRNum() << '/' << Target.MaxVGPRs
224-
<< " ArchVGPRs, " << Target.RP.getAGPRNum() << '/' << Target.MaxVGPRs
225-
<< " AGPRs";
250+
<< " SGPRs, " << Target.RP.getArchVGPRNum(Target.AddressableNumArchVGPRs)
251+
<< '/' << Target.MaxVGPRs << " ArchVGPRs, "
252+
<< Target.RP.getAGPRNum(Target.AddressableNumArchVGPRs) << '/'
253+
<< Target.MaxVGPRs << " AGPRs";
226254

227255
if (Target.MaxUnifiedVGPRs) {
228-
OS << ", " << Target.RP.getVGPRNum(true) << '/' << Target.MaxUnifiedVGPRs
229-
<< " VGPRs (unified)";
256+
OS << ", " << Target.RP.getVGPRNum(true, Target.AddressableNumArchVGPRs)
257+
<< '/' << Target.MaxUnifiedVGPRs << " VGPRs (unified)";
230258
} else if (Target.CombineVGPRSavings) {
231-
OS << ", " << Target.RP.getArchVGPRNum() + Target.RP.getAGPRNum() << '/'
232-
<< 2 * Target.MaxVGPRs << " VGPRs (combined target)";
259+
OS << ", "
260+
<< Target.RP.getArchVGPRNum(Target.AddressableNumArchVGPRs) +
261+
Target.RP.getAGPRNum(Target.AddressableNumArchVGPRs)
262+
<< '/' << 2 * Target.MaxVGPRs << " VGPRs (combined target)";
233263
}
234264
return OS;
235265
}
@@ -238,14 +268,15 @@ class GCNRPTarget {
238268
private:
239269
/// Current register pressure.
240270
GCNRegPressure RP;
241-
242271
/// Target number of SGPRs.
243272
unsigned MaxSGPRs;
244273
/// Target number of ArchVGPRs and AGPRs.
245274
unsigned MaxVGPRs;
246275
/// Target number of overall VGPRs for subtargets with unified RFs. Always 0
247276
/// for subtargets with non-unified RFs.
248277
unsigned MaxUnifiedVGPRs;
278+
/// The maximum number of arch vgprs allowed by the subtarget.
279+
unsigned AddressableNumArchVGPRs;
249280
/// Whether we consider that the register allocator will be able to swap
250281
/// between ArchVGPRs and AGPRs by copying them to a super register class.
251282
/// Concretely, this allows savings in one of the VGPR banks to help toward
@@ -254,12 +285,15 @@ class GCNRPTarget {
254285

255286
inline bool satisifiesVGPRBanksTarget() const {
256287
assert(CombineVGPRSavings && "only makes sense with combined savings");
257-
return RP.getArchVGPRNum() + RP.getAGPRNum() <= 2 * MaxVGPRs;
288+
return RP.getArchVGPRNum(AddressableNumArchVGPRs) +
289+
RP.getAGPRNum(AddressableNumArchVGPRs) <=
290+
2 * MaxVGPRs;
258291
}
259292

260293
/// Always satisfied when the subtarget doesn't have a unified RF.
261294
inline bool satisfiesUnifiedTarget() const {
262-
return !MaxUnifiedVGPRs || RP.getVGPRNum(true) <= MaxUnifiedVGPRs;
295+
return !MaxUnifiedVGPRs ||
296+
RP.getVGPRNum(true, AddressableNumArchVGPRs) <= MaxUnifiedVGPRs;
263297
}
264298

265299
inline bool isVGPRBankSaveBeneficial(unsigned NumVGPRs) const {

0 commit comments

Comments
 (0)