Skip to content

Commit 99a5402

Browse files
committed
Handle gfx908 case
Change-Id: Ic16c8a4ffdf58027de164c598cfac70fc453bb00
1 parent 52cbccd commit 99a5402

File tree

7 files changed

+154
-124
lines changed

7 files changed

+154
-124
lines changed

llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -451,7 +451,7 @@ unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
451451
const unsigned DynamicVGPRBlockSize =
452452
MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
453453
const auto Occ =
454-
Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize);
454+
Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize, MF);
455455
LLVM_DEBUG(dbgs() << "Trying to improve occupancy, target = " << TargetOcc
456456
<< ", current = " << Occ << '\n');
457457

@@ -460,7 +460,7 @@ unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
460460
// Always build the DAG to add mutations
461461
BuildDAG DAG(*R, *this);
462462

463-
if (R->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize) >= NewOcc)
463+
if (R->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize, MF) >= NewOcc)
464464
continue;
465465

466466
LLVM_DEBUG(printRegion(dbgs(), R->Begin, R->End, LIS, 3);
@@ -471,7 +471,7 @@ unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
471471
LLVM_DEBUG(dbgs() << "Occupancy improvement attempt:\n";
472472
printSchedRP(dbgs(), R->MaxPressure, MaxRP));
473473

474-
NewOcc = std::min(NewOcc, MaxRP.getOccupancy(ST, DynamicVGPRBlockSize));
474+
NewOcc = std::min(NewOcc, MaxRP.getOccupancy(ST, DynamicVGPRBlockSize, MF));
475475
if (NewOcc <= Occ)
476476
break;
477477

@@ -496,7 +496,7 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
496496

497497
sortRegionsByPressure(TgtOcc);
498498
auto Occ =
499-
Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize);
499+
Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize, MF);
500500

501501
bool IsReentry = false;
502502
if (TryMaximizeOccupancy && Occ < TgtOcc) {
@@ -527,21 +527,22 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
527527
const auto RP = getRegionPressure(*R);
528528
LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP));
529529

530-
if (RP.getOccupancy(ST, DynamicVGPRBlockSize) < TgtOcc) {
530+
if (RP.getOccupancy(ST, DynamicVGPRBlockSize, MF) < TgtOcc) {
531531
LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc);
532-
if (R->BestSchedule.get() && R->BestSchedule->MaxPressure.getOccupancy(
533-
ST, DynamicVGPRBlockSize) >= TgtOcc) {
532+
if (R->BestSchedule.get() &&
533+
R->BestSchedule->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize,
534+
MF) >= TgtOcc) {
534535
LLVM_DEBUG(dbgs() << ", scheduling minimal register\n");
535536
scheduleBest(*R);
536537
} else {
537538
LLVM_DEBUG(dbgs() << ", restoring\n");
538539
Ovr.restoreOrder();
539-
assert(R->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize) >=
540+
assert(R->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize, MF) >=
540541
TgtOcc);
541542
}
542543
}
543-
FinalOccupancy =
544-
std::min(FinalOccupancy, RP.getOccupancy(ST, DynamicVGPRBlockSize));
544+
FinalOccupancy = std::min(FinalOccupancy,
545+
RP.getOccupancy(ST, DynamicVGPRBlockSize, MF));
545546
}
546547
}
547548
MFI->limitOccupancy(FinalOccupancy);
@@ -591,7 +592,7 @@ void GCNIterativeScheduler::scheduleILP(
591592

592593
sortRegionsByPressure(TgtOcc);
593594
auto Occ =
594-
Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize);
595+
Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize, MF);
595596

596597
bool IsReentry = false;
597598
if (TryMaximizeOccupancy && Occ < TgtOcc) {
@@ -612,18 +613,19 @@ void GCNIterativeScheduler::scheduleILP(
612613
const auto RP = getSchedulePressure(*R, ILPSchedule);
613614
LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP));
614615

615-
if (RP.getOccupancy(ST, DynamicVGPRBlockSize) < TgtOcc) {
616+
if (RP.getOccupancy(ST, DynamicVGPRBlockSize, MF) < TgtOcc) {
616617
LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc);
617-
if (R->BestSchedule.get() && R->BestSchedule->MaxPressure.getOccupancy(
618-
ST, DynamicVGPRBlockSize) >= TgtOcc) {
618+
if (R->BestSchedule.get() &&
619+
R->BestSchedule->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize,
620+
MF) >= TgtOcc) {
619621
LLVM_DEBUG(dbgs() << ", scheduling minimal register\n");
620622
scheduleBest(*R);
621623
}
622624
} else {
623625
scheduleRegion(*R, ILPSchedule, RP);
624626
LLVM_DEBUG(printSchedResult(dbgs(), R, RP));
625-
FinalOccupancy =
626-
std::min(FinalOccupancy, RP.getOccupancy(ST, DynamicVGPRBlockSize));
627+
FinalOccupancy = std::min(FinalOccupancy,
628+
RP.getOccupancy(ST, DynamicVGPRBlockSize, MF));
627629
}
628630
}
629631
MFI->limitOccupancy(FinalOccupancy);

llvm/lib/Target/AMDGPU/GCNRegPressure.cpp

Lines changed: 66 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -99,21 +99,21 @@ void GCNRegPressure::inc(unsigned Reg,
9999
bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
100100
unsigned MaxOccupancy) const {
101101
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
102-
unsigned MaxArchVGPRs = ST.getAddressableNumArchVGPRs();
102+
unsigned ArchVGPRThreshold = ST.getArchVGPRAllocationThreshold(MF);
103103
unsigned DynamicVGPRBlockSize =
104104
MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
105105

106106
const auto SGPROcc = std::min(MaxOccupancy,
107107
ST.getOccupancyWithNumSGPRs(getSGPRNum()));
108108
const auto VGPROcc = std::min(
109-
MaxOccupancy,
110-
ST.getOccupancyWithNumVGPRs(getVGPRNum(ST.hasGFX90AInsts(), MaxArchVGPRs),
111-
DynamicVGPRBlockSize));
109+
MaxOccupancy, ST.getOccupancyWithNumVGPRs(
110+
getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold),
111+
DynamicVGPRBlockSize));
112112
const auto OtherSGPROcc = std::min(MaxOccupancy,
113113
ST.getOccupancyWithNumSGPRs(O.getSGPRNum()));
114114
const auto OtherVGPROcc = std::min(
115115
MaxOccupancy, ST.getOccupancyWithNumVGPRs(
116-
O.getVGPRNum(ST.hasGFX90AInsts(), MaxArchVGPRs),
116+
O.getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold),
117117
DynamicVGPRBlockSize));
118118

119119
const auto Occ = std::min(SGPROcc, VGPROcc);
@@ -139,34 +139,37 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
139139

140140
// Unified excess pressure conditions, accounting for VGPRs used for SGPR
141141
// spills
142-
unsigned ExcessVGPR =
143-
std::max(static_cast<int>(getVGPRNum(ST.hasGFX90AInsts(), MaxArchVGPRs) +
144-
VGPRForSGPRSpills - MaxVGPRs),
145-
0);
142+
unsigned ExcessVGPR = std::max(
143+
static_cast<int>(getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold) +
144+
VGPRForSGPRSpills - MaxVGPRs),
145+
0);
146146
unsigned OtherExcessVGPR = std::max(
147-
static_cast<int>(O.getVGPRNum(ST.hasGFX90AInsts(), MaxArchVGPRs) +
147+
static_cast<int>(O.getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold) +
148148
OtherVGPRForSGPRSpills - MaxVGPRs),
149149
0);
150150
// Arch VGPR excess pressure conditions, accounting for VGPRs used for SGPR
151151
// spills
152+
unsigned AddressableArchVGPRs = ST.getAddressableNumArchVGPRs();
152153
unsigned ExcessArchVGPR =
153-
std::max(static_cast<int>(getVGPRNum(false, MaxArchVGPRs) +
154-
VGPRForSGPRSpills - MaxArchVGPRs),
154+
std::max(static_cast<int>(getVGPRNum(false, ArchVGPRThreshold) +
155+
VGPRForSGPRSpills - AddressableArchVGPRs),
155156
0);
156157
unsigned OtherExcessArchVGPR =
157-
std::max(static_cast<int>(O.getVGPRNum(false, MaxArchVGPRs) +
158-
OtherVGPRForSGPRSpills - MaxArchVGPRs),
158+
std::max(static_cast<int>(O.getVGPRNum(false, ArchVGPRThreshold) +
159+
OtherVGPRForSGPRSpills - AddressableArchVGPRs),
159160
0);
160161
// AGPR excess pressure conditions
161162
unsigned ExcessAGPR =
162-
std::max(static_cast<int>(ST.hasGFX90AInsts()
163-
? (getAGPRNum(MaxArchVGPRs) - MaxArchVGPRs)
164-
: (getAGPRNum(MaxArchVGPRs) - MaxVGPRs)),
163+
std::max(static_cast<int>(
164+
ST.hasGFX90AInsts()
165+
? (getAGPRNum(ArchVGPRThreshold) - AddressableArchVGPRs)
166+
: (getAGPRNum(ArchVGPRThreshold) - MaxVGPRs)),
165167
0);
166168
unsigned OtherExcessAGPR = std::max(
167-
static_cast<int>(ST.hasGFX90AInsts()
168-
? (O.getAGPRNum(MaxArchVGPRs) - MaxArchVGPRs)
169-
: (O.getAGPRNum(MaxArchVGPRs) - MaxVGPRs)),
169+
static_cast<int>(
170+
ST.hasGFX90AInsts()
171+
? (O.getAGPRNum(ArchVGPRThreshold) - AddressableArchVGPRs)
172+
: (O.getAGPRNum(ArchVGPRThreshold) - MaxVGPRs)),
170173
0);
171174

172175
bool ExcessRP = ExcessSGPR || ExcessVGPR || ExcessArchVGPR || ExcessAGPR;
@@ -187,20 +190,20 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
187190
return VGPRDiff > 0;
188191
if (SGPRDiff != 0) {
189192
unsigned PureExcessVGPR =
190-
std::max(
191-
static_cast<int>(getVGPRNum(ST.hasGFX90AInsts(), MaxArchVGPRs) -
192-
MaxVGPRs),
193-
0) +
194-
std::max(
195-
static_cast<int>(getVGPRNum(false, MaxArchVGPRs) - MaxArchVGPRs),
196-
0);
193+
std::max(static_cast<int>(
194+
getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold) -
195+
MaxVGPRs),
196+
0) +
197+
std::max(static_cast<int>(getVGPRNum(false, ArchVGPRThreshold) -
198+
AddressableArchVGPRs),
199+
0);
197200
unsigned OtherPureExcessVGPR =
198-
std::max(
199-
static_cast<int>(O.getVGPRNum(ST.hasGFX90AInsts(), MaxArchVGPRs) -
200-
MaxVGPRs),
201-
0) +
202-
std::max(static_cast<int>(O.getVGPRNum(false, MaxArchVGPRs) -
203-
MaxArchVGPRs),
201+
std::max(static_cast<int>(
202+
O.getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold) -
203+
MaxVGPRs),
204+
0) +
205+
std::max(static_cast<int>(O.getVGPRNum(false, ArchVGPRThreshold) -
206+
AddressableArchVGPRs),
204207
0);
205208

206209
// If we have a special case where there is a tie in excess VGPR, but one
@@ -231,41 +234,42 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
231234
if (SW != OtherSW)
232235
return SW < OtherSW;
233236
} else {
234-
auto VW = getVGPRTuplesWeight(MaxArchVGPRs);
235-
auto OtherVW = O.getVGPRTuplesWeight(MaxArchVGPRs);
237+
auto VW = getVGPRTuplesWeight(ArchVGPRThreshold);
238+
auto OtherVW = O.getVGPRTuplesWeight(ArchVGPRThreshold);
236239
if (VW != OtherVW)
237240
return VW < OtherVW;
238241
}
239242
}
240243

241244
// Give final precedence to lower general RP.
242245
return SGPRImportant ? (getSGPRNum() < O.getSGPRNum())
243-
: (getVGPRNum(ST.hasGFX90AInsts(), MaxArchVGPRs) <
244-
O.getVGPRNum(ST.hasGFX90AInsts(), MaxArchVGPRs));
246+
: (getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold) <
247+
O.getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold));
245248
}
246249

247250
Printable llvm::print(const GCNRegPressure &RP, const GCNSubtarget *ST,
248-
unsigned DynamicVGPRBlockSize) {
249-
return Printable([&RP, ST, DynamicVGPRBlockSize](raw_ostream &OS) {
250-
OS << "VGPRs: " << RP.getArchVGPRNum(ST->getAddressableNumArchVGPRs())
251-
<< ' ' << "AGPRs: " << RP.getAGPRNum(ST->getAddressableNumArchVGPRs());
252-
if (ST)
253-
OS << "(O"
254-
<< ST->getOccupancyWithNumVGPRs(
255-
RP.getVGPRNum(ST->hasGFX90AInsts(),
256-
ST->getAddressableNumArchVGPRs()),
257-
DynamicVGPRBlockSize)
258-
<< ')';
259-
OS << ", SGPRs: " << RP.getSGPRNum();
260-
if (ST)
261-
OS << "(O" << ST->getOccupancyWithNumSGPRs(RP.getSGPRNum()) << ')';
262-
OS << ", LVGPR WT: "
263-
<< RP.getVGPRTuplesWeight(ST->getAddressableNumArchVGPRs())
264-
<< ", LSGPR WT: " << RP.getSGPRTuplesWeight();
265-
if (ST)
266-
OS << " -> Occ: " << RP.getOccupancy(*ST, DynamicVGPRBlockSize);
267-
OS << '\n';
268-
});
251+
unsigned DynamicVGPRBlockSize,
252+
const MachineFunction *MF) {
253+
unsigned ArchVGPRThreshold = ST->getArchVGPRAllocationThreshold(*MF);
254+
return Printable(
255+
[&RP, ST, DynamicVGPRBlockSize, ArchVGPRThreshold, MF](raw_ostream &OS) {
256+
OS << "VGPRs: " << RP.getArchVGPRNum(ArchVGPRThreshold) << ' '
257+
<< "AGPRs: " << RP.getAGPRNum(ArchVGPRThreshold);
258+
if (ST)
259+
OS << "(O"
260+
<< ST->getOccupancyWithNumVGPRs(
261+
RP.getVGPRNum(ST->hasGFX90AInsts(), ArchVGPRThreshold),
262+
DynamicVGPRBlockSize)
263+
<< ')';
264+
OS << ", SGPRs: " << RP.getSGPRNum();
265+
if (ST)
266+
OS << "(O" << ST->getOccupancyWithNumSGPRs(RP.getSGPRNum()) << ')';
267+
OS << ", LVGPR WT: " << RP.getVGPRTuplesWeight(ArchVGPRThreshold)
268+
<< ", LSGPR WT: " << RP.getSGPRTuplesWeight();
269+
if (ST)
270+
OS << " -> Occ: " << RP.getOccupancy(*ST, DynamicVGPRBlockSize, *MF);
271+
OS << '\n';
272+
});
269273
}
270274

271275
static LaneBitmask getDefRegMask(const MachineOperand &MO,
@@ -899,9 +903,10 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
899903
auto printRP = [&MF](const GCNRegPressure &RP) {
900904
return Printable([&RP, &MF](raw_ostream &OS) {
901905
OS << format(PFX " %-5d", RP.getSGPRNum())
902-
<< format(" %-5d",
903-
RP.getVGPRNum(false, MF.getSubtarget<GCNSubtarget>()
904-
.getAddressableNumArchVGPRs()));
906+
<< format(
907+
" %-5d",
908+
RP.getVGPRNum(false, MF.getSubtarget<GCNSubtarget>()
909+
.getArchVGPRAllocationThreshold(MF)));
905910
});
906911
};
907912

llvm/lib/Target/AMDGPU/GCNRegPressure.h

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -108,13 +108,13 @@ struct GCNRegPressure {
108108
}
109109
unsigned getSGPRTuplesWeight() const { return Value[TOTAL_KINDS + SGPR]; }
110110

111-
unsigned getOccupancy(const GCNSubtarget &ST,
112-
unsigned DynamicVGPRBlockSize) const {
113-
return std::min(
114-
ST.getOccupancyWithNumSGPRs(getSGPRNum()),
115-
ST.getOccupancyWithNumVGPRs(
116-
getVGPRNum(ST.hasGFX90AInsts(), ST.getAddressableNumArchVGPRs()),
117-
DynamicVGPRBlockSize));
111+
unsigned getOccupancy(const GCNSubtarget &ST, unsigned DynamicVGPRBlockSize,
112+
const MachineFunction &MF) const {
113+
return std::min(ST.getOccupancyWithNumSGPRs(getSGPRNum()),
114+
ST.getOccupancyWithNumVGPRs(
115+
getVGPRNum(ST.hasGFX90AInsts(),
116+
ST.getArchVGPRAllocationThreshold(MF)),
117+
DynamicVGPRBlockSize));
118118
}
119119

120120
void inc(unsigned Reg,
@@ -123,9 +123,10 @@ struct GCNRegPressure {
123123
const MachineRegisterInfo &MRI);
124124

125125
bool higherOccupancy(const GCNSubtarget &ST, const GCNRegPressure &O,
126-
unsigned DynamicVGPRBlockSize) const {
127-
return getOccupancy(ST, DynamicVGPRBlockSize) >
128-
O.getOccupancy(ST, DynamicVGPRBlockSize);
126+
unsigned DynamicVGPRBlockSize,
127+
const MachineFunction &MF) const {
128+
return getOccupancy(ST, DynamicVGPRBlockSize, MF) >
129+
O.getOccupancy(ST, DynamicVGPRBlockSize, MF);
129130
}
130131

131132
/// Compares \p this GCNRegPressure to \p O, returning true if \p this is
@@ -551,7 +552,8 @@ bool isEqual(const GCNRPTracker::LiveRegSet &S1,
551552
const GCNRPTracker::LiveRegSet &S2);
552553

553554
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST = nullptr,
554-
unsigned DynamicVGPRBlockSize = 0);
555+
unsigned DynamicVGPRBlockSize = 0,
556+
const MachineFunction *MF = nullptr);
555557

556558
Printable print(const GCNRPTracker::LiveRegSet &LiveRegs,
557559
const MachineRegisterInfo &MRI);

0 commit comments

Comments
 (0)