Skip to content

Commit 28c8172

Browse files
committed
[AMDGPU] Add register usage diagnostics at the point of maximum register pressure.
1 parent 375f489 commit 28c8172

File tree

3 files changed

+187
-6
lines changed

3 files changed

+187
-6
lines changed

llvm/lib/Target/AMDGPU/GCNRegPressure.cpp

Lines changed: 131 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "GCNRegPressure.h"
1515
#include "AMDGPU.h"
1616
#include "SIMachineFunctionInfo.h"
17+
#include "llvm/CodeGen/MachineLoopInfo.h"
1718
#include "llvm/CodeGen/RegisterPressure.h"
1819

1920
using namespace llvm;
@@ -459,10 +460,14 @@ LaneBitmask llvm::getLiveLaneMask(const LiveInterval &LI, SlotIndex SI,
459460

460461
GCNRPTracker::LiveRegSet llvm::getLiveRegs(SlotIndex SI,
461462
const LiveIntervals &LIS,
462-
const MachineRegisterInfo &MRI) {
463+
const MachineRegisterInfo &MRI,
464+
GCNRegPressure::RegKind RegKind) {
463465
GCNRPTracker::LiveRegSet LiveRegs;
464466
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
465467
auto Reg = Register::index2VirtReg(I);
468+
if (RegKind != GCNRegPressure::TOTAL_KINDS &&
469+
GCNRegPressure::getRegKind(Reg, MRI) != RegKind)
470+
continue;
466471
if (!LIS.hasInterval(Reg))
467472
continue;
468473
auto LiveMask = getLiveLaneMask(Reg, SI, LIS, MRI);
@@ -986,3 +991,128 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
986991

987992
#undef PFX
988993
}
994+
995+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
996+
/// Dumps a report about the point of maximum register pressure of \p MF for
/// register kind \p Kind to dbgs().
///
/// Every block is scanned bottom-up with a GCNUpwardRPTracker to find the
/// instruction at which the tracked maximum pressure for \p Kind is highest.
/// The registers of that kind live at that instruction are then listed, split
/// into registers with a single definition and registers with several, each
/// followed by its defs/uses ordered by slot index.
///
/// \param MF   function to analyse.
/// \param Kind register kind to report on.
/// \param LIS  live intervals of \p MF.
/// \param MLI  optional loop info; when non-null, every printed location is
///             annotated with the header and depth of its enclosing loop.
LLVM_DUMP_METHOD void llvm::dumpMaxRegPressure(MachineFunction &MF,
                                               GCNRegPressure::RegKind Kind,
                                               LiveIntervals &LIS,
                                               const MachineLoopInfo *MLI) {
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
  auto &OS = dbgs();
  const char *RegName = GCNRegPressure::getName(Kind);

  // Find the instruction at which the upward tracker reports the highest
  // pressure. The tracker is restarted at the end of every block.
  unsigned MaxNumRegs = 0;
  MachineInstr *MaxPressureMI = nullptr;
  GCNUpwardRPTracker RPT(LIS);
  for (auto &MBB : MF) {
    RPT.reset(MRI, LIS.getSlotIndexes()->getMBBEndIdx(&MBB).getPrevSlot());
    for (auto &MI : reverse(MBB)) {
      RPT.recede(MI);
      // A debug instruction must not become the anchor of the report: its
      // slot index is queried below, which needs a real instruction.
      if (MI.isDebugInstr())
        continue;
      unsigned NumRegs = RPT.getMaxPressure().getNumRegs(Kind);
      if (NumRegs > MaxNumRegs) {
        MaxNumRegs = NumRegs;
        MaxPressureMI = &MI;
      }
    }
  }

  // Nothing ever raised the pressure above zero (e.g. an empty function or
  // one that uses no register of this kind): there is no instruction to
  // report on, so bail out instead of dereferencing a null pointer.
  if (!MaxPressureMI) {
    OS << "\n*** Register pressure info (" << RegName << "s) for "
       << MF.getName() << " ***\n";
    OS << "No " << RegName << " pressure found.\n";
    return;
  }

  SlotIndex MISlot = LIS.getInstructionIndex(*MaxPressureMI);

  // Max pressure can occur at either the early-clobber or register slot.
  // Choose the maximum liveset between both slots. This is ugly but this is
  // diagnostic code.
  SlotIndex ECSlot = MISlot.getRegSlot(true);
  SlotIndex RSlot = MISlot.getRegSlot(false);
  GCNRPTracker::LiveRegSet ECLiveSet = getLiveRegs(ECSlot, LIS, MRI, Kind);
  GCNRPTracker::LiveRegSet RLiveSet = getLiveRegs(RSlot, LIS, MRI, Kind);
  unsigned ECNumRegs = getRegPressure(MRI, ECLiveSet).getNumRegs(Kind);
  unsigned RNumRegs = getRegPressure(MRI, RLiveSet).getNumRegs(Kind);
  GCNRPTracker::LiveRegSet *LiveSet =
      ECNumRegs > RNumRegs ? &ECLiveSet : &RLiveSet;
  SlotIndex MaxPressureSlot = ECNumRegs > RNumRegs ? ECSlot : RSlot;
  assert(getRegPressure(MRI, *LiveSet).getNumRegs(Kind) == MaxNumRegs);

  // Split live registers into single-def and multi-def sets. A register
  // counts as single-def when its interval has one value number, or when it
  // has subranges and each of them has one value number.
  GCNRegPressure SDefPressure, MDefPressure;
  SmallVector<Register, 16> SDefRegs, MDefRegs;
  for (auto [Reg, LaneMask] : *LiveSet) {
    assert(GCNRegPressure::getRegKind(Reg, MRI) == Kind);
    LiveInterval &LI = LIS.getInterval(Reg);
    if (LI.getNumValNums() == 1 ||
        (LI.hasSubRanges() &&
         llvm::all_of(LI.subranges(), [](const LiveInterval::SubRange &SR) {
           return SR.getNumValNums() == 1;
         }))) {
      SDefPressure.inc(Reg, LaneBitmask::getNone(), LaneMask, MRI);
      SDefRegs.push_back(Reg);
    } else {
      MDefPressure.inc(Reg, LaneBitmask::getNone(), LaneMask, MRI);
      MDefRegs.push_back(Reg);
    }
  }
  unsigned SDefNumRegs = SDefPressure.getNumRegs(Kind);
  unsigned MDefNumRegs = MDefPressure.getNumRegs(Kind);
  assert(SDefNumRegs + MDefNumRegs == MaxNumRegs);

  // Prints "<slot>@BB.<n>", plus loop header/depth when loop info is given.
  auto printLoc = [&](MachineBasicBlock *MBB, SlotIndex SI) {
    return Printable([&, MBB, SI](raw_ostream &OS) {
      OS << SI << "@BB." << MBB->getNumber();
      if (MLI)
        if (const MachineLoop *ML = MLI->getLoopFor(MBB))
          OS << " (LoopHdr BB." << ML->getHeader()->getNumber() << ", Depth "
             << ML->getLoopDepth() << ")";
    });
  };

  // Prints one live register (class, live lanes, pressure contribution)
  // followed by every non-debug instruction that defines or reads it.
  auto PrintRegInfo = [&](Register Reg, LaneBitmask LiveMask) {
    GCNRegPressure RegPressure;
    RegPressure.inc(Reg, LaneBitmask::getNone(), LiveMask, MRI);
    OS << "  " << printReg(Reg, TRI) << ':'
       << TRI->getRegClassName(MRI.getRegClass(Reg)) << ", LiveMask "
       << PrintLaneMask(LiveMask) << " (" << RegPressure.getNumRegs(Kind) << ' '
       << RegName << "s)\n";

    // Use std::map to sort def/uses by SlotIndex.
    std::map<SlotIndex, MachineInstr *> Instrs;
    for (auto &MI : MRI.reg_nodbg_instructions(Reg)) {
      Instrs[LIS.getInstructionIndex(MI).getRegSlot()] = &MI;
    }

    for (const auto &[SI, MI] : Instrs) {
      OS << "    ";
      if (MI->definesRegister(Reg, TRI))
        OS << "def ";
      if (MI->readsRegister(Reg, TRI))
        OS << "use ";
      OS << printLoc(MI->getParent(), SI) << ": " << *MI;
    }
  };

  OS << "\n*** Register pressure info (" << RegName << "s) for " << MF.getName()
     << " ***\n";
  OS << "Max pressure is " << MaxNumRegs << ' ' << RegName << "s at "
     << printLoc(MaxPressureMI->getParent(), MaxPressureSlot) << ": "
     << *MaxPressureMI;

  OS << "\nLive registers with single definition (" << SDefNumRegs << ' '
     << RegName << "s):\n";

  // Sort SDefRegs by number of uses (smallest first).
  auto NumUses = [&](Register R) {
    return std::distance(MRI.use_begin(R), MRI.use_end());
  };
  llvm::sort(SDefRegs,
             [&](Register A, Register B) { return NumUses(A) < NumUses(B); });

  for (const auto Reg : SDefRegs) {
    PrintRegInfo(Reg, LiveSet->lookup(Reg));
  }

  OS << "\nLive registers with multiple definitions (" << MDefNumRegs << ' '
     << RegName << "s):\n";
  for (const auto Reg : MDefRegs) {
    PrintRegInfo(Reg, LiveSet->lookup(Reg));
  }
}
1118+
#endif

llvm/lib/Target/AMDGPU/GCNRegPressure.h

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,11 @@ struct GCNRegPressure {
4141

4242
void clear() { std::fill(&Value[0], &Value[ValueArraySize], 0); }
4343

44+
/// \returns the pressure value stored for register kind \p Kind.
/// \p Kind must be a real register kind, i.e. strictly below TOTAL_KINDS.
unsigned getNumRegs(RegKind Kind) const {
  assert(Kind < TOTAL_KINDS);
  const auto Pressure = Value[Kind];
  return Pressure;
}
48+
4449
/// \returns the SGPR32 pressure
4550
unsigned getSGPRNum() const { return Value[SGPR]; }
4651
/// \returns the aggregated ArchVGPR32, AccVGPR32, and Pseudo AVGPR pressure
@@ -138,6 +143,18 @@ struct GCNRegPressure {
138143

139144
void dump() const;
140145

146+
/// \returns the register kind of \p Reg, derived from the register class that
/// \p MRI reports for it.
/// NOTE(review): MRI.getRegClass() is queried directly, so \p Reg is
/// presumably expected to be a virtual register -- confirm against callers.
static RegKind getRegKind(unsigned Reg, const MachineRegisterInfo &MRI) {
  const auto *STI =
      static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
  const auto *RC = MRI.getRegClass(Reg);
  return static_cast<RegKind>(getRegKind(RC, STI));
}
151+
152+
/// \returns a printable name for register kind \p Kind.
/// \p Kind must be strictly below TOTAL_KINDS.
/// NOTE(review): the table is indexed by the enumerator value, so its order
/// presumably mirrors the RegKind enum -- keep both in sync.
static const char *getName(RegKind Kind) {
  assert(Kind < TOTAL_KINDS);
  static constexpr const char *KindNames[] = {"SGPR", "VGPR", "AGPR", "AVGPR"};
  return KindNames[Kind];
}
157+
141158
private:
142159
static constexpr unsigned ValueArraySize = TOTAL_KINDS * 2;
143160

@@ -294,8 +311,10 @@ class GCNRPTracker {
294311
}
295312
};
296313

297-
GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
298-
const MachineRegisterInfo &MRI);
314+
/// Builds the set of virtual registers that are live at slot index \p SI.
///
/// \param SI      slot index at which liveness is queried.
/// \param LIS     live intervals used for the query; registers without an
///                interval are ignored.
/// \param MRI     register info of the function being inspected.
/// \param RegKind optional filter: when set to an actual register kind, only
///                registers of that kind are considered. The default,
///                GCNRegPressure::TOTAL_KINDS, disables the filter.
GCNRPTracker::LiveRegSet
315+
getLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
316+
const MachineRegisterInfo &MRI,
317+
GCNRegPressure::RegKind RegKind = GCNRegPressure::TOTAL_KINDS);
299318

300319
////////////////////////////////////////////////////////////////////////////////
301320
// GCNUpwardRPTracker
@@ -428,9 +447,6 @@ LaneBitmask getLiveLaneMask(const LiveInterval &LI, SlotIndex SI,
428447
const MachineRegisterInfo &MRI,
429448
LaneBitmask LaneMaskFilter = LaneBitmask::getAll());
430449

431-
GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
432-
const MachineRegisterInfo &MRI);
433-
434450
/// creates a map MachineInstr -> LiveRegSet
435451
/// R - range of iterators on instructions
436452
/// After - upon entry or exit of every instruction
@@ -524,6 +540,11 @@ struct GCNRegPressurePrinter : public MachineFunctionPass {
524540
}
525541
};
526542

543+
/// Prints to dbgs() the registers of kind \p Kind that are live at the point
/// of maximum register pressure in \p MF, together with their defs and uses.
///
/// \param MF   function to analyse.
/// \param Kind register kind to report on.
/// \param LIS  live intervals of \p MF.
/// \param MLI  loop info used to annotate printed locations with loop header
///             and depth; may be null, in which case the annotation is
///             omitted.
///
/// NOTE: the definition in GCNRegPressure.cpp is compiled only when
/// !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP); callers must be guarded by
/// the same condition.
LLVM_ABI void dumpMaxRegPressure(MachineFunction &MF,
544+
GCNRegPressure::RegKind Kind,
545+
LiveIntervals &LIS,
546+
const MachineLoopInfo *MLI);
547+
527548
} // end namespace llvm
528549

529550
#endif // LLVM_LIB_TARGET_AMDGPU_GCNREGPRESSURE_H

llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,21 @@ static cl::opt<bool> GCNTrackers(
6969
cl::desc("Use the AMDGPU specific RPTrackers during scheduling"),
7070
cl::init(false));
7171

72+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
73+
#define DUMP_MAX_REG_PRESSURE
74+
// Hidden diagnostic flag, off by default: requests the max-register-pressure
// report before the scheduling stages run (checked in runSchedStages()).
static cl::opt<bool> PrintMaxRPRegUsageBeforeScheduler(
75+
"amdgpu-print-maxrp-regusage-before-scheduler", cl::Hidden,
76+
cl::desc("Print a list of live registers along with their def/uses at the "
77+
"point of maximum register pressure before scheduling."),
78+
cl::init(false));
79+
// Hidden diagnostic flag, off by default: requests the same report once all
// scheduling stages have finished (checked in runSchedStages()).
static cl::opt<bool> PrintMaxRPRegUsageAfterScheduler(
81+
"amdgpu-print-maxrp-regusage-after-scheduler", cl::Hidden,
82+
cl::desc("Print a list of live registers along with their def/uses at the "
83+
"point of maximum register pressure after scheduling."),
84+
cl::init(false));
85+
#endif
86+
7287
const unsigned ScheduleMetrics::ScaleFactor = 100;
7388

7489
GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
@@ -960,6 +975,16 @@ void GCNScheduleDAGMILive::runSchedStages() {
960975
RegionLiveOuts.buildLiveRegMap();
961976
}
962977

978+
#ifdef DUMP_MAX_REG_PRESSURE
979+
auto dumpRegUsage = [this]() {
980+
dumpMaxRegPressure(MF, GCNRegPressure::VGPR, *LIS, MLI);
981+
dumpMaxRegPressure(MF, GCNRegPressure::SGPR, *LIS, MLI);
982+
LIS->dump();
983+
};
984+
if (PrintMaxRPRegUsageBeforeScheduler)
985+
dumpRegUsage();
986+
#endif
987+
963988
GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);
964989
while (S.advanceStage()) {
965990
auto Stage = createSchedStage(S.getCurrentStage());
@@ -995,6 +1020,11 @@ void GCNScheduleDAGMILive::runSchedStages() {
9951020

9961021
Stage->finalizeGCNSchedStage();
9971022
}
1023+
1024+
#ifdef DUMP_MAX_REG_PRESSURE
1025+
if (PrintMaxRPRegUsageAfterScheduler)
1026+
dumpRegUsage();
1027+
#endif
9981028
}
9991029

10001030
#ifndef NDEBUG

0 commit comments

Comments
 (0)