Skip to content

Commit 6e4e14e

Browse files
committed
[AMDGPU][NextUseAnalysis] PR #156079 review comments addressed
1 parent 804e9d2 commit 6e4e14e

File tree

2 files changed

+106
-93
lines changed

2 files changed

+106
-93
lines changed

llvm/lib/Target/AMDGPU/AMDGPUNextUseAnalysis.cpp

Lines changed: 83 additions & 64 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,6 @@
1+
#include "AMDGPU.h"
2+
#include "AMDGPUNextUseAnalysis.h"
3+
14
#include "llvm/ADT/DenseMap.h"
25
#include "llvm/ADT/PostOrderIterator.h"
36
#include "llvm/ADT/iterator_range.h"
@@ -14,47 +17,48 @@
1417
#include "llvm/Passes/PassPlugin.h"
1518
#include "llvm/Support/Timer.h"
1619

17-
#include "AMDGPU.h"
18-
19-
#include "AMDGPUNextUseAnalysis.h"
20-
2120
#define DEBUG_TYPE "amdgpu-next-use"
2221

2322
using namespace llvm;
2423

25-
// namespace {
24+
// Command-line option to enable timing instrumentation.
// Defaults to false (cl::init(false)) and is marked cl::Hidden. Every timer
// start/stop call visible in this file is guarded by this flag.
25+
static cl::opt<bool> EnableTimers("amdgpu-next-use-analysis-timers",
26+
cl::desc("Enable timing for Next Use Analysis"),
27+
cl::init(false), cl::Hidden);
28+
29+
// Static timers for performance tracking across all analysis runs.
// Both timers are registered in the TimerGroup TG; analyze() prints TG to
// llvm::errs() after stopping AnalyzeTimer when EnableTimers is set.
30+
static llvm::TimerGroup TG("amdgpu-next-use", "AMDGPU Next Use Analysis");
31+
static llvm::Timer AnalyzeTimer("analyze", "Time spent in analyze()", TG);
32+
static llvm::Timer GetDistanceTimer("getNextUseDistance",
33+
"Time spent in getNextUseDistance()", TG);
2634

2735
// Three-tier ranking system for spiller decisions
//
// Converts a stored distance into an unsigned rank. The stored value is first
// passed through materialize(Stored, SnapshotOffset); the 64-bit result
// (Mat64) is then classified, checking the largest thresholds first:
//   Mat64 >= DeadTag  -> Infinity
//   Mat64 >= LoopTag  -> 60000 + min(Mat64 - LoopTag, 4999)
//   Mat64 <= 0        -> 0
//   otherwise         -> Mat64 narrowed to unsigned
// The "-" lines below are the pre-commit else-if chain; the "+" lines are the
// equivalent early-return form with renamed parameters.
// NOTE(review): the "otherwise" branch only stays below 60000 if LoopTag is
// at most 60000 - the tag values are not visible here; confirm in the header.
28-
unsigned NextUseResult::materializeForRank(int64_t stored, unsigned snapshotOffset) const {
29-
int64_t Mat64 = materialize(stored, snapshotOffset);
36+
unsigned NextUseResult::materializeForRank(int64_t Stored, unsigned SnapshotOffset) const {
37+
int64_t Mat64 = materialize(Stored, SnapshotOffset);
3038

3139
// Tier 1: Finite distances (0 to LoopTag-1) → return as-is
3240
// Tier 2: Loop-exit distances (LoopTag to DeadTag-1) → map to 60000-64999 range
3341
// Tier 3: Dead registers (DeadTag+) → return Infinity (65535)
3442
if (Mat64 >= DeadTag) {
3543
return Infinity; // Tier 3: Dead registers get maximum distance
36-
} else if (Mat64 >= LoopTag) {
44+
}
45+
if (Mat64 >= LoopTag) {
3746
// Tier 2: Loop-exit distances get mapped to high range [60000, 64999]
3847
int64_t LoopRemainder = Mat64 - LoopTag;
3948
// Clamp the remainder to fit in available range (5000 values)
4049
unsigned ClampedRemainder = static_cast<unsigned>(
4150
std::min(LoopRemainder, static_cast<int64_t>(4999)));
4251
return 60000 + ClampedRemainder;
43-
} else if (Mat64 <= 0) {
52+
}
53+
if (Mat64 <= 0) {
4454
return 0; // Tier 1: Zero-distance for immediate uses
45-
} else {
46-
return static_cast<unsigned>(Mat64); // Tier 1: Finite distances as-is
4755
}
56+
return static_cast<unsigned>(Mat64); // Tier 1: Finite distances as-is
4857
}
4958

5059

5160
void NextUseResult::init(const MachineFunction &MF) {
52-
TG = new TimerGroup("Next Use Analysis",
53-
"Compilation Timers for Next Use Analysis");
54-
T1 = new Timer("Next Use Analysis", "Time spent in analyse()", *TG);
55-
T2 = new Timer("Next Use Analysis", "Time spent in computeNextUseDistance()",
56-
*TG);
57-
for (auto L : LI->getLoopsInPreorder()) {
61+
for (auto *L : LI->getLoopsInPreorder()) {
5862
SmallVector<std::pair<MachineBasicBlock *, MachineBasicBlock *>> Exiting;
5963
L->getExitEdges(Exiting);
6064
for (auto P : Exiting) {
@@ -69,32 +73,35 @@ void NextUseResult::analyze(const MachineFunction &MF) {
6973
// function as the analysis users are only interested in the use distances
7074
// relative to the given MI or the given block end.
7175
DenseMap<unsigned, VRegDistances> UpwardNextUses;
72-
T1->startTimer();
76+
if (EnableTimers)
77+
AnalyzeTimer.startTimer();
7378
bool Changed = true;
7479
while (Changed) {
7580
Changed = false;
76-
for (auto MBB : post_order(&MF)) {
81+
for (const auto *MBB : post_order(&MF)) {
7782
unsigned Offset = 0;
7883
unsigned MBBNum = MBB->getNumber();
7984
VRegDistances Curr, Prev;
8085
if (UpwardNextUses.contains(MBBNum)) {
8186
Prev = UpwardNextUses[MBBNum];
8287
}
8388

84-
LLVM_DEBUG(dbgs() << "\nMerging successors for "
85-
<< "MBB_" << MBB->getNumber() << "." << MBB->getName()
86-
<< "\n";);
89+
LLVM_DEBUG({
90+
dbgs() << "\nMerging successors for "
91+
<< "MBB_" << MBB->getNumber() << "." << MBB->getName() << "\n";
92+
});
8793

88-
for (auto Succ : successors(MBB)) {
94+
for (auto *Succ : successors(MBB)) {
8995
unsigned SuccNum = Succ->getNumber();
9096

9197
if (!UpwardNextUses.contains(SuccNum))
9298
continue;
9399

94100
VRegDistances SuccDist = UpwardNextUses[SuccNum];
95-
LLVM_DEBUG(dbgs() << "\nMerging "
96-
<< "MBB_" << Succ->getNumber() << "."
97-
<< Succ->getName() << "\n");
101+
LLVM_DEBUG({
102+
dbgs() << "\nMerging "
103+
<< "MBB_" << Succ->getNumber() << "." << Succ->getName() << "\n";
104+
});
98105

99106
// Check if the edge from MBB to Succ goes out of the Loop
100107
int64_t EdgeWeight = 0;
@@ -124,10 +131,12 @@ void NextUseResult::analyze(const MachineFunction &MF) {
124131
}
125132
}
126133
}
127-
LLVM_DEBUG(dbgs() << "\nCurr:";
128-
printVregDistances(Curr /*, 0 - we're at the block bottom*/);
129-
dbgs() << "\nSucc:";
130-
printVregDistances(SuccDist, EntryOff[SuccNum], EdgeWeight));
134+
LLVM_DEBUG({
135+
dbgs() << "\nCurr:";
136+
printVregDistances(Curr /*, 0 - we're at the block bottom*/);
137+
dbgs() << "\nSucc:";
138+
printVregDistances(SuccDist, EntryOff[SuccNum], EdgeWeight);
139+
});
131140

132141
// Filter out successor's PHI operands with SourceBlock != MBB
133142
// PHI operands are only live on their specific incoming edge
@@ -147,7 +156,10 @@ void NextUseResult::analyze(const MachineFunction &MF) {
147156
}
148157

149158
Curr.merge(SuccDist, EntryOff[SuccNum], EdgeWeight);
150-
LLVM_DEBUG(dbgs() << "\nCurr after merge:"; printVregDistances(Curr));
159+
LLVM_DEBUG({
160+
dbgs() << "\nCurr after merge:";
161+
printVregDistances(Curr);
162+
});
151163
}
152164

153165
NextUseMap[MBBNum].Bottom = Curr;
@@ -180,12 +192,16 @@ void NextUseResult::analyze(const MachineFunction &MF) {
180192
// EntryOff needs the TOTAL instruction count for correct predecessor distances
181193
// while InstrOffset uses individual instruction offsets for materialization
182194

183-
LLVM_DEBUG(dbgs() << "\nFinal distances for MBB_" << MBB->getNumber()
184-
<< "." << MBB->getName() << "\n";
185-
printVregDistances(Curr, Offset));
186-
LLVM_DEBUG(dbgs() << "\nPrevious distances for MBB_" << MBB->getNumber()
187-
<< "." << MBB->getName() << "\n";
188-
printVregDistances(Prev, Offset));
195+
LLVM_DEBUG({
196+
dbgs() << "\nFinal distances for MBB_" << MBB->getNumber() << "."
197+
<< MBB->getName() << "\n";
198+
printVregDistances(Curr, Offset);
199+
dbgs() << "\nPrevious distances for MBB_" << MBB->getNumber() << "."
200+
<< MBB->getName() << "\n";
201+
printVregDistances(Prev, Offset);
202+
dbgs() << "\nUsed in block:\n";
203+
dumpUsedInBlock();
204+
});
189205

190206
// EntryOff - offset of the first instruction in the block's top-down walk
191207
EntryOff[MBBNum] = Offset;
@@ -196,18 +212,21 @@ void NextUseResult::analyze(const MachineFunction &MF) {
196212
Changed |= Changed4MBB;
197213
}
198214
}
199-
// dumpUsedInBlock();
200215
// Dump complete analysis results for testing
201216
LLVM_DEBUG(dumpAllNextUseDistances(MF));
202-
T1->stopTimer();
203-
LLVM_DEBUG(TG->print(llvm::errs()));
217+
if (EnableTimers) {
218+
AnalyzeTimer.stopTimer();
219+
TG.print(llvm::errs());
220+
}
204221
}
205222

206223
void NextUseResult::getFromSortedRecords(
207224
const VRegDistances::SortedRecords &Dists, LaneBitmask Mask,
208225
unsigned SnapshotOffset, unsigned &D) {
209-
LLVM_DEBUG(dbgs() << "Mask : [" << PrintLaneMask(Mask) << "] "
210-
<< "SnapshotOffset=" << SnapshotOffset << "\n");
226+
LLVM_DEBUG({
227+
dbgs() << "Mask : [" << PrintLaneMask(Mask) << "] "
228+
<< "SnapshotOffset=" << SnapshotOffset << "\n";
229+
});
211230

212231
// Records are sorted by stored value in increasing order. Since all entries
213232
// in this snapshot share the same SnapshotOffset, ordering by stored value
@@ -240,12 +259,14 @@ NextUseResult::getSortedSubregUses(const MachineBasicBlock::iterator I,
240259
if (NextUseMap[MBBNum].InstrDist[&*I].contains(VMP.getVReg())) {
241260
VRegDistances::SortedRecords Dists =
242261
NextUseMap[MBBNum].InstrDist[&*I][VMP.getVReg()];
243-
LLVM_DEBUG(dbgs() << "Mask : [" << PrintLaneMask(VMP.getLaneMask())
244-
<< "]\n");
262+
LLVM_DEBUG({
263+
dbgs() << "Mask : [" << PrintLaneMask(VMP.getLaneMask()) << "]\n";
264+
});
245265
for (auto P : reverse(Dists)) {
246266
LaneBitmask UseMask = P.first;
247-
LLVM_DEBUG(dbgs() << "Used mask : [" << PrintLaneMask(UseMask)
248-
<< "]\n");
267+
LLVM_DEBUG({
268+
dbgs() << "Used mask : [" << PrintLaneMask(UseMask) << "]\n";
269+
});
249270
if ((UseMask & VMP.getLaneMask()) == UseMask) {
250271
Result.push_back({VMP.getVReg(), UseMask});
251272
}
@@ -264,8 +285,9 @@ NextUseResult::getSortedSubregUses(const MachineBasicBlock &MBB,
264285
NextUseMap[MBBNum].Bottom.contains(VMP.getVReg())) {
265286
VRegDistances::SortedRecords Dists =
266287
NextUseMap[MBBNum].Bottom[VMP.getVReg()];
267-
LLVM_DEBUG(dbgs() << "Mask : [" << PrintLaneMask(VMP.getLaneMask())
268-
<< "]\n");
288+
LLVM_DEBUG({
289+
dbgs() << "Mask : [" << PrintLaneMask(VMP.getLaneMask()) << "]\n";
290+
});
269291
for (auto P : reverse(Dists)) {
270292
LaneBitmask UseMask = P.first;
271293
LLVM_DEBUG(dbgs() << "Used mask : [" << PrintLaneMask(UseMask) << "]\n");
@@ -278,18 +300,20 @@ NextUseResult::getSortedSubregUses(const MachineBasicBlock &MBB,
278300
}
279301

280302
// Dumps the contents of UsedInBlock to dbgs(): for every entry it prints a
// "MBB_<key>:" header line followed by one "[ <vreg> : <<lane mask>> ]" line
// per element of the entry's value.
// In the "-" lines the whole loop was wrapped in LLVM_DEBUG; in the "+" lines
// the wrapper is gone, so the function prints whenever it is called. The call
// added to analyze() in this commit sits inside an LLVM_DEBUG block.
// NOTE(review): both loops bind their element by value (auto P / auto VMP),
// which copies each element; const auto & would avoid that.
void NextUseResult::dumpUsedInBlock() {
281-
LLVM_DEBUG(for (auto P
282-
: UsedInBlock) {
303+
for (auto P : UsedInBlock) {
283304
dbgs() << "MBB_" << P.first << ":\n";
284305
for (auto VMP : P.second) {
285306
dbgs() << "[ " << printReg(VMP.getVReg()) << " : <"
286307
<< PrintLaneMask(VMP.getLaneMask()) << "> ]\n";
287308
}
288-
});
309+
}
289310
}
290311

291312
unsigned NextUseResult::getNextUseDistance(const MachineBasicBlock::iterator I,
292313
const VRegMaskPair VMP) {
314+
if (EnableTimers)
315+
GetDistanceTimer.startTimer();
316+
293317
unsigned Dist = Infinity;
294318
const MachineBasicBlock *MBB = I->getParent();
295319
unsigned MBBNum = MBB->getNumber();
@@ -304,11 +328,16 @@ unsigned NextUseResult::getNextUseDistance(const MachineBasicBlock::iterator I,
304328
}
305329
}
306330

331+
if (EnableTimers)
332+
GetDistanceTimer.stopTimer();
307333
return Dist;
308334
}
309335

310336
unsigned NextUseResult::getNextUseDistance(const MachineBasicBlock &MBB,
311337
const VRegMaskPair VMP) {
338+
if (EnableTimers)
339+
GetDistanceTimer.startTimer();
340+
312341
unsigned Dist = Infinity;
313342
unsigned MBBNum = MBB.getNumber();
314343
if (NextUseMap.contains(MBBNum)) {
@@ -317,6 +346,9 @@ unsigned NextUseResult::getNextUseDistance(const MachineBasicBlock &MBB,
317346
VMP.getLaneMask(), 0, Dist);
318347
}
319348
}
349+
350+
if (EnableTimers)
351+
GetDistanceTimer.stopTimer();
320352
return Dist;
321353
}
322354

@@ -330,19 +362,6 @@ AMDGPUNextUseAnalysis::run(MachineFunction &MF,
330362

331363
AnalysisKey AMDGPUNextUseAnalysis::Key;
332364

333-
//} // namespace
334-
335-
extern "C" LLVM_ATTRIBUTE_WEAK ::llvm::PassPluginLibraryInfo
336-
llvmGetPassPluginInfo() {
337-
return {LLVM_PLUGIN_API_VERSION, "AMDGPUNextUseAnalysisPass",
338-
LLVM_VERSION_STRING, [](PassBuilder &PB) {
339-
PB.registerAnalysisRegistrationCallback(
340-
[](MachineFunctionAnalysisManager &MFAM) {
341-
MFAM.registerPass([] { return AMDGPUNextUseAnalysis(); });
342-
});
343-
}};
344-
}
345-
346365
char AMDGPUNextUseAnalysisWrapper::ID = 0;
347366
char &llvm::AMDGPUNextUseAnalysisID = AMDGPUNextUseAnalysisWrapper::ID;
348367
INITIALIZE_PASS_BEGIN(AMDGPUNextUseAnalysisWrapper, "amdgpu-next-use",

0 commit comments

Comments
 (0)