1+ #include " AMDGPU.h"
2+ #include " AMDGPUNextUseAnalysis.h"
3+
14#include " llvm/ADT/DenseMap.h"
25#include " llvm/ADT/PostOrderIterator.h"
36#include " llvm/ADT/iterator_range.h"
1417#include " llvm/Passes/PassPlugin.h"
1518#include " llvm/Support/Timer.h"
1619
17- #include " AMDGPU.h"
18-
19- #include " AMDGPUNextUseAnalysis.h"
20-
2120#define DEBUG_TYPE " amdgpu-next-use"
2221
2322using namespace llvm ;
2423
25- // namespace {
24+ // Command-line option to enable timing instrumentation
25+ static cl::opt<bool > EnableTimers (" amdgpu-next-use-analysis-timers" ,
26+ cl::desc (" Enable timing for Next Use Analysis" ),
27+ cl::init(false ), cl::Hidden);
28+
29+ // Static timers for performance tracking across all analysis runs
30+ static llvm::TimerGroup TG (" amdgpu-next-use" , " AMDGPU Next Use Analysis" );
31+ static llvm::Timer AnalyzeTimer (" analyze" , " Time spent in analyze()" , TG);
32+ static llvm::Timer GetDistanceTimer (" getNextUseDistance" ,
33+ " Time spent in getNextUseDistance()" , TG);
2634
2735// Three-tier ranking system for spiller decisions
28- unsigned NextUseResult::materializeForRank (int64_t stored , unsigned snapshotOffset ) const {
29- int64_t Mat64 = materialize (stored, snapshotOffset );
36+ unsigned NextUseResult::materializeForRank (int64_t Stored , unsigned SnapshotOffset ) const {
37+ int64_t Mat64 = materialize (Stored, SnapshotOffset );
3038
3139 // Tier 1: Finite distances (0 to LoopTag-1) → return as-is
3240 // Tier 2: Loop-exit distances (LoopTag to DeadTag-1) → map to 60000-64999 range
3341 // Tier 3: Dead registers (DeadTag+) → return Infinity (65535)
3442 if (Mat64 >= DeadTag) {
3543 return Infinity; // Tier 3: Dead registers get maximum distance
36- } else if (Mat64 >= LoopTag) {
44+ }
45+ if (Mat64 >= LoopTag) {
3746 // Tier 2: Loop-exit distances get mapped to high range [60000, 64999]
3847 int64_t LoopRemainder = Mat64 - LoopTag;
3948 // Clamp the remainder to fit in available range (5000 values)
4049 unsigned ClampedRemainder = static_cast <unsigned >(
4150 std::min (LoopRemainder, static_cast <int64_t >(4999 )));
4251 return 60000 + ClampedRemainder;
43- } else if (Mat64 <= 0 ) {
52+ }
53+ if (Mat64 <= 0 ) {
4454 return 0 ; // Tier 1: Zero-distance for immediate uses
45- } else {
46- return static_cast <unsigned >(Mat64); // Tier 1: Finite distances as-is
4755 }
56+ return static_cast <unsigned >(Mat64); // Tier 1: Finite distances as-is
4857}
4958
5059
5160void NextUseResult::init (const MachineFunction &MF) {
52- TG = new TimerGroup (" Next Use Analysis" ,
53- " Compilation Timers for Next Use Analysis" );
54- T1 = new Timer (" Next Use Analysis" , " Time spent in analyse()" , *TG);
55- T2 = new Timer (" Next Use Analysis" , " Time spent in computeNextUseDistance()" ,
56- *TG);
57- for (auto L : LI->getLoopsInPreorder ()) {
61+ for (auto *L : LI->getLoopsInPreorder ()) {
5862 SmallVector<std::pair<MachineBasicBlock *, MachineBasicBlock *>> Exiting;
5963 L->getExitEdges (Exiting);
6064 for (auto P : Exiting) {
@@ -69,32 +73,35 @@ void NextUseResult::analyze(const MachineFunction &MF) {
6973 // function as the analysis users are only interested in the use distances
7074 // relatively to the given MI or the given block end.
7175 DenseMap<unsigned , VRegDistances> UpwardNextUses;
72- T1->startTimer ();
76+ if (EnableTimers)
77+ AnalyzeTimer.startTimer ();
7378 bool Changed = true ;
7479 while (Changed) {
7580 Changed = false ;
76- for (auto MBB : post_order (&MF)) {
81+ for (const auto * MBB : post_order (&MF)) {
7782 unsigned Offset = 0 ;
7883 unsigned MBBNum = MBB->getNumber ();
7984 VRegDistances Curr, Prev;
8085 if (UpwardNextUses.contains (MBBNum)) {
8186 Prev = UpwardNextUses[MBBNum];
8287 }
8388
84- LLVM_DEBUG (dbgs () << " \n Merging successors for "
85- << " MBB_" << MBB->getNumber () << " ." << MBB->getName ()
86- << " \n " ;);
89+ LLVM_DEBUG ({
90+ dbgs () << " \n Merging successors for "
91+ << " MBB_" << MBB->getNumber () << " ." << MBB->getName () << " \n " ;
92+ });
8793
88- for (auto Succ : successors (MBB)) {
94+ for (auto * Succ : successors (MBB)) {
8995 unsigned SuccNum = Succ->getNumber ();
9096
9197 if (!UpwardNextUses.contains (SuccNum))
9298 continue ;
9399
94100 VRegDistances SuccDist = UpwardNextUses[SuccNum];
95- LLVM_DEBUG (dbgs () << " \n Merging "
96- << " MBB_" << Succ->getNumber () << " ."
97- << Succ->getName () << " \n " );
101+ LLVM_DEBUG ({
102+ dbgs () << " \n Merging "
103+ << " MBB_" << Succ->getNumber () << " ." << Succ->getName () << " \n " ;
104+ });
98105
99106 // Check if the edge from MBB to Succ goes out of the Loop
100107 int64_t EdgeWeight = 0 ;
@@ -124,10 +131,12 @@ void NextUseResult::analyze(const MachineFunction &MF) {
124131 }
125132 }
126133 }
127- LLVM_DEBUG (dbgs () << " \n Curr:" ;
128- printVregDistances (Curr /* , 0 - we're at the block bottom*/ );
129- dbgs () << " \n Succ:" ;
130- printVregDistances (SuccDist, EntryOff[SuccNum], EdgeWeight));
134+ LLVM_DEBUG ({
135+ dbgs () << " \n Curr:" ;
136+ printVregDistances (Curr /* , 0 - we're at the block bottom*/ );
137+ dbgs () << " \n Succ:" ;
138+ printVregDistances (SuccDist, EntryOff[SuccNum], EdgeWeight);
139+ });
131140
132141 // Filter out successor's PHI operands with SourceBlock != MBB
133142 // PHI operands are only live on their specific incoming edge
@@ -147,7 +156,10 @@ void NextUseResult::analyze(const MachineFunction &MF) {
147156 }
148157
149158 Curr.merge (SuccDist, EntryOff[SuccNum], EdgeWeight);
150- LLVM_DEBUG (dbgs () << " \n Curr after merge:" ; printVregDistances (Curr));
159+ LLVM_DEBUG ({
160+ dbgs () << " \n Curr after merge:" ;
161+ printVregDistances (Curr);
162+ });
151163 }
152164
153165 NextUseMap[MBBNum].Bottom = Curr;
@@ -180,12 +192,16 @@ void NextUseResult::analyze(const MachineFunction &MF) {
180192 // EntryOff needs the TOTAL instruction count for correct predecessor distances
181193 // while InstrOffset uses individual instruction offsets for materialization
182194
183- LLVM_DEBUG (dbgs () << " \n Final distances for MBB_" << MBB->getNumber ()
184- << " ." << MBB->getName () << " \n " ;
185- printVregDistances (Curr, Offset));
186- LLVM_DEBUG (dbgs () << " \n Previous distances for MBB_" << MBB->getNumber ()
187- << " ." << MBB->getName () << " \n " ;
188- printVregDistances (Prev, Offset));
195+ LLVM_DEBUG ({
196+ dbgs () << " \n Final distances for MBB_" << MBB->getNumber () << " ."
197+ << MBB->getName () << " \n " ;
198+ printVregDistances (Curr, Offset);
199+ dbgs () << " \n Previous distances for MBB_" << MBB->getNumber () << " ."
200+ << MBB->getName () << " \n " ;
201+ printVregDistances (Prev, Offset);
202+ dbgs () << " \n Used in block:\n " ;
203+ dumpUsedInBlock ();
204+ });
189205
190206 // EntryOff -offset of the first instruction in the block top-down walk
191207 EntryOff[MBBNum] = Offset;
@@ -196,18 +212,21 @@ void NextUseResult::analyze(const MachineFunction &MF) {
196212 Changed |= Changed4MBB;
197213 }
198214 }
199- // dumpUsedInBlock();
200215 // Dump complete analysis results for testing
201216 LLVM_DEBUG (dumpAllNextUseDistances (MF));
202- T1->stopTimer ();
203- LLVM_DEBUG (TG->print (llvm::errs ()));
217+ if (EnableTimers) {
218+ AnalyzeTimer.stopTimer ();
219+ TG.print (llvm::errs ());
220+ }
204221}
205222
206223void NextUseResult::getFromSortedRecords (
207224 const VRegDistances::SortedRecords &Dists, LaneBitmask Mask,
208225 unsigned SnapshotOffset, unsigned &D) {
209- LLVM_DEBUG (dbgs () << " Mask : [" << PrintLaneMask (Mask) << " ] "
210- << " SnapshotOffset=" << SnapshotOffset << " \n " );
226+ LLVM_DEBUG ({
227+ dbgs () << " Mask : [" << PrintLaneMask (Mask) << " ] "
228+ << " SnapshotOffset=" << SnapshotOffset << " \n " ;
229+ });
211230
212231 // Records are sorted by stored value in increasing order. Since all entries
213232 // in this snapshot share the same SnapshotOffset, ordering by stored value
@@ -240,12 +259,14 @@ NextUseResult::getSortedSubregUses(const MachineBasicBlock::iterator I,
240259 if (NextUseMap[MBBNum].InstrDist [&*I].contains (VMP.getVReg ())) {
241260 VRegDistances::SortedRecords Dists =
242261 NextUseMap[MBBNum].InstrDist [&*I][VMP.getVReg ()];
243- LLVM_DEBUG (dbgs () << " Mask : [" << PrintLaneMask (VMP.getLaneMask ())
244- << " ]\n " );
262+ LLVM_DEBUG ({
263+ dbgs () << " Mask : [" << PrintLaneMask (VMP.getLaneMask ()) << " ]\n " ;
264+ });
245265 for (auto P : reverse (Dists)) {
246266 LaneBitmask UseMask = P.first ;
247- LLVM_DEBUG (dbgs () << " Used mask : [" << PrintLaneMask (UseMask)
248- << " ]\n " );
267+ LLVM_DEBUG ({
268+ dbgs () << " Used mask : [" << PrintLaneMask (UseMask) << " ]\n " ;
269+ });
249270 if ((UseMask & VMP.getLaneMask ()) == UseMask) {
250271 Result.push_back ({VMP.getVReg (), UseMask});
251272 }
@@ -264,8 +285,9 @@ NextUseResult::getSortedSubregUses(const MachineBasicBlock &MBB,
264285 NextUseMap[MBBNum].Bottom .contains (VMP.getVReg ())) {
265286 VRegDistances::SortedRecords Dists =
266287 NextUseMap[MBBNum].Bottom [VMP.getVReg ()];
267- LLVM_DEBUG (dbgs () << " Mask : [" << PrintLaneMask (VMP.getLaneMask ())
268- << " ]\n " );
288+ LLVM_DEBUG ({
289+ dbgs () << " Mask : [" << PrintLaneMask (VMP.getLaneMask ()) << " ]\n " ;
290+ });
269291 for (auto P : reverse (Dists)) {
270292 LaneBitmask UseMask = P.first ;
271293 LLVM_DEBUG (dbgs () << " Used mask : [" << PrintLaneMask (UseMask) << " ]\n " );
@@ -278,18 +300,20 @@ NextUseResult::getSortedSubregUses(const MachineBasicBlock &MBB,
278300}
279301
280302void NextUseResult::dumpUsedInBlock () {
281- LLVM_DEBUG (for (auto P
282- : UsedInBlock) {
303+ for (auto P : UsedInBlock) {
283304 dbgs () << " MBB_" << P.first << " :\n " ;
284305 for (auto VMP : P.second ) {
285306 dbgs () << " [ " << printReg (VMP.getVReg ()) << " : <"
286307 << PrintLaneMask (VMP.getLaneMask ()) << " > ]\n " ;
287308 }
288- });
309+ }
289310}
290311
291312unsigned NextUseResult::getNextUseDistance (const MachineBasicBlock::iterator I,
292313 const VRegMaskPair VMP) {
314+ if (EnableTimers)
315+ GetDistanceTimer.startTimer ();
316+
293317 unsigned Dist = Infinity;
294318 const MachineBasicBlock *MBB = I->getParent ();
295319 unsigned MBBNum = MBB->getNumber ();
@@ -304,11 +328,16 @@ unsigned NextUseResult::getNextUseDistance(const MachineBasicBlock::iterator I,
304328 }
305329 }
306330
331+ if (EnableTimers)
332+ GetDistanceTimer.stopTimer ();
307333 return Dist;
308334}
309335
310336unsigned NextUseResult::getNextUseDistance (const MachineBasicBlock &MBB,
311337 const VRegMaskPair VMP) {
338+ if (EnableTimers)
339+ GetDistanceTimer.startTimer ();
340+
312341 unsigned Dist = Infinity;
313342 unsigned MBBNum = MBB.getNumber ();
314343 if (NextUseMap.contains (MBBNum)) {
@@ -317,6 +346,9 @@ unsigned NextUseResult::getNextUseDistance(const MachineBasicBlock &MBB,
317346 VMP.getLaneMask (), 0 , Dist);
318347 }
319348 }
349+
350+ if (EnableTimers)
351+ GetDistanceTimer.stopTimer ();
320352 return Dist;
321353}
322354
@@ -330,19 +362,6 @@ AMDGPUNextUseAnalysis::run(MachineFunction &MF,
330362
331363AnalysisKey AMDGPUNextUseAnalysis::Key;
332364
333- // } // namespace
334-
335- extern " C" LLVM_ATTRIBUTE_WEAK ::llvm::PassPluginLibraryInfo
336- llvmGetPassPluginInfo () {
337- return {LLVM_PLUGIN_API_VERSION, " AMDGPUNextUseAnalysisPass" ,
338- LLVM_VERSION_STRING, [](PassBuilder &PB) {
339- PB.registerAnalysisRegistrationCallback (
340- [](MachineFunctionAnalysisManager &MFAM) {
341- MFAM.registerPass ([] { return AMDGPUNextUseAnalysis (); });
342- });
343- }};
344- }
345-
346365char AMDGPUNextUseAnalysisWrapper::ID = 0 ;
347366char &llvm::AMDGPUNextUseAnalysisID = AMDGPUNextUseAnalysisWrapper::ID;
348367INITIALIZE_PASS_BEGIN (AMDGPUNextUseAnalysisWrapper, " amdgpu-next-use" ,
0 commit comments