1- #include " AMDGPU.h"
21#include " AMDGPUNextUseAnalysis.h"
2+ #include " AMDGPU.h"
33
44#include " llvm/ADT/DenseMap.h"
55#include " llvm/ADT/PostOrderIterator.h"
2222using namespace llvm ;
2323
2424// Command-line option to enable timing instrumentation
25- static cl::opt<bool > EnableTimers (" amdgpu-next-use-analysis-timers" ,
26- cl::desc (" Enable timing for Next Use Analysis" ),
27- cl::init(false ), cl::Hidden);
25+ static cl::opt<bool >
26+ EnableTimers (" amdgpu-next-use-analysis-timers" ,
27+ cl::desc (" Enable timing for Next Use Analysis" ),
28+ cl::init(false ), cl::Hidden);
2829
2930// Static timers for performance tracking across all analysis runs
3031static llvm::TimerGroup TG (" amdgpu-next-use" , " AMDGPU Next Use Analysis" );
3132static llvm::Timer AnalyzeTimer (" analyze" , " Time spent in analyze()" , TG);
32- static llvm::Timer GetDistanceTimer (" getNextUseDistance" ,
33- " Time spent in getNextUseDistance()" , TG);
33+ static llvm::Timer GetDistanceTimer (" getNextUseDistance" ,
34+ " Time spent in getNextUseDistance()" , TG);
3435
3536// Three-tier ranking system for spiller decisions
36- unsigned NextUseResult::materializeForRank (int64_t Stored, unsigned SnapshotOffset) const {
37+ unsigned NextUseResult::materializeForRank (int64_t Stored,
38+ unsigned SnapshotOffset) const {
3739 int64_t Mat64 = materialize (Stored, SnapshotOffset);
3840
3941 // Tier 1: Finite distances (0 to LoopTag-1) → return as-is
40- // Tier 2: Loop-exit distances (LoopTag to DeadTag-1) → map to 60000-64999 range
41- // Tier 3: Dead registers (DeadTag+) → return Infinity (65535)
42+ // Tier 2: Loop-exit distances (LoopTag to DeadTag-1) → map to [60000, 64999]
43+ // Tier 3: Dead registers (DeadTag+) → return Infinity (65535)
4244 if (Mat64 >= DeadTag) {
43- return Infinity; // Tier 3: Dead registers get maximum distance
45+ return Infinity; // Tier 3: Dead registers get maximum distance
4446 }
4547 if (Mat64 >= LoopTag) {
4648 // Tier 2: Loop-exit distances get mapped to high range [60000, 64999]
@@ -51,12 +53,11 @@ unsigned NextUseResult::materializeForRank(int64_t Stored, unsigned SnapshotOffs
5153 return 60000 + ClampedRemainder;
5254 }
5355 if (Mat64 <= 0 ) {
54- return 0 ; // Tier 1: Zero-distance for immediate uses
56+ return 0 ; // Tier 1: Zero-distance for immediate uses
5557 }
56- return static_cast <unsigned >(Mat64); // Tier 1: Finite distances as-is
58+ return static_cast <unsigned >(Mat64); // Tier 1: Finite distances as-is
5759}
5860
59-
6061void NextUseResult::init (const MachineFunction &MF) {
6162 for (auto *L : LI->getLoopsInPreorder ()) {
6263 SmallVector<std::pair<MachineBasicBlock *, MachineBasicBlock *>> Exiting;
@@ -100,7 +101,8 @@ void NextUseResult::analyze(const MachineFunction &MF) {
100101 VRegDistances SuccDist = UpwardNextUses[SuccNum];
101102 LLVM_DEBUG ({
102103 dbgs () << " \n Merging "
103- << " MBB_" << Succ->getNumber () << " ." << Succ->getName () << " \n " ;
104+ << " MBB_" << Succ->getNumber () << " ." << Succ->getName ()
105+ << " \n " ;
104106 });
105107
106108 // Check if the edge from MBB to Succ goes out of the Loop
@@ -116,7 +118,8 @@ void NextUseResult::analyze(const MachineFunction &MF) {
116118 // Clear out the Loop-Exiting weights.
117119 for (auto &P : SuccDist) {
118120 auto &Dists = P.second ;
119- // Collect items that need to be updated to avoid iterator invalidation
121+ // Collect items that need to be updated to avoid iterator
122+ // invalidation
120123 SmallVector<std::pair<LaneBitmask, int64_t >, 4 > ToUpdate;
121124 for (auto R : Dists) {
122125 if (R.second >= LoopTag) {
@@ -189,8 +192,9 @@ void NextUseResult::analyze(const MachineFunction &MF) {
189192 ++Offset;
190193 }
191194
192- // EntryOff needs the TOTAL instruction count for correct predecessor distances
193- // while InstrOffset uses individual instruction offsets for materialization
195+ // EntryOff needs the TOTAL instruction count for correct predecessor
196+ // distances while InstrOffset uses individual instruction offsets for
197+ // materialization
194198
195199 LLVM_DEBUG ({
196200 dbgs () << " \n Final distances for MBB_" << MBB->getNumber () << " ."
@@ -237,8 +241,10 @@ void NextUseResult::getFromSortedRecords(
237241
238242 // Check for any overlap between the queried mask and the use mask.
239243 // This handles both subregister and superregister uses:
240- // - If UseMask covers Mask: superregister use (e.g., querying sub0, finding full reg)
241- // - If Mask covers UseMask: subregister use (e.g., querying full reg, finding sub0)
244+ // - If UseMask covers Mask: superregister use (e.g., querying sub0, finding
245+ // full reg)
246+ // - If Mask covers UseMask: subregister use (e.g., querying full reg,
247+ // finding sub0)
242248 // - If they overlap partially: partial overlap (both are valid uses)
243249 if ((Mask & UseMask).any ()) {
244250 // Use materializeForRank for three-tier ranking system
@@ -267,9 +273,8 @@ NextUseResult::getSortedSubregUses(const MachineBasicBlock::iterator I,
267273 });
268274 for (auto P : reverse (Dists)) {
269275 LaneBitmask UseMask = P.first ;
270- LLVM_DEBUG ({
271- dbgs () << " Used mask : [" << PrintLaneMask (UseMask) << " ]\n " ;
272- });
276+ LLVM_DEBUG (
277+ { dbgs () << " Used mask : [" << PrintLaneMask (UseMask) << " ]\n " ; });
273278 if ((UseMask & VMP.getLaneMask ()) == UseMask) {
274279 Result.push_back ({VMP.getVReg (), UseMask});
275280 }
@@ -288,9 +293,8 @@ NextUseResult::getSortedSubregUses(const MachineBasicBlock &MBB,
288293 NextUseMap[MBBNum].Bottom .contains (VMP.getVReg ())) {
289294 VRegDistances::SortedRecords Dists =
290295 NextUseMap[MBBNum].Bottom [VMP.getVReg ()];
291- LLVM_DEBUG ({
292- dbgs () << " Mask : [" << PrintLaneMask (VMP.getLaneMask ()) << " ]\n " ;
293- });
296+ LLVM_DEBUG (
297+ { dbgs () << " Mask : [" << PrintLaneMask (VMP.getLaneMask ()) << " ]\n " ; });
294298 for (auto P : reverse (Dists)) {
295299 LaneBitmask UseMask = P.first ;
296300 LLVM_DEBUG (dbgs () << " Used mask : [" << PrintLaneMask (UseMask) << " ]\n " );
@@ -316,7 +320,7 @@ unsigned NextUseResult::getNextUseDistance(const MachineBasicBlock::iterator I,
316320 const VRegMaskPair VMP) {
317321 if (EnableTimers)
318322 GetDistanceTimer.startTimer ();
319-
323+
320324 unsigned Dist = Infinity;
321325 const MachineBasicBlock *MBB = I->getParent ();
322326 unsigned MBBNum = MBB->getNumber ();
@@ -326,8 +330,8 @@ unsigned NextUseResult::getNextUseDistance(const MachineBasicBlock::iterator I,
326330 if (NextUseMap[MBBNum].InstrDist [&*I].contains (VMP.getVReg ())) {
327331 // printSortedRecords(Dists[VMP.VReg], VMP.VReg);
328332 unsigned SnapOff = NextUseMap[MBBNum].InstrOffset [&*I];
329- getFromSortedRecords (Dists[VMP.getVReg ()], VMP.getLaneMask (),
330- SnapOff, Dist);
333+ getFromSortedRecords (Dists[VMP.getVReg ()], VMP.getLaneMask (), SnapOff,
334+ Dist);
331335 }
332336 }
333337
@@ -340,7 +344,7 @@ unsigned NextUseResult::getNextUseDistance(const MachineBasicBlock &MBB,
340344 const VRegMaskPair VMP) {
341345 if (EnableTimers)
342346 GetDistanceTimer.startTimer ();
343-
347+
344348 unsigned Dist = Infinity;
345349 unsigned MBBNum = MBB.getNumber ();
346350 if (NextUseMap.contains (MBBNum)) {
@@ -349,7 +353,7 @@ unsigned NextUseResult::getNextUseDistance(const MachineBasicBlock &MBB,
349353 VMP.getLaneMask (), 0 , Dist);
350354 }
351355 }
352-
356+
353357 if (EnableTimers)
354358 GetDistanceTimer.stopTimer ();
355359 return Dist;
0 commit comments