@@ -704,8 +704,11 @@ class WaitcntBrackets {
704704
705705 void clearVgprVmemTypes (MCPhysReg Reg) {
706706 for (MCRegUnit RU : regunits (Reg)) {
707- if (auto It = VMem.find (toVMEMID (RU)); It != VMem.end ())
707+ if (auto It = VMem.find (toVMEMID (RU)); It != VMem.end ()) {
708708 It->second .VMEMTypes = 0 ;
709+ if (It->second .empty ())
710+ VMem.erase (It);
711+ }
709712 }
710713 }
711714
@@ -726,6 +729,10 @@ class WaitcntBrackets {
726729 void print (raw_ostream &) const ;
727730 void dump () const { print (dbgs ()); }
728731
732+ // Free up memory by removing empty entries from the DenseMaps that track
733+ // event scores.
734+ void purgeEmptyTrackingData ();
735+
729736private:
730737 struct MergeInfo {
731738 unsigned OldLB;
@@ -813,17 +820,23 @@ class WaitcntBrackets {
813820
814821 struct VGPRInfo {
815822 // Scores for all instruction counters.
816- unsigned Scores[ NUM_INST_CNTS] = {0 };
823+ std::array< unsigned , NUM_INST_CNTS> Scores = {0 };
817824 // Bitmask of the VmemTypes of VMEM instructions for this VGPR.
818825 unsigned VMEMTypes = 0 ;
826+
827+ bool empty () const {
828+ return all_of (Scores, [](unsigned K) { return K == 0 ; }) && !VMEMTypes;
829+ }
819830 };
820831
821832 struct SGPRInfo {
822833 // Wait cnt scores for every sgpr, the DS_CNT (corresponding to LGKMcnt
823834 // pre-gfx12) or KM_CNT (gfx12+ only), and X_CNT (gfx1250) are relevant.
824835 // Row 0 represents the score for either DS_CNT or KM_CNT and row 1 keeps
825836 // the X_CNT score.
826- unsigned Scores[2 ] = {0 };
837+ std::array<unsigned , 2 > Scores = {0 };
838+
839+ bool empty () const { return !Scores[0 ] && !Scores[1 ]; }
827840 };
828841
829842 DenseMap<VMEMID, VGPRInfo> VMem; // VGPR + LDS DMA
@@ -1196,6 +1209,17 @@ void WaitcntBrackets::simplifyWaitcnt(InstCounterType T,
11961209 Count = ~0u ;
11971210}
11981211
1212+ void WaitcntBrackets::purgeEmptyTrackingData () {
1213+ for (auto &[K, V] : make_early_inc_range (VMem)) {
1214+ if (V.empty ())
1215+ VMem.erase (K);
1216+ }
1217+ for (auto &[K, V] : make_early_inc_range (SGPRs)) {
1218+ if (V.empty ())
1219+ SGPRs.erase (K);
1220+ }
1221+ }
1222+
11991223void WaitcntBrackets::determineWaitForScore (InstCounterType T,
12001224 unsigned ScoreToWait,
12011225 AMDGPU::Waitcnt &Wait) const {
@@ -2435,6 +2459,7 @@ bool WaitcntBrackets::merge(const WaitcntBrackets &Other) {
24352459 Value.VMEMTypes = NewVmemTypes;
24362460 }
24372461
2462+ purgeEmptyTrackingData ();
24382463 return StrictDom;
24392464}
24402465
0 commit comments