@@ -711,8 +711,11 @@ class WaitcntBrackets {
711711
// clearVgprVmemTypes: for every register unit of Reg, look up the unit's entry
// in VMem and, if one exists, reset its VMEMTypes bitmask to 0. The added ('+')
// lines then erase the entry when it reports empty(), so the map does not keep
// records that no longer carry any information.
712712 void clearVgprVmemTypes (MCPhysReg Reg) {
713713 for (MCRegUnit RU : regunits (Reg)) {
714- if (auto It = VMem.find (toVMEMID (RU)); It != VMem.end ())
714+ if (auto It = VMem.find (toVMEMID (RU)); It != VMem.end ()) {
715715 It->second .VMEMTypes = 0 ;
// It is not used again after the erase; the next loop iteration performs a
// fresh find().
716+ if (It->second .empty ())
717+ VMem.erase (It);
718+ }
716719 }
717720 }
718721
@@ -733,6 +736,10 @@ class WaitcntBrackets {
// Prints to the given stream; the definition is not part of this hunk
// (presumably it writes the bracket state -- confirm against the definition).
733736 void print (raw_ostream &) const ;
// Convenience wrapper: calls print() with the stream returned by dbgs().
734737 void dump () const { print (dbgs ()); }
735738
739+ // Free up memory by removing empty entries from the DenseMaps that track
740+ // event scores.
741+ void purgeEmptyTrackingData ();
742+
736743private:
737744 struct MergeInfo {
738745 unsigned OldLB;
@@ -820,17 +827,23 @@ class WaitcntBrackets {
820827
// Tracking record stored as the mapped value of the VMem map.
821828 struct VGPRInfo {
822829 // Scores for all instruction counters.
823- unsigned Scores[ NUM_INST_CNTS] = {0 };
830+ std::array< unsigned , NUM_INST_CNTS> Scores = {0 };
824831 // Bitmask of the VmemTypes of VMEM instructions for this VGPR.
825832 unsigned VMEMTypes = 0 ;
833+
// Returns true when every element of Scores is 0 and VMEMTypes is 0, i.e. the
// record is indistinguishable from a default-constructed one.
834+ bool empty () const {
835+ return all_of (Scores, [](unsigned K) { return K == 0 ; }) && !VMEMTypes;
836+ }
826837 };
827838
// Tracking record holding the two per-SGPR scores described below.
828839 struct SGPRInfo {
829840 // Wait cnt scores for every sgpr, the DS_CNT (corresponding to LGKMcnt
830841 // pre-gfx12) or KM_CNT (gfx12+ only), and X_CNT (gfx1250) are relevant.
831842 // Row 0 represents the score for either DS_CNT or KM_CNT and row 1 keeps
832843 // the X_CNT score.
833- unsigned Scores[2 ] = {0 };
844+ std::array<unsigned , 2 > Scores = {0 };
845+
// Returns true when both scores are 0, i.e. the record matches a
// default-constructed one.
846+ bool empty () const { return !Scores[0 ] && !Scores[1 ]; }
834847 };
835848
836849 DenseMap<VMEMID, VGPRInfo> VMem; // VGPR + LDS DMA
@@ -1200,6 +1213,17 @@ void WaitcntBrackets::simplifyWaitcnt(InstCounterType T,
12001213 Count = ~0u ;
12011214}
12021215
// Walks the VMem and SGPRs maps and erases every entry whose value reports
// empty(). Takes no arguments and returns nothing; its only effect is to
// shrink the two maps.
1216+ void WaitcntBrackets::purgeEmptyTrackingData () {
// make_early_inc_range (LLVM STLExtras) is used so that erasing the entry
// currently being visited does not disturb the iteration.
1217+ for (auto &[K, V] : make_early_inc_range (VMem)) {
1218+ if (V.empty ())
1219+ VMem.erase (K);
1220+ }
// Same sweep for the SGPR records.
1221+ for (auto &[K, V] : make_early_inc_range (SGPRs)) {
1222+ if (V.empty ())
1223+ SGPRs.erase (K);
1224+ }
1225+ }
1226+
12031227void WaitcntBrackets::determineWaitForScore (InstCounterType T,
12041228 unsigned ScoreToWait,
12051229 AMDGPU::Waitcnt &Wait) const {
@@ -2429,6 +2453,7 @@ bool WaitcntBrackets::merge(const WaitcntBrackets &Other) {
24292453 Value.VMEMTypes = NewVmemTypes;
24302454 }
24312455
2456+ purgeEmptyTrackingData ();
24322457 return StrictDom;
24332458}
24342459
0 commit comments