@@ -1102,46 +1102,10 @@ bool ClusteredLowOccStage::initGCNSchedStage() {
11021102#define REMAT_DEBUG (X ) LLVM_DEBUG(dbgs() << REMAT_PREFIX; X;)
11031103
11041104#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1105- void PreRARematStage::printTargetRegions (bool PrintAll) const {
1106- if (PrintAll) {
1107- for (auto [I, Target] : enumerate(RPTargets))
1108- dbgs () << REMAT_PREFIX << " [" << I << " ] " << Target << ' \n ' ;
1109- return ;
1110- }
1111- if (TargetRegions.none ()) {
1112- dbgs () << REMAT_PREFIX << " No target regions\n " ;
1113- return ;
1114- }
1115- dbgs () << REMAT_PREFIX << " Target regions:\n " ;
1116- for (unsigned I : TargetRegions.set_bits ())
1117- dbgs () << REMAT_PREFIX << " [" << I << " ] " << RPTargets[I] << ' \n ' ;
1118- }
1119-
1120- void PreRARematStage::RematReg::print () const {
1121- dbgs () << REMAT_PREFIX << " [" << DefRegion << " ] " << *DefMI;
1122- dbgs () << REMAT_PREFIX << " -> used in [" << UseRegion << " ] " << *UseMI;
1123- dbgs () << REMAT_PREFIX << " Guaranteed RP reduction in:" ;
1124- for (unsigned I : Live.set_bits ()) {
1125- if (isUnusedLiveThrough (I))
1126- dbgs () << " [" << I << " ]" ;
1127- }
1128- dbgs () << ' \n ' ;
1129- dbgs () << REMAT_PREFIX << " Possible RP reduction in:" ;
1130- for (unsigned I : Live.set_bits ()) {
1131- if (!isUnusedLiveThrough (I))
1132- dbgs () << " [" << I << " ]" ;
1133- }
1134- dbgs () << ' \n ' ;
1135- }
1136-
1137- void PreRARematStage::ScoredRemat::print () const {
1138- ScoreTy ShiftScore = Score;
1139- ScoreTy RegionImpact = ShiftScore & ((1 << RegionImpactWidth) - 1 );
1140- ShiftScore >>= RegionImpactWidth;
1141- ScoreTy FreqDiff = ShiftScore & ((1 << FreqDiffWidth) - 1 );
1142- ShiftScore >>= FreqDiffWidth;
1143- ScoreTy MaxFreq = ShiftScore;
1144- dbgs () << ' (' << MaxFreq << " , " << FreqDiff << " , " << RegionImpact << ' )' ;
1105+ Printable PreRARematStage::ScoredRemat::print () const {
1106+ return Printable ([&](raw_ostream &OS) {
1107+ OS << ' (' << MaxFreq << " , " << FreqDiff << " , " << RegionImpact << ' )' ;
1108+ });
11451109}
11461110#endif
11471111
@@ -1172,6 +1136,38 @@ bool PreRARematStage::initGCNSchedStage() {
11721136 RegionBB.push_back (ParentMBB);
11731137 }
11741138
1139+ #ifndef NDEBUG
1140+ auto PrintTargetRegions = [&]() -> void {
1141+ if (TargetRegions.none ()) {
1142+ dbgs () << REMAT_PREFIX << " No target regions\n " ;
1143+ return ;
1144+ }
1145+ dbgs () << REMAT_PREFIX << " Target regions:\n " ;
1146+ for (unsigned I : TargetRegions.set_bits ())
1147+ dbgs () << REMAT_PREFIX << " [" << I << " ] " << RPTargets[I] << ' \n ' ;
1148+ };
1149+ auto PrintRematReg = [&](const RematReg &Remat) -> Printable {
1150+ return Printable ([&, Remat](raw_ostream &OS) {
1151+ // Concatenate all region numbers in which the register is unused and
1152+ // live-through.
1153+ std::string UnusedLTRegions;
1154+ for (unsigned I = 0 ; I < NumRegions; ++I) {
1155+ if (Remat.isUnusedLiveThrough (I)) {
1156+ if (!UnusedLTRegions.empty ())
1157+ UnusedLTRegions += " ," ;
1158+ UnusedLTRegions += std::to_string (I);
1159+ }
1160+ }
1161+ if (!UnusedLTRegions.empty ())
1162+ UnusedLTRegions = " - " + UnusedLTRegions + " -" ;
1163+ OS << " [" << Remat.DefRegion << " -" << UnusedLTRegions << " > "
1164+ << Remat.UseRegion << " ] " ;
1165+ Remat.DefMI ->print (OS, /* IsStandalone=*/ true , /* SkipOpers=*/ false ,
1166+ /* SkipDebugLoc=*/ false , /* AddNewLine=*/ false );
1167+ });
1168+ };
1169+ #endif
1170+
11751171 // Set an objective for the stage based on current RP in each region.
11761172 REMAT_DEBUG ({
11771173 dbgs () << " Analyzing " ;
@@ -1190,22 +1186,19 @@ bool PreRARematStage::initGCNSchedStage() {
11901186 dbgs () << " reduce spilling (minimum target occupancy is "
11911187 << MFI.getMinWavesPerEU () << " )\n " ;
11921188 }
1193- printTargetRegions ( /* PrintAll= */ TargetRegions. none () );
1189+ PrintTargetRegions ( );
11941190 });
11951191
11961192 if (!collectRematRegs (MIRegion)) {
11971193 REMAT_DEBUG (dbgs () << " No rematerializable registers\n " );
11981194 return false ;
11991195 }
1196+ const ScoredRemat::FreqInfo FreqInfo (MF, DAG);
12001197 REMAT_DEBUG ({
12011198 dbgs () << " Rematerializable registers:\n " ;
12021199 for (const RematReg &Remat : RematRegs)
1203- Remat.print ();
1204- });
1205-
1206- const ScoredRemat::FreqInfo FreqInfo (MF, DAG);
1207- REMAT_DEBUG ({
1208- dbgs () << " Region frequencies\n " ;
1200+ dbgs () << REMAT_PREFIX << " " << PrintRematReg (Remat) << ' \n ' ;
1201+ dbgs () << REMAT_PREFIX << " Region frequencies\n " ;
12091202 for (auto [I, Freq] : enumerate(FreqInfo.Regions )) {
12101203 dbgs () << REMAT_PREFIX << " [" << I << " ] " ;
12111204 if (Freq)
@@ -1227,52 +1220,53 @@ bool PreRARematStage::initGCNSchedStage() {
12271220#endif
12281221 BitVector RecomputeRP (NumRegions);
12291222 do {
1223+ assert (!ScoredRemats.empty () && " no more remat candidates" );
1224+
12301225 // (Re-)Score and (re-)sort all remats in increasing score order.
12311226 for (ScoredRemat &Remat : ScoredRemats)
12321227 Remat.update (TargetRegions, RPTargets, FreqInfo, !TargetOcc);
12331228 sort (ScoredRemats);
12341229
12351230 REMAT_DEBUG ({
1236- dbgs () << " ==== ROUND " << RoundNum << " ====\n " ;
1237- for (const ScoredRemat &SRemat : ScoredRemats) {
1238- dbgs () << REMAT_PREFIX;
1239- SRemat.print ();
1240- dbgs () << " | " << *SRemat.Remat ->DefMI ;
1231+ dbgs () << " ==== ROUND " << RoundNum << " ====\n "
1232+ << REMAT_PREFIX
1233+ << " Candidates with non-null score, in rematerialization order:\n " ;
1234+ for (const ScoredRemat &RematDecision : reverse (ScoredRemats)) {
1235+ if (RematDecision.hasNullScore ())
1236+ break ;
1237+ dbgs () << REMAT_PREFIX << " " << RematDecision.print () << " | "
1238+ << *RematDecision.Remat ->DefMI ;
12411239 }
1242- printTargetRegions ();
1240+ PrintTargetRegions ();
12431241 });
12441242
12451243 RecomputeRP.reset ();
1246- int RematIdx = ScoredRemats.size () - 1 ;
1244+ unsigned RematIdx = ScoredRemats.size ();
12471245
12481246 // Rematerialize registers in decreasing score order until we estimate
12491247 // that all RP targets are satisfied or until rematerialization candidates
12501248 // are no longer useful to decrease RP.
1251- for (; RematIdx >= 0 && TargetRegions.any (); --RematIdx) {
1252- const RematReg &Remat = *ScoredRemats[RematIdx].Remat ;
1253- // Stop on null score. Since scores monotonically decrease as we
1254- // rematerialize, we know there is nothing useful left to do in such
1255- // cases.
1256- if (ScoredRemats[RematIdx].hasNullScore ()) {
1257- REMAT_DEBUG (dbgs () << " *** Stop on null score | " << *Remat.DefMI );
1258- RematIdx = -1 ;
1249+ for (; RematIdx && TargetRegions.any (); --RematIdx) {
1250+ const ScoredRemat &Candidate = ScoredRemats[RematIdx - 1 ];
1251+ // Stop rematerializing on encountering a null score. Since scores
1252+ // monotonically decrease as we rematerialize, we know there is nothing
1253+ // useful left to do in such cases, even if we were to re-score.
1254+ if (Candidate.hasNullScore ()) {
1255+ RematIdx = 0 ;
12591256 break ;
12601257 }
12611258
1259+ const RematReg &Remat = *Candidate.Remat ;
12621260 // When previous rematerializations in this round have already satisfied
12631261 // RP targets in all regions this rematerialization can impact, we have a
12641262 // good indication that our scores have diverged significantly from
12651263 // reality, in which case we interrupt this round and re-score. This also
12661264 // ensures that every rematerialization we perform is possibly impactful
12671265 // in at least one target region.
1268- if (!Remat.maybeBeneficial (TargetRegions, RPTargets)) {
1269- REMAT_DEBUG (dbgs () << " *** Stop round on stale score | "
1270- << *Remat.DefMI );
1266+ if (!Remat.maybeBeneficial (TargetRegions, RPTargets))
12711267 break ;
1272- }
12731268
1274- REMAT_DEBUG (dbgs () << " *** REMAT [" << Remat.DefRegion << " -> "
1275- << Remat.UseRegion << " ] | " << *Remat.DefMI );
1269+ REMAT_DEBUG (dbgs () << " ** REMAT " << PrintRematReg (Remat) << ' \n ' ;);
12761270 // Every rematerialization we do here is likely to move the instruction
12771271 // into a higher frequency region, increasing the total sum latency of the
12781272 // instruction itself. This is acceptable if we are eliminating a spill in
@@ -1289,14 +1283,18 @@ bool PreRARematStage::initGCNSchedStage() {
12891283 ++RoundNum;
12901284#endif
12911285 REMAT_DEBUG ({
1292- if (!TargetRegions.any ())
1293- dbgs () << " *** Stop round on all targets achieved\n " ;
1294- else if (RematIdx == -1 )
1295- dbgs () << " *** Stop round on exhausted remat opportunities\n " ;
1286+ if (!TargetRegions.any ()) {
1287+ dbgs () << " ** Interrupt round on all targets achieved\n " ;
1288+ } else if (RematIdx) {
1289+ dbgs () << " ** Interrupt round on stale score for "
1290+ << *ScoredRemats[RematIdx - 1 ].Remat ->DefMI ;
1291+ } else {
1292+ dbgs () << " ** Stop on exhausted rematerialization candidates\n " ;
1293+ }
12961294 });
12971295
12981296 // Peel off registers we already rematerialized from the vector's tail.
1299- ScoredRemats.truncate (RematIdx + 1 );
1297+ ScoredRemats.truncate (RematIdx);
13001298 } while ((updateAndVerifyRPTargets (RecomputeRP) || TargetRegions.any ()) &&
13011299 !ScoredRemats.empty ());
13021300 if (RescheduleRegions.none ())
@@ -2008,20 +2006,11 @@ PreRARematStage::ScoredRemat::FreqInfo::FreqInfo(
20082006 if (!MinFreq)
20092007 return ;
20102008
2011- // Normalize to minimum observed frequency to avoid overflows when adding up
2012- // frequencies.
2009+ // Normalize to minimum observed frequency to avoid underflows/overflows when
2010+ // combining frequencies.
20132011 for (uint64_t &Freq : Regions)
20142012 Freq /= MinFreq;
20152013 MaxFreq /= MinFreq;
2016-
2017- // Compute the scaling factor for scoring frequency differences.
2018- const uint64_t MaxDiff = MaxFreq - 1 ;
2019- const uint64_t MaxReprFreqValue = (1 << FreqDiffWidth) - 1 ;
2020- RescaleIsDenom = (2 * MaxDiff) & ~MaxReprFreqValue;
2021- if (RescaleIsDenom)
2022- RescaleFactor = (2 * MaxDiff) >> FreqDiffWidth;
2023- else
2024- RescaleFactor = MaxDiff ? MaxReprFreqValue / (2 * MaxDiff) : 1 ;
20252014}
20262015
20272016PreRARematStage::ScoredRemat::ScoredRemat (const RematReg *Remat,
@@ -2044,7 +2033,7 @@ unsigned PreRARematStage::ScoredRemat::getNumRegs(
20442033 return divideCeil (RegSize, 32 );
20452034}
20462035
2047- uint64_t PreRARematStage::ScoredRemat::getFreqDiff (const FreqInfo &Freq) const {
2036+ int64_t PreRARematStage::ScoredRemat::getFreqDiff (const FreqInfo &Freq) const {
20482037 // Get frequencies of defining and using regions. A rematerialization from the
20492038 // least frequent region to the most frequent region will yield the greatest
20502039 // latency penalty and therefore should get minimum score. Reciprocally, a
@@ -2056,36 +2045,22 @@ uint64_t PreRARematStage::ScoredRemat::getFreqDiff(const FreqInfo &Freq) const {
20562045 uint64_t UseOrMax = Freq.Regions [Remat->UseRegion ];
20572046 if (!UseOrMax)
20582047 UseOrMax = Freq.MaxFreq ;
2059-
2060- // Maximum difference in frequency between defining and using regions.
2061- const uint64_t MaxDiff = Freq.MaxFreq - 1 ;
2062- // The difference between defining and using frequency is in the range
2063- // [-MaxDiff, MaxDiff], shift it to [0,2 x MaxDiff] to stay in the positive
2064- // range, then rescale to the representable range in the final score.
2065- const uint64_t FreqDiff = (MaxDiff + (DefOrOne - UseOrMax));
2066- if (Freq.RescaleIsDenom )
2067- return FreqDiff / Freq.RescaleFactor ;
2068- return FreqDiff * Freq.RescaleFactor ;
2048+ return DefOrOne - UseOrMax;
20692049}
20702050
20712051void PreRARematStage::ScoredRemat::update (const BitVector &TargetRegions,
20722052 ArrayRef<GCNRPTarget> RPTargets,
20732053 const FreqInfo &FreqInfo,
20742054 bool ReduceSpill) {
2075- setNullScore ();
2076- if (!Remat->maybeBeneficial (TargetRegions, RPTargets))
2077- return ;
2078-
2079- Register Reg = Remat->getReg ();
2080- uint64_t MaxFreq = 0 ;
2081- ScoreTy NumBenefitingRegions = 0 ;
2055+ MaxFreq = 0 ;
2056+ RegionImpact = 0 ;
20822057 for (unsigned I : TargetRegions.set_bits ()) {
2083- if (!Remat->Live [I] || !RPTargets[I].isSaveBeneficial (Reg ))
2058+ if (!Remat->Live [I] || !RPTargets[I].isSaveBeneficial (Remat-> getReg () ))
20842059 continue ;
20852060 bool UnusedLT = Remat->isUnusedLiveThrough (I);
20862061
20872062 // Regions in which RP is guaranteed to decrease have more weight.
2088- NumBenefitingRegions += UnusedLT ? 2 : 1 ;
2063+ RegionImpact += UnusedLT ? 2 : 1 ;
20892064
20902065 if (ReduceSpill) {
20912066 uint64_t Freq = FreqInfo.Regions [I];
@@ -2097,9 +2072,6 @@ void PreRARematStage::ScoredRemat::update(const BitVector &TargetRegions,
20972072 MaxFreq = std::max (MaxFreq, Freq);
20982073 }
20992074 }
2100- setMaxFreqScore (MaxFreq);
2101- setFreqDiffScore (FreqDiff);
2102- setRegionImpactScore (NumBenefitingRegions * NumRegs);
21032075}
21042076
21052077void PreRARematStage::rematerialize (const RematReg &Remat,
0 commit comments