@@ -1279,46 +1279,10 @@ bool ClusteredLowOccStage::initGCNSchedStage() {
12791279#define REMAT_DEBUG (X ) LLVM_DEBUG(dbgs() << REMAT_PREFIX; X;)
12801280
12811281#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1282- void PreRARematStage::printTargetRegions (bool PrintAll) const {
1283- if (PrintAll) {
1284- for (auto [I, Target] : enumerate(RPTargets))
1285- dbgs () << REMAT_PREFIX << " [" << I << " ] " << Target << ' \n ' ;
1286- return ;
1287- }
1288- if (TargetRegions.none ()) {
1289- dbgs () << REMAT_PREFIX << " No target regions\n " ;
1290- return ;
1291- }
1292- dbgs () << REMAT_PREFIX << " Target regions:\n " ;
1293- for (unsigned I : TargetRegions.set_bits ())
1294- dbgs () << REMAT_PREFIX << " [" << I << " ] " << RPTargets[I] << ' \n ' ;
1295- }
1296-
1297- void PreRARematStage::RematReg::print () const {
1298- dbgs () << REMAT_PREFIX << " [" << DefRegion << " ] " << *DefMI;
1299- dbgs () << REMAT_PREFIX << " -> used in [" << UseRegion << " ] " << *UseMI;
1300- dbgs () << REMAT_PREFIX << " Guaranteed RP reduction in:" ;
1301- for (unsigned I : Live.set_bits ()) {
1302- if (isUnusedLiveThrough (I))
1303- dbgs () << " [" << I << " ]" ;
1304- }
1305- dbgs () << ' \n ' ;
1306- dbgs () << REMAT_PREFIX << " Possible RP reduction in:" ;
1307- for (unsigned I : Live.set_bits ()) {
1308- if (!isUnusedLiveThrough (I))
1309- dbgs () << " [" << I << " ]" ;
1310- }
1311- dbgs () << ' \n ' ;
1312- }
1313-
1314- void PreRARematStage::ScoredRemat::print () const {
1315- ScoreTy ShiftScore = Score;
1316- ScoreTy RegionImpact = ShiftScore & ((1 << RegionImpactWidth) - 1 );
1317- ShiftScore >>= RegionImpactWidth;
1318- ScoreTy FreqDiff = ShiftScore & ((1 << FreqDiffWidth) - 1 );
1319- ShiftScore >>= FreqDiffWidth;
1320- ScoreTy MaxFreq = ShiftScore;
1321- dbgs () << ' (' << MaxFreq << " , " << FreqDiff << " , " << RegionImpact << ' )' ;
// Debug-only printer (compiled under the enclosing
// `!defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)` guard). Returns a Printable
// that, when streamed, writes this candidate's score components between
// parentheses in the order MaxFreq, FreqDiff, RegionImpact.
// NOTE(review): the closure captures by reference (presumably including
// `this`, since the printed values look like members), so the returned
// Printable should be streamed while this ScoredRemat is alive -- confirm.
1282+ Printable PreRARematStage::ScoredRemat::print () const {
1283+ return Printable ([&](raw_ostream &OS) {
1284+ OS << ' (' << MaxFreq << " , " << FreqDiff << " , " << RegionImpact << ' )' ;
1285+ });
13221286}
13231287#endif
13241288
@@ -1349,6 +1313,38 @@ bool PreRARematStage::initGCNSchedStage() {
13491313 RegionBB.push_back (ParentMBB);
13501314 }
13511315
1316+ #ifndef NDEBUG
// Debug helper: dumps the current target regions to dbgs(). Prints a
// "No target regions" notice when no bit is set in TargetRegions; otherwise
// prints a header followed by one line per set bit showing the region index
// and its entry in RPTargets. Every emitted line starts with REMAT_PREFIX.
// Captures by reference, so it always reflects the state at call time.
1317+ auto PrintTargetRegions = [&]() -> void {
1318+ if (TargetRegions.none ()) {
1319+ dbgs () << REMAT_PREFIX << " No target regions\n " ;
1320+ return ;
1321+ }
1322+ dbgs () << REMAT_PREFIX << " Target regions:\n " ;
1323+ for (unsigned I : TargetRegions.set_bits ())
1324+ dbgs () << REMAT_PREFIX << " [" << I << " ] " << RPTargets[I] << ' \n ' ;
1325+ };
// Debug helper: returns a Printable describing one rematerializable register.
// When streamed it writes, inside brackets, the defining region, an optional
// comma-separated list of the region indices for which
// Remat.isUnusedLiveThrough(I) holds, and the using region, followed by the
// defining instruction.
// The inner closure captures `Remat` by copy (`[&, Remat]`) and everything
// else by reference.
1326+ auto PrintRematReg = [&](const RematReg &Remat) -> Printable {
1327+ return Printable ([&, Remat](raw_ostream &OS) {
1328+ // Concatenate all region numbers in which the register is unused and
1329+ // live-through.
1330+ std::string UnusedLTRegions;
1331+ for (unsigned I = 0 ; I < NumRegions; ++I) {
1332+ if (Remat.isUnusedLiveThrough (I)) {
// Separate entries with a comma; nothing is emitted before the first one.
1333+ if (!UnusedLTRegions.empty ())
1334+ UnusedLTRegions += " ," ;
1335+ UnusedLTRegions += std::to_string (I);
1336+ }
1337+ }
// Only wrap the list in separators when at least one such region exists, so
// the output collapses to a plain def-to-use arrow otherwise.
1338+ if (!UnusedLTRegions.empty ())
1339+ UnusedLTRegions = " - " + UnusedLTRegions + " -" ;
1340+ OS << " [" << Remat.DefRegion << " -" << UnusedLTRegions << " > "
1341+ << Remat.UseRegion << " ] " ;
// Print the defining instruction without a trailing newline (AddNewLine is
// false); the visible call sites append their own '\n'.
1342+ Remat.DefMI ->print (OS, /* IsStandalone=*/ true , /* SkipOpers=*/ false ,
1343+ /* SkipDebugLoc=*/ false , /* AddNewLine=*/ false );
1344+ });
1345+ };
1346+ #endif
1347+
13521348 // Set an objective for the stage based on current RP in each region.
13531349 REMAT_DEBUG ({
13541350 dbgs () << " Analyzing " ;
@@ -1367,22 +1363,19 @@ bool PreRARematStage::initGCNSchedStage() {
13671363 dbgs () << " reduce spilling (minimum target occupancy is "
13681364 << MFI.getMinWavesPerEU () << " )\n " ;
13691365 }
1370- printTargetRegions ( /* PrintAll= */ TargetRegions. none () );
1366+ PrintTargetRegions ( );
13711367 });
13721368
13731369 if (!collectRematRegs (MIRegion)) {
13741370 REMAT_DEBUG (dbgs () << " No rematerializable registers\n " );
13751371 return false ;
13761372 }
1373+ const ScoredRemat::FreqInfo FreqInfo (MF, DAG);
13771374 REMAT_DEBUG ({
13781375 dbgs () << " Rematerializable registers:\n " ;
13791376 for (const RematReg &Remat : RematRegs)
1380- Remat.print ();
1381- });
1382-
1383- const ScoredRemat::FreqInfo FreqInfo (MF, DAG);
1384- REMAT_DEBUG ({
1385- dbgs () << " Region frequencies\n " ;
1377+ dbgs () << REMAT_PREFIX << " " << PrintRematReg (Remat) << ' \n ' ;
1378+ dbgs () << REMAT_PREFIX << " Region frequencies\n " ;
13861379 for (auto [I, Freq] : enumerate(FreqInfo.Regions )) {
13871380 dbgs () << REMAT_PREFIX << " [" << I << " ] " ;
13881381 if (Freq)
@@ -1404,52 +1397,53 @@ bool PreRARematStage::initGCNSchedStage() {
14041397#endif
14051398 BitVector RecomputeRP (NumRegions);
14061399 do {
1400+ assert (!ScoredRemats.empty () && " no more remat candidates" );
1401+
14071402 // (Re-)Score and (re-)sort all remats in increasing score order.
14081403 for (ScoredRemat &Remat : ScoredRemats)
14091404 Remat.update (TargetRegions, RPTargets, FreqInfo, !TargetOcc);
14101405 sort (ScoredRemats);
14111406
14121407 REMAT_DEBUG ({
1413- dbgs () << " ==== ROUND " << RoundNum << " ====\n " ;
1414- for (const ScoredRemat &SRemat : ScoredRemats) {
1415- dbgs () << REMAT_PREFIX;
1416- SRemat.print ();
1417- dbgs () << " | " << *SRemat.Remat ->DefMI ;
1408+ dbgs () << " ==== ROUND " << RoundNum << " ====\n "
1409+ << REMAT_PREFIX
1410+ << " Candidates with non-null score, in rematerialization order:\n " ;
1411+ for (const ScoredRemat &RematDecision : reverse (ScoredRemats)) {
1412+ if (RematDecision.hasNullScore ())
1413+ break ;
1414+ dbgs () << REMAT_PREFIX << " " << RematDecision.print () << " | "
1415+ << *RematDecision.Remat ->DefMI ;
14181416 }
1419- printTargetRegions ();
1417+ PrintTargetRegions ();
14201418 });
14211419
14221420 RecomputeRP.reset ();
1423- int RematIdx = ScoredRemats.size () - 1 ;
1421+ unsigned RematIdx = ScoredRemats.size ();
14241422
14251423 // Rematerialize registers in decreasing score order until we estimate
14261424 // that all RP targets are satisfied or until rematerialization candidates
14271425 // are no longer useful to decrease RP.
1428- for (; RematIdx >= 0 && TargetRegions.any (); --RematIdx) {
1429- const RematReg &Remat = *ScoredRemats[RematIdx].Remat ;
1430- // Stop on null score. Since scores monotonically decrease as we
1431- // rematerialize, we know there is nothing useful left to do in such
1432- // cases.
1433- if (ScoredRemats[RematIdx].hasNullScore ()) {
1434- REMAT_DEBUG (dbgs () << " *** Stop on null score | " << *Remat.DefMI );
1435- RematIdx = -1 ;
1426+ for (; RematIdx && TargetRegions.any (); --RematIdx) {
1427+ const ScoredRemat &Candidate = ScoredRemats[RematIdx - 1 ];
1428+ // Stop rematerializing on encountering a null score. Since scores
1429+ // monotonically decrease as we rematerialize, we know there is nothing
1430+ // useful left to do in such cases, even if we were to re-score.
1431+ if (Candidate.hasNullScore ()) {
1432+ RematIdx = 0 ;
14361433 break ;
14371434 }
14381435
1436+ const RematReg &Remat = *Candidate.Remat ;
14391437 // When previous rematerializations in this round have already satisfied
14401438 // RP targets in all regions this rematerialization can impact, we have a
14411439 // good indication that our scores have diverged significantly from
14421440 // reality, in which case we interrupt this round and re-score. This also
14431441 // ensures that every rematerialization we perform is possibly impactful
14441442 // in at least one target region.
1445- if (!Remat.maybeBeneficial (TargetRegions, RPTargets)) {
1446- REMAT_DEBUG (dbgs () << " *** Stop round on stale score | "
1447- << *Remat.DefMI );
1443+ if (!Remat.maybeBeneficial (TargetRegions, RPTargets))
14481444 break ;
1449- }
14501445
1451- REMAT_DEBUG (dbgs () << " *** REMAT [" << Remat.DefRegion << " -> "
1452- << Remat.UseRegion << " ] | " << *Remat.DefMI );
1446+ REMAT_DEBUG (dbgs () << " ** REMAT " << PrintRematReg (Remat) << ' \n ' ;);
14531447 // Every rematerialization we do here is likely to move the instruction
14541448 // into a higher frequency region, increasing the total sum latency of the
14551449 // instruction itself. This is acceptable if we are eliminating a spill in
@@ -1466,14 +1460,18 @@ bool PreRARematStage::initGCNSchedStage() {
14661460 ++RoundNum;
14671461#endif
14681462 REMAT_DEBUG ({
1469- if (!TargetRegions.any ())
1470- dbgs () << " *** Stop round on all targets achieved\n " ;
1471- else if (RematIdx == -1 )
1472- dbgs () << " *** Stop round on exhausted remat opportunities\n " ;
1463+ if (!TargetRegions.any ()) {
1464+ dbgs () << " ** Interrupt round on all targets achieved\n " ;
1465+ } else if (RematIdx) {
1466+ dbgs () << " ** Interrupt round on stale score for "
1467+ << *ScoredRemats[RematIdx - 1 ].Remat ->DefMI ;
1468+ } else {
1469+ dbgs () << " ** Stop on exhausted rematerialization candidates\n " ;
1470+ }
14731471 });
14741472
14751473 // Peel off registers we already rematerialized from the vector's tail.
1476- ScoredRemats.truncate (RematIdx + 1 );
1474+ ScoredRemats.truncate (RematIdx);
14771475 } while ((updateAndVerifyRPTargets (RecomputeRP) || TargetRegions.any ()) &&
14781476 !ScoredRemats.empty ());
14791477 if (RescheduleRegions.none ())
@@ -2185,20 +2183,11 @@ PreRARematStage::ScoredRemat::FreqInfo::FreqInfo(
21852183 if (!MinFreq)
21862184 return ;
21872185
2188- // Normalize to minimum observed frequency to avoid overflows when adding up
2189- // frequencies.
2186+ // Normalize to minimum observed frequency to avoid underflows/ overflows when
2187+ // combining frequencies.
21902188 for (uint64_t &Freq : Regions)
21912189 Freq /= MinFreq;
21922190 MaxFreq /= MinFreq;
2193-
2194- // Compute the scaling factor for scoring frequency differences.
2195- const uint64_t MaxDiff = MaxFreq - 1 ;
2196- const uint64_t MaxReprFreqValue = (1 << FreqDiffWidth) - 1 ;
2197- RescaleIsDenom = (2 * MaxDiff) & ~MaxReprFreqValue;
2198- if (RescaleIsDenom)
2199- RescaleFactor = (2 * MaxDiff) >> FreqDiffWidth;
2200- else
2201- RescaleFactor = MaxDiff ? MaxReprFreqValue / (2 * MaxDiff) : 1 ;
22022191}
22032192
22042193PreRARematStage::ScoredRemat::ScoredRemat (const RematReg *Remat,
@@ -2221,7 +2210,7 @@ unsigned PreRARematStage::ScoredRemat::getNumRegs(
22212210 return divideCeil (RegSize, 32 );
22222211}
22232212
2224- uint64_t PreRARematStage::ScoredRemat::getFreqDiff (const FreqInfo &Freq) const {
2213+ int64_t PreRARematStage::ScoredRemat::getFreqDiff (const FreqInfo &Freq) const {
22252214 // Get frequencies of defining and using regions. A rematerialization from the
22262215 // least frequent region to the most frequent region will yield the greatest
22272216 // latency penalty and therefore should get minimum score. Reciprocally, a
@@ -2233,36 +2222,22 @@ uint64_t PreRARematStage::ScoredRemat::getFreqDiff(const FreqInfo &Freq) const {
22332222 uint64_t UseOrMax = Freq.Regions [Remat->UseRegion ];
22342223 if (!UseOrMax)
22352224 UseOrMax = Freq.MaxFreq ;
2236-
2237- // Maximum difference in frequency between defining and using regions.
2238- const uint64_t MaxDiff = Freq.MaxFreq - 1 ;
2239- // The difference between defining and using frequency is in the range
2240- // [-MaxDiff, MaxDiff], shift it to [0,2 x MaxDiff] to stay in the positive
2241- // range, then rescale to the representable range in the final score.
2242- const uint64_t FreqDiff = (MaxDiff + (DefOrOne - UseOrMax));
2243- if (Freq.RescaleIsDenom )
2244- return FreqDiff / Freq.RescaleFactor ;
2245- return FreqDiff * Freq.RescaleFactor ;
2225+ return DefOrOne - UseOrMax;
22462226}
22472227
22482228void PreRARematStage::ScoredRemat::update (const BitVector &TargetRegions,
22492229 ArrayRef<GCNRPTarget> RPTargets,
22502230 const FreqInfo &FreqInfo,
22512231 bool ReduceSpill) {
2252- setNullScore ();
2253- if (!Remat->maybeBeneficial (TargetRegions, RPTargets))
2254- return ;
2255-
2256- Register Reg = Remat->getReg ();
2257- uint64_t MaxFreq = 0 ;
2258- ScoreTy NumBenefitingRegions = 0 ;
2232+ MaxFreq = 0 ;
2233+ RegionImpact = 0 ;
22592234 for (unsigned I : TargetRegions.set_bits ()) {
2260- if (!Remat->Live [I] || !RPTargets[I].isSaveBeneficial (Reg ))
2235+ if (!Remat->Live [I] || !RPTargets[I].isSaveBeneficial (Remat-> getReg () ))
22612236 continue ;
22622237 bool UnusedLT = Remat->isUnusedLiveThrough (I);
22632238
22642239 // Regions in which RP is guaranteed to decrease have more weight.
2265- NumBenefitingRegions += UnusedLT ? 2 : 1 ;
2240+ RegionImpact += UnusedLT ? 2 : 1 ;
22662241
22672242 if (ReduceSpill) {
22682243 uint64_t Freq = FreqInfo.Regions [I];
@@ -2274,9 +2249,6 @@ void PreRARematStage::ScoredRemat::update(const BitVector &TargetRegions,
22742249 MaxFreq = std::max (MaxFreq, Freq);
22752250 }
22762251 }
2277- setMaxFreqScore (MaxFreq);
2278- setFreqDiffScore (FreqDiff);
2279- setRegionImpactScore (NumBenefitingRegions * NumRegs);
22802252}
22812253
22822254void PreRARematStage::rematerialize (const RematReg &Remat,
0 commit comments