@@ -1281,46 +1281,10 @@ bool ClusteredLowOccStage::initGCNSchedStage() {
12811281#define REMAT_DEBUG (X ) LLVM_DEBUG(dbgs() << REMAT_PREFIX; X;)
12821282
12831283#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug-only helper (every line is marked '-', i.e. removed by this change):
// writes per-region RP targets to dbgs(), one REMAT_PREFIX-ed line per region
// in the form "[<region index>] <target>".
//   PrintAll - when true, print every entry of RPTargets; when false, print
//              only the regions whose bit is set in TargetRegions.
1284- void PreRARematStage::printTargetRegions (bool PrintAll) const {
1285- if (PrintAll) {
1286- for (auto [I, Target] : enumerate(RPTargets))
1287- dbgs () << REMAT_PREFIX << " [" << I << " ] " << Target << ' \n ' ;
1288- return ;
1289- }
// Not printing everything: report explicitly when no region is a target so
// the debug log never shows an empty "Target regions:" list.
1290- if (TargetRegions.none ()) {
1291- dbgs () << REMAT_PREFIX << " No target regions\n " ;
1292- return ;
1293- }
1294- dbgs () << REMAT_PREFIX << " Target regions:\n " ;
1295- for (unsigned I : TargetRegions.set_bits ())
1296- dbgs () << REMAT_PREFIX << " [" << I << " ] " << RPTargets[I] << ' \n ' ;
1297- }
1298-
// Debug-only helper (every line is marked '-', i.e. removed by this change):
// dumps one rematerializable register to dbgs(). Output is the defining
// region and instruction, the using region and instruction, then two lists of
// region indices taken from the set bits of Live.
1299- void PreRARematStage::RematReg::print () const {
1300- dbgs () << REMAT_PREFIX << " [" << DefRegion << " ] " << *DefMI;
1301- dbgs () << REMAT_PREFIX << " -> used in [" << UseRegion << " ] " << *UseMI;
// First list: Live regions for which isUnusedLiveThrough(I) holds.
1302- dbgs () << REMAT_PREFIX << " Guaranteed RP reduction in:" ;
1303- for (unsigned I : Live.set_bits ()) {
1304- if (isUnusedLiveThrough (I))
1305- dbgs () << " [" << I << " ]" ;
1306- }
1307- dbgs () << ' \n ' ;
// Second list: the remaining Live regions (isUnusedLiveThrough(I) is false).
1308- dbgs () << REMAT_PREFIX << " Possible RP reduction in:" ;
1309- for (unsigned I : Live.set_bits ()) {
1310- if (!isUnusedLiveThrough (I))
1311- dbgs () << " [" << I << " ]" ;
1312- }
1313- dbgs () << ' \n ' ;
1314- }
1315-
// Debug printer for a scored rematerialization candidate. Both versions emit
// the tuple "(MaxFreq, FreqDiff, RegionImpact)".
//
// Old version ('-' lines): unpacks the three components from the single packed
// Score value and writes them straight to dbgs(). The lowest RegionImpactWidth
// bits hold RegionImpact, the next FreqDiffWidth bits hold FreqDiff, and
// whatever remains after both shifts is MaxFreq.
1316- void PreRARematStage::ScoredRemat::print () const {
1317- ScoreTy ShiftScore = Score;
1318- ScoreTy RegionImpact = ShiftScore & ((1 << RegionImpactWidth) - 1 );
1319- ShiftScore >>= RegionImpactWidth;
1320- ScoreTy FreqDiff = ShiftScore & ((1 << FreqDiffWidth) - 1 );
1321- ShiftScore >>= FreqDiffWidth;
1322- ScoreTy MaxFreq = ShiftScore;
1323- dbgs () << ' (' << MaxFreq << " , " << FreqDiff << " , " << RegionImpact << ' )' ;
// New version ('+' lines): returns a Printable instead of writing to dbgs(),
// so the caller chooses the output stream and can chain it with operator<<.
// MaxFreq, FreqDiff and RegionImpact are no longer decoded locally; they are
// read through the by-reference capture (presumably data members of
// ScoredRemat - confirm against the header).
// NOTE(review): because the lambda captures by reference, the returned
// Printable should be streamed while this ScoredRemat is still alive.
1284+ Printable PreRARematStage::ScoredRemat::print () const {
1285+ return Printable ([&](raw_ostream &OS) {
1286+ OS << ' (' << MaxFreq << " , " << FreqDiff << " , " << RegionImpact << ' )' ;
1287+ });
13241288}
13251289#endif
13261290
@@ -1351,6 +1315,38 @@ bool PreRARematStage::initGCNSchedStage() {
13511315 RegionBB.push_back (ParentMBB);
13521316 }
13531317
1318+ #ifndef NDEBUG
1319+ auto PrintTargetRegions = [&]() -> void {
1320+ if (TargetRegions.none ()) {
1321+ dbgs () << REMAT_PREFIX << " No target regions\n " ;
1322+ return ;
1323+ }
1324+ dbgs () << REMAT_PREFIX << " Target regions:\n " ;
1325+ for (unsigned I : TargetRegions.set_bits ())
1326+ dbgs () << REMAT_PREFIX << " [" << I << " ] " << RPTargets[I] << ' \n ' ;
1327+ };
1328+ auto PrintRematReg = [&](const RematReg &Remat) -> Printable {
1329+ return Printable ([&, Remat](raw_ostream &OS) {
1330+ // Concatenate all region numbers in which the register is unused and
1331+ // live-through.
1332+ std::string UnusedLTRegions;
1333+ for (unsigned I = 0 ; I < NumRegions; ++I) {
1334+ if (Remat.isUnusedLiveThrough (I)) {
1335+ if (!UnusedLTRegions.empty ())
1336+ UnusedLTRegions += " ," ;
1337+ UnusedLTRegions += std::to_string (I);
1338+ }
1339+ }
1340+ if (!UnusedLTRegions.empty ())
1341+ UnusedLTRegions = " - " + UnusedLTRegions + " -" ;
1342+ OS << " [" << Remat.DefRegion << " -" << UnusedLTRegions << " > "
1343+ << Remat.UseRegion << " ] " ;
1344+ Remat.DefMI ->print (OS, /* IsStandalone=*/ true , /* SkipOpers=*/ false ,
1345+ /* SkipDebugLoc=*/ false , /* AddNewLine=*/ false );
1346+ });
1347+ };
1348+ #endif
1349+
13541350 // Set an objective for the stage based on current RP in each region.
13551351 REMAT_DEBUG ({
13561352 dbgs () << " Analyzing " ;
@@ -1369,22 +1365,19 @@ bool PreRARematStage::initGCNSchedStage() {
13691365 dbgs () << " reduce spilling (minimum target occupancy is "
13701366 << MFI.getMinWavesPerEU () << " )\n " ;
13711367 }
1372- printTargetRegions ( /* PrintAll= */ TargetRegions. none () );
1368+ PrintTargetRegions ( );
13731369 });
13741370
13751371 if (!collectRematRegs (MIRegion)) {
13761372 REMAT_DEBUG (dbgs () << " No rematerializable registers\n " );
13771373 return false ;
13781374 }
1375+ const ScoredRemat::FreqInfo FreqInfo (MF, DAG);
13791376 REMAT_DEBUG ({
13801377 dbgs () << " Rematerializable registers:\n " ;
13811378 for (const RematReg &Remat : RematRegs)
1382- Remat.print ();
1383- });
1384-
1385- const ScoredRemat::FreqInfo FreqInfo (MF, DAG);
1386- REMAT_DEBUG ({
1387- dbgs () << " Region frequencies\n " ;
1379+ dbgs () << REMAT_PREFIX << " " << PrintRematReg (Remat) << ' \n ' ;
1380+ dbgs () << REMAT_PREFIX << " Region frequencies\n " ;
13881381 for (auto [I, Freq] : enumerate(FreqInfo.Regions )) {
13891382 dbgs () << REMAT_PREFIX << " [" << I << " ] " ;
13901383 if (Freq)
@@ -1406,52 +1399,53 @@ bool PreRARematStage::initGCNSchedStage() {
14061399#endif
14071400 BitVector RecomputeRP (NumRegions);
14081401 do {
1402+ assert (!ScoredRemats.empty () && " no more remat candidates" );
1403+
14091404 // (Re-)Score and (re-)sort all remats in increasing score order.
14101405 for (ScoredRemat &Remat : ScoredRemats)
14111406 Remat.update (TargetRegions, RPTargets, FreqInfo, !TargetOcc);
14121407 sort (ScoredRemats);
14131408
14141409 REMAT_DEBUG ({
1415- dbgs () << " ==== ROUND " << RoundNum << " ====\n " ;
1416- for (const ScoredRemat &SRemat : ScoredRemats) {
1417- dbgs () << REMAT_PREFIX;
1418- SRemat.print ();
1419- dbgs () << " | " << *SRemat.Remat ->DefMI ;
1410+ dbgs () << " ==== ROUND " << RoundNum << " ====\n "
1411+ << REMAT_PREFIX
1412+ << " Candidates with non-null score, in rematerialization order:\n " ;
1413+ for (const ScoredRemat &RematDecision : reverse (ScoredRemats)) {
1414+ if (RematDecision.hasNullScore ())
1415+ break ;
1416+ dbgs () << REMAT_PREFIX << " " << RematDecision.print () << " | "
1417+ << *RematDecision.Remat ->DefMI ;
14201418 }
1421- printTargetRegions ();
1419+ PrintTargetRegions ();
14221420 });
14231421
14241422 RecomputeRP.reset ();
1425- int RematIdx = ScoredRemats.size () - 1 ;
1423+ unsigned RematIdx = ScoredRemats.size ();
14261424
14271425 // Rematerialize registers in decreasing score order until we estimate
14281426 // that all RP targets are satisfied or until rematerialization candidates
14291427 // are no longer useful to decrease RP.
1430- for (; RematIdx >= 0 && TargetRegions.any (); --RematIdx) {
1431- const RematReg &Remat = *ScoredRemats[RematIdx].Remat ;
1432- // Stop on null score. Since scores monotonically decrease as we
1433- // rematerialize, we know there is nothing useful left to do in such
1434- // cases.
1435- if (ScoredRemats[RematIdx].hasNullScore ()) {
1436- REMAT_DEBUG (dbgs () << " *** Stop on null score | " << *Remat.DefMI );
1437- RematIdx = -1 ;
1428+ for (; RematIdx && TargetRegions.any (); --RematIdx) {
1429+ const ScoredRemat &Candidate = ScoredRemats[RematIdx - 1 ];
1430+ // Stop rematerializing on encountering a null score. Since scores
1431+ // monotonically decrease as we rematerialize, we know there is nothing
1432+ // useful left to do in such cases, even if we were to re-score.
1433+ if (Candidate.hasNullScore ()) {
1434+ RematIdx = 0 ;
14381435 break ;
14391436 }
14401437
1438+ const RematReg &Remat = *Candidate.Remat ;
14411439 // When previous rematerializations in this round have already satisfied
14421440 // RP targets in all regions this rematerialization can impact, we have a
14431441 // good indication that our scores have diverged significantly from
14441442 // reality, in which case we interrupt this round and re-score. This also
14451443 // ensures that every rematerialization we perform is possibly impactful
14461444 // in at least one target region.
1447- if (!Remat.maybeBeneficial (TargetRegions, RPTargets)) {
1448- REMAT_DEBUG (dbgs () << " *** Stop round on stale score | "
1449- << *Remat.DefMI );
1445+ if (!Remat.maybeBeneficial (TargetRegions, RPTargets))
14501446 break ;
1451- }
14521447
1453- REMAT_DEBUG (dbgs () << " *** REMAT [" << Remat.DefRegion << " -> "
1454- << Remat.UseRegion << " ] | " << *Remat.DefMI );
1448+ REMAT_DEBUG (dbgs () << " ** REMAT " << PrintRematReg (Remat) << ' \n ' ;);
14551449 // Every rematerialization we do here is likely to move the instruction
14561450 // into a higher frequency region, increasing the total sum latency of the
14571451 // instruction itself. This is acceptable if we are eliminating a spill in
@@ -1468,14 +1462,18 @@ bool PreRARematStage::initGCNSchedStage() {
14681462 ++RoundNum;
14691463#endif
14701464 REMAT_DEBUG ({
1471- if (!TargetRegions.any ())
1472- dbgs () << " *** Stop round on all targets achieved\n " ;
1473- else if (RematIdx == -1 )
1474- dbgs () << " *** Stop round on exhausted remat opportunities\n " ;
1465+ if (!TargetRegions.any ()) {
1466+ dbgs () << " ** Interrupt round on all targets achieved\n " ;
1467+ } else if (RematIdx) {
1468+ dbgs () << " ** Interrupt round on stale score for "
1469+ << *ScoredRemats[RematIdx - 1 ].Remat ->DefMI ;
1470+ } else {
1471+ dbgs () << " ** Stop on exhausted rematerialization candidates\n " ;
1472+ }
14751473 });
14761474
14771475 // Peel off registers we already rematerialized from the vector's tail.
1478- ScoredRemats.truncate (RematIdx + 1 );
1476+ ScoredRemats.truncate (RematIdx);
14791477 } while ((updateAndVerifyRPTargets (RecomputeRP) || TargetRegions.any ()) &&
14801478 !ScoredRemats.empty ());
14811479 if (RescheduleRegions.none ())
@@ -2208,20 +2206,11 @@ PreRARematStage::ScoredRemat::FreqInfo::FreqInfo(
22082206 if (!MinFreq)
22092207 return ;
22102208
2211- // Normalize to minimum observed frequency to avoid overflows when adding up
2212- // frequencies.
2209+ // Normalize to minimum observed frequency to avoid underflows/ overflows when
2210+ // combining frequencies.
22132211 for (uint64_t &Freq : Regions)
22142212 Freq /= MinFreq;
22152213 MaxFreq /= MinFreq;
2216-
2217- // Compute the scaling factor for scoring frequency differences.
2218- const uint64_t MaxDiff = MaxFreq - 1 ;
2219- const uint64_t MaxReprFreqValue = (1 << FreqDiffWidth) - 1 ;
2220- RescaleIsDenom = (2 * MaxDiff) & ~MaxReprFreqValue;
2221- if (RescaleIsDenom)
2222- RescaleFactor = (2 * MaxDiff) >> FreqDiffWidth;
2223- else
2224- RescaleFactor = MaxDiff ? MaxReprFreqValue / (2 * MaxDiff) : 1 ;
22252214}
22262215
22272216PreRARematStage::ScoredRemat::ScoredRemat (const RematReg *Remat,
@@ -2244,7 +2233,7 @@ unsigned PreRARematStage::ScoredRemat::getNumRegs(
22442233 return divideCeil (RegSize, 32 );
22452234}
22462235
2247- uint64_t PreRARematStage::ScoredRemat::getFreqDiff (const FreqInfo &Freq) const {
2236+ int64_t PreRARematStage::ScoredRemat::getFreqDiff (const FreqInfo &Freq) const {
22482237 // Get frequencies of defining and using regions. A rematerialization from the
22492238 // least frequent region to the most frequent region will yield the greatest
22502239 // latency penalty and therefore should get minimum score. Reciprocally, a
@@ -2256,36 +2245,22 @@ uint64_t PreRARematStage::ScoredRemat::getFreqDiff(const FreqInfo &Freq) const {
22562245 uint64_t UseOrMax = Freq.Regions [Remat->UseRegion ];
22572246 if (!UseOrMax)
22582247 UseOrMax = Freq.MaxFreq ;
2259-
2260- // Maximum difference in frequency between defining and using regions.
2261- const uint64_t MaxDiff = Freq.MaxFreq - 1 ;
2262- // The difference between defining and using frequency is in the range
2263- // [-MaxDiff, MaxDiff], shift it to [0,2 x MaxDiff] to stay in the positive
2264- // range, then rescale to the representable range in the final score.
2265- const uint64_t FreqDiff = (MaxDiff + (DefOrOne - UseOrMax));
2266- if (Freq.RescaleIsDenom )
2267- return FreqDiff / Freq.RescaleFactor ;
2268- return FreqDiff * Freq.RescaleFactor ;
2248+ return DefOrOne - UseOrMax;
22692249}
22702250
22712251void PreRARematStage::ScoredRemat::update (const BitVector &TargetRegions,
22722252 ArrayRef<GCNRPTarget> RPTargets,
22732253 const FreqInfo &FreqInfo,
22742254 bool ReduceSpill) {
2275- setNullScore ();
2276- if (!Remat->maybeBeneficial (TargetRegions, RPTargets))
2277- return ;
2278-
2279- Register Reg = Remat->getReg ();
2280- uint64_t MaxFreq = 0 ;
2281- ScoreTy NumBenefitingRegions = 0 ;
2255+ MaxFreq = 0 ;
2256+ RegionImpact = 0 ;
22822257 for (unsigned I : TargetRegions.set_bits ()) {
2283- if (!Remat->Live [I] || !RPTargets[I].isSaveBeneficial (Reg ))
2258+ if (!Remat->Live [I] || !RPTargets[I].isSaveBeneficial (Remat-> getReg () ))
22842259 continue ;
22852260 bool UnusedLT = Remat->isUnusedLiveThrough (I);
22862261
22872262 // Regions in which RP is guaranteed to decrease have more weight.
2288- NumBenefitingRegions += UnusedLT ? 2 : 1 ;
2263+ RegionImpact += UnusedLT ? 2 : 1 ;
22892264
22902265 if (ReduceSpill) {
22912266 uint64_t Freq = FreqInfo.Regions [I];
@@ -2297,9 +2272,6 @@ void PreRARematStage::ScoredRemat::update(const BitVector &TargetRegions,
22972272 MaxFreq = std::max (MaxFreq, Freq);
22982273 }
22992274 }
2300- setMaxFreqScore (MaxFreq);
2301- setFreqDiffScore (FreqDiff);
2302- setRegionImpactScore (NumBenefitingRegions * NumRegs);
23032275}
23042276
23052277void PreRARematStage::rematerialize (const RematReg &Remat,
0 commit comments