@@ -1193,34 +1193,7 @@ bool PreRARematStage::initGCNSchedStage() {
11931193 printTargetRegions (/* PrintAll=*/ TargetRegions.none ());
11941194 });
11951195
1196- // Compute region frequencies. 0 encodes an unknown region frequency.
1197- SmallVector<uint64_t > RegionFreq;
1198- RegionFreq.reserve (NumRegions);
1199- assert (DAG.MLI && " MLI not defined in DAG" );
1200- MachineBranchProbabilityInfo MBPI;
1201- MachineBlockFrequencyInfo MBFI (MF, MBPI, *DAG.MLI );
1202- uint64_t MinFreq = MBFI.getEntryFreq ().getFrequency (), MaxFreq = 0 ;
1203- for (const MachineBasicBlock *MBB : RegionBB) {
1204- uint64_t BlockFreq = MBFI.getBlockFreq (MBB).getFrequency ();
1205- RegionFreq.push_back (BlockFreq);
1206- if (BlockFreq < MinFreq)
1207- MinFreq = BlockFreq;
1208- else if (BlockFreq > MaxFreq)
1209- MaxFreq = BlockFreq;
1210- }
1211- REMAT_DEBUG ({
1212- dbgs () << " Region frequencies:\n " ;
1213- for (auto [I, Freq] : enumerate(RegionFreq)) {
1214- dbgs () << REMAT_PREFIX << " [" << I << " ] " ;
1215- if (Freq)
1216- dbgs () << Freq;
1217- else
1218- dbgs () << " unknown " ;
1219- dbgs () << " | " << *DAG.Regions [I].first ;
1220- }
1221- });
1222-
1223- if (!collectRematRegs (MIRegion, RegionFreq)) {
1196+ if (!collectRematRegs (MIRegion)) {
12241197 REMAT_DEBUG (dbgs () << " No rematerializable registers\n " );
12251198 return false ;
12261199 }
@@ -1230,20 +1203,21 @@ bool PreRARematStage::initGCNSchedStage() {
12301203 Remat.print ();
12311204 });
12321205
1206+ const ScoredRemat::FreqInfo FreqInfo (MF, DAG);
12331207 SmallVector<ScoredRemat> ScoredRemats;
12341208 for (const RematReg &Remat : RematRegs)
1235- ScoredRemats.emplace_back (&Remat, MinFreq, MaxFreq, DAG);
1236- BitVector RecomputeRP (NumRegions);
1209+ ScoredRemats.emplace_back (&Remat, FreqInfo, DAG);
12371210
12381211// Rematerialize registers in successive rounds until all RP targets are
12391212// satisfied or until we run out of rematerialization candidates.
12401213#ifndef NDEBUG
12411214 unsigned RoundNum = 0 ;
12421215#endif
1216+ BitVector RecomputeRP (NumRegions);
12431217 do {
12441218 // (Re-)Score and (re-)sort all remats in increasing score order.
12451219 for (ScoredRemat &Remat : ScoredRemats)
1246- Remat.update (TargetRegions, RPTargets, RegionFreq , !TargetOcc);
1220+ Remat.update (TargetRegions, RPTargets, FreqInfo , !TargetOcc);
12471221 sort (ScoredRemats);
12481222
12491223 REMAT_DEBUG ({
@@ -1885,10 +1859,7 @@ bool PreRARematStage::setObjective() {
18851859}
18861860
18871861bool PreRARematStage::collectRematRegs (
1888- const DenseMap<MachineInstr *, unsigned > &MIRegion,
1889- ArrayRef<uint64_t > RegionFreq) {
1890- assert (RegionFreq.size () == DAG.Regions .size ());
1891-
1862+ const DenseMap<MachineInstr *, unsigned > &MIRegion) {
18921863 // We need up-to-date live-out info. to query live-out register masks in
18931864 // regions containing rematerializable instructions.
18941865 DAG.RegionLiveOuts .buildLiveRegMap ();
@@ -1948,7 +1919,7 @@ bool PreRARematStage::collectRematRegs(
19481919
19491920 // Add the instruction to the rematerializable list.
19501921 RematRegSet.insert (Reg);
1951- RematRegs.emplace_back (&DefMI, UseMI, DAG, MIRegion, RegionFreq );
1922+ RematRegs.emplace_back (&DefMI, UseMI, DAG, MIRegion);
19521923 }
19531924 }
19541925
@@ -1957,12 +1928,10 @@ bool PreRARematStage::collectRematRegs(
19571928
19581929PreRARematStage::RematReg::RematReg (
19591930 MachineInstr *DefMI, MachineInstr *UseMI, GCNScheduleDAGMILive &DAG,
1960- const DenseMap<MachineInstr *, unsigned > &MIRegion,
1961- ArrayRef<uint64_t > RegionFreq)
1931+ const DenseMap<MachineInstr *, unsigned > &MIRegion)
19621932 : DefMI(DefMI), UseMI(UseMI), LiveIn(DAG.Regions.size()),
19631933 LiveOut(DAG.Regions.size()), Live(DAG.Regions.size()),
1964- DefRegion(MIRegion.at(DefMI)), UseRegion(MIRegion.at(UseMI)),
1965- DefFrequency(RegionFreq[DefRegion]), UseFrequency(RegionFreq[UseRegion]) {
1934+ DefRegion(MIRegion.at(DefMI)), UseRegion(MIRegion.at(UseMI)) {
19661935
19671936 // Mark regions in which the rematerializable register is live.
19681937 Register Reg = getReg ();
@@ -2004,11 +1973,50 @@ void PreRARematStage::RematReg::insertMI(unsigned RegionIdx,
20041973 DAG.LIS ->createAndComputeVirtRegInterval (RematMI->getOperand (0 ).getReg ());
20051974}
20061975
1976+ PreRARematStage::ScoredRemat::FreqInfo::FreqInfo (
1977+ MachineFunction &MF, const GCNScheduleDAGMILive &DAG) {
1978+ assert (DAG.MLI && " MLI not defined in DAG" );
1979+ MachineBranchProbabilityInfo MBPI;
1980+ MachineBlockFrequencyInfo MBFI (MF, MBPI, *DAG.MLI );
1981+
1982+ const unsigned NumRegions = DAG.Regions .size ();
1983+ uint64_t MinFreq = MBFI.getEntryFreq ().getFrequency ();
1984+ Regions.reserve (NumRegions);
1985+ MaxFreq = 0 ;
1986+ for (unsigned I = 0 ; I < NumRegions; ++I) {
1987+ MachineBasicBlock *MBB = DAG.Regions [I].first ->getParent ();
1988+ uint64_t BlockFreq = MBFI.getBlockFreq (MBB).getFrequency ();
1989+ Regions.push_back (BlockFreq);
1990+ if (BlockFreq && BlockFreq < MinFreq) // A frequency of 0 is reported as "unknown" below; it never lowers the minimum.
1991+ MinFreq = BlockFreq;
1992+ if (BlockFreq > MaxFreq) // Deliberately not `else if`: one block may both lower the minimum and raise the maximum.
1993+ MaxFreq = BlockFreq;
1994+ }
1995+ if (MinFreq) {
1996+ // Normalize to minimum observed frequency to avoid overflows when adding up
1997+ // frequencies.
1998+ for (uint64_t &Freq : Regions)
1999+ Freq /= MinFreq;
2000+ MaxFreq /= MinFreq;
2001+ }
2002+
2003+ REMAT_DEBUG ({
2004+ dbgs () << " Region frequencies\n " ;
2005+ for (auto [I, Freq] : enumerate(Regions)) {
2006+ dbgs () << REMAT_PREFIX << " [" << I << " ] " ;
2007+ if (Freq)
2008+ dbgs () << Freq;
2009+ else
2010+ dbgs () << " unknown " ;
2011+ dbgs () << " | " << *DAG.Regions [I].first ;
2012+ }
2013+ });
2014+ }
2015+
20072016PreRARematStage::ScoredRemat::ScoredRemat (const RematReg *Remat,
2008- uint64_t MinFreq, uint64_t MaxFreq ,
2017+ const FreqInfo &Freq ,
20092018 const GCNScheduleDAGMILive &DAG)
2010- : Remat(Remat), NumRegs(getNumRegs(DAG)),
2011- FreqDiff(getFreqDiff(MinFreq, MaxFreq)) {}
2019+ : Remat(Remat), NumRegs(getNumRegs(DAG)), FreqDiff(getFreqDiff(Freq)) {}
20122020
20132021unsigned PreRARematStage::ScoredRemat::getNumRegs (
20142022 const GCNScheduleDAGMILive &DAG) const {
@@ -2021,23 +2029,35 @@ unsigned PreRARematStage::ScoredRemat::getNumRegs(
20212029 return divideCeil (DAG.TRI ->getRegSizeInBits (RC), 32 );
20222030}
20232031
2024- uint64_t PreRARematStage::ScoredRemat::getFreqDiff (uint64_t MinFreq,
2025- uint64_t MaxFreq) const {
2026- uint64_t DefOrMin = Remat->DefFrequency ? Remat->DefFrequency : MinFreq;
2027- uint64_t UseOrMax = Remat->UseFrequency ? Remat->UseFrequency : MaxFreq;
2028- uint64_t MaxDiff = MaxFreq - MinFreq;
2029- // This is equivalent to (2 * MaxDiff) / 2^NumBitsLatency.
2030- uint64_t RescaleDenom = MaxDiff >> (FreqDiffWidth - 1 );
2031- RescaleDenom = std::max (RescaleDenom, (uint64_t )1 );
2032+ uint64_t PreRARematStage::ScoredRemat::getFreqDiff (const FreqInfo &Info) const {
2033+ // Get frequencies of defining and using regions. A rematerialization from the
2034+ // least frequent region to the most frequent region will yield the greatest
2035+ // latency penalty and therefore should get minimum score. Reciprocally, a
2036+ // rematerialization in the other direction should get maximum score. Default
2037+ // to values that will yield the worst possible score given known frequencies
2038+ // in order to penalize rematerializations from or into regions whose
2039+ // frequency is unknown.
2040+ uint64_t DefOrOne = Info.Regions [Remat->DefRegion ];
2041+ if (!DefOrOne)
2042+ DefOrOne = 1 ;
2043+ uint64_t UseOrMax = Info.Regions [Remat->UseRegion ];
2044+ if (!UseOrMax)
2045+ UseOrMax = Info.MaxFreq ;
2046+
2047+ // Maximum difference in frequency between defining and using regions.
2048+ const uint64_t MaxDiff = Info.MaxFreq - 1 ;
2049+ // This is equivalent to max( (2 * MaxDiff) / 2^FreqDiffWidth , 1 ).
2050+ const uint64_t RescaleDenom =
2051+ std::max (MaxDiff >> (FreqDiffWidth - 1 ), (uint64_t )1 );
20322052 // The difference between defining and using frequency is in the range
20332053 // [-MaxDiff, MaxDiff], shift it to [0,2 x MaxDiff] to stay in the positive
20342054 // range, then rescale to [0, 2^FreqDiffWidth - 1]
2035- return (MaxDiff + (DefOrMin - UseOrMax)) / RescaleDenom;
2055+ return (MaxDiff + (DefOrOne - UseOrMax)) / RescaleDenom;
20362056}
20372057
20382058void PreRARematStage::ScoredRemat::update (const BitVector &TargetRegions,
20392059 ArrayRef<GCNRPTarget> RPTargets,
2040- ArrayRef< uint64_t > RegionFreq ,
2060+ const FreqInfo &FreqInfo ,
20412061 bool ReduceSpill) {
20422062 setNullScore ();
20432063 if (!Remat->maybeBeneficial (TargetRegions, RPTargets))
@@ -2055,7 +2075,7 @@ void PreRARematStage::ScoredRemat::update(const BitVector &TargetRegions,
20552075 NumBenefitingRegions += UnusedLT ? 2 : 1 ;
20562076
20572077 if (ReduceSpill) {
2058- uint64_t Freq = RegionFreq [I];
2078+ uint64_t Freq = FreqInfo. Regions [I];
20592079 if (!UnusedLT) {
20602080 // Apply a frequency penalty in regions in which we are not sure that RP
20612081 // will decrease.
0 commit comments