4444#include " llvm/CodeGen/TargetInstrInfo.h"
4545#include " llvm/CodeGen/TargetPassConfig.h"
4646#include " llvm/CodeGen/TargetRegisterInfo.h"
47+ #include " llvm/CodeGen/TargetSchedule.h"
4748#include " llvm/CodeGen/TargetSubtargetInfo.h"
4849#include " llvm/IR/BasicBlock.h"
4950#include " llvm/IR/DebugInfoMetadata.h"
@@ -100,12 +101,6 @@ static cl::opt<bool>
100101 " register spills" ),
101102 cl::init(false ), cl::Hidden);
102103
103- static cl::opt<bool > AggressivelySinkInstsIntoCycle (
104- " aggressive-sink-insts-into-cycles" ,
105- cl::desc (" Aggressively sink instructions into cycles to avoid "
106- " register spills" ),
107- cl::init(false ), cl::Hidden);
108-
109104static cl::opt<unsigned > SinkIntoCycleLimit (
110105 " machine-sink-cycle-limit" ,
111106 cl::desc (
@@ -135,6 +130,7 @@ class MachineSinking : public MachineFunctionPass {
135130 const MachineBranchProbabilityInfo *MBPI = nullptr ;
136131 AliasAnalysis *AA = nullptr ;
137132 RegisterClassInfo RegClassInfo;
133+ TargetSchedModel SchedModel;
138134
139135 // Remember which edges have been considered for breaking.
140136 SmallSet<std::pair<MachineBasicBlock *, MachineBasicBlock *>, 8 >
@@ -262,7 +258,6 @@ class MachineSinking : public MachineFunctionPass {
262258
263259 void FindCycleSinkCandidates (MachineCycle *Cycle, MachineBasicBlock *BB,
264260 SmallVectorImpl<MachineInstr *> &Candidates);
265- bool SinkIntoCycle (MachineCycle *Cycle, MachineInstr &I);
266261
267262 bool isDead (const MachineInstr *MI) const ;
268263 bool aggressivelySinkIntoCycle (
@@ -284,11 +279,14 @@ class MachineSinking : public MachineFunctionPass {
284279 GetAllSortedSuccessors (MachineInstr &MI, MachineBasicBlock *MBB,
285280 AllSuccsCache &AllSuccessors) const ;
286281
287- std::vector<unsigned > &getBBRegisterPressure (const MachineBasicBlock &MBB);
282+ std::vector<unsigned > &getBBRegisterPressure (const MachineBasicBlock &MBB,
283+ bool UseCache = true );
288284
289285 bool registerPressureSetExceedsLimit (unsigned NRegs,
290286 const TargetRegisterClass *RC,
291287 const MachineBasicBlock &MBB);
288+
289+ bool registerPressureExceedsLimit (const MachineBasicBlock &MBB);
292290};
293291
294292} // end anonymous namespace
@@ -787,48 +785,63 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
787785 EverMadeChange = true ;
788786 }
789787
790- if (SinkInstsIntoCycle || AggressivelySinkInstsIntoCycle ) {
788+ if (SinkInstsIntoCycle) {
791789 SmallVector<MachineCycle *, 8 > Cycles (CI->toplevel_cycles ());
790+ SchedModel.init (STI);
791+ enum CycleSinkStage { COPY, LOW_LATENCY, AGGRESSIVE, END };
792792
793- DenseMap<std::pair<MachineInstr *, MachineBasicBlock *>, MachineInstr *>
794- SunkInstrs;
795- for (auto *Cycle : Cycles) {
796- MachineBasicBlock *Preheader = Cycle->getCyclePreheader ();
797- if (!Preheader) {
798- LLVM_DEBUG (dbgs () << " CycleSink: Can't find preheader\n " );
799- continue ;
800- }
801- SmallVector<MachineInstr *, 8 > Candidates;
802- FindCycleSinkCandidates (Cycle, Preheader, Candidates);
803-
804- // Walk the candidates in reverse order so that we start with the use
805- // of a def-use chain, if there is any.
806- // TODO: Sort the candidates using a cost-model.
807- unsigned i = 0 ;
808-
809- for (MachineInstr *I : llvm::reverse (Candidates)) {
810- // AggressivelySinkInstsIntoCycle sinks a superset of instructions
811- // relative to regular cycle sinking. Thus, this option supercedes
812- // captures all sinking opportunites done
813- if (AggressivelySinkInstsIntoCycle) {
814- aggressivelySinkIntoCycle (Cycle, *I, SunkInstrs);
815- EverMadeChange = true ;
816- ++NumCycleSunk;
793+ CycleSinkStage Stage = CycleSinkStage::COPY;
794+ bool HasHighPressure;
795+ do {
796+ HasHighPressure = false ;
797+ DenseMap<std::pair<MachineInstr *, MachineBasicBlock *>, MachineInstr *>
798+ SunkInstrs;
799+ for (auto *Cycle : Cycles) {
800+ MachineBasicBlock *Preheader = Cycle->getCyclePreheader ();
801+ if (!Preheader) {
802+ LLVM_DEBUG (dbgs () << " CycleSink: Can't find preheader\n " );
817803 continue ;
818804 }
805+ SmallVector<MachineInstr *, 8 > Candidates;
806+ FindCycleSinkCandidates (Cycle, Preheader, Candidates);
807+
808+ unsigned i = 0 ;
809+
810+ // Walk the candidates in reverse order so that we start with the use
811+ // of a def-use chain, if there is any.
812+ // TODO: Sort the candidates using a cost-model.
813+ for (MachineInstr *I : llvm::reverse (Candidates)) {
814+ // CycleSinkStage::COPY: Sink a limited number of copies
815+ if (Stage == CycleSinkStage::COPY) {
816+ if (i++ == SinkIntoCycleLimit) {
817+ LLVM_DEBUG (dbgs ()
818+ << " CycleSink: Limit reached of instructions to "
819+ " be analysed." );
820+ break ;
821+ }
822+
823+ if (!I->isCopy ())
824+ continue ;
825+ }
819826
820- if (i++ == SinkIntoCycleLimit) {
821- LLVM_DEBUG (dbgs () << " CycleSink: Limit reached of instructions to "
822- " be analysed." );
823- break ;
827+ // CycleSinkStage::LOW_LATENCY: sink unlimited number of instructions
828+ // which the target specifies as low-latency
829+ if (Stage == CycleSinkStage::LOW_LATENCY &&
830+ !TII->hasLowDefLatency (SchedModel, *I, 0 ))
831+ continue ;
832+
833+ if (!aggressivelySinkIntoCycle (Cycle, *I, SunkInstrs))
834+ break ;
835+ EverMadeChange = true ;
836+ ++NumCycleSunk;
824837 }
825838
826- if (!SinkIntoCycle (Cycle, *I))
827- break ;
828- EverMadeChange = true ;
829- ++NumCycleSunk;
839+ // Recalculate the pressure after sinking
840+ if (!HasHighPressure)
841+ HasHighPressure = registerPressureExceedsLimit (*Preheader);
830842 }
831- }
843+ Stage = (CycleSinkStage)(Stage + 1 );
844+ } while (HasHighPressure && Stage < CycleSinkStage::END);
832845 }
833846
834847 HasStoreCache.clear ();
@@ -1081,13 +1094,15 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
10811094}
10821095
10831096std::vector<unsigned > &
1084- MachineSinking::getBBRegisterPressure (const MachineBasicBlock &MBB) {
1097+ MachineSinking::getBBRegisterPressure (const MachineBasicBlock &MBB,
1098+ bool UseCache) {
10851099 // Currently, to save compile time, MBB's register pressure will not change
10861100 // in one ProcessBlock iteration because of CachedRegisterPressure, but MBB's
10871101 // register pressure is changed after sinking any instructions into it.
10881102 // FIXME: need an accurate and cheap register pressure estimation model here.
1103+
10891104 auto RP = CachedRegisterPressure.find (&MBB);
1090- if (RP != CachedRegisterPressure.end ())
1105+ if (UseCache && RP != CachedRegisterPressure.end ())
10911106 return RP->second ;
10921107
10931108 RegionPressure Pressure;
@@ -1111,6 +1126,12 @@ MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB) {
11111126 }
11121127
11131128 RPTracker.closeRegion ();
1129+
1130+ if (RP != CachedRegisterPressure.end ()) {
1131+ CachedRegisterPressure[&MBB] = RPTracker.getPressure ().MaxSetPressure ;
1132+ return CachedRegisterPressure[&MBB];
1133+ }
1134+
11141135 auto It = CachedRegisterPressure.insert (
11151136 std::make_pair (&MBB, RPTracker.getPressure ().MaxSetPressure ));
11161137 return It.first ->second ;
@@ -1129,6 +1150,21 @@ bool MachineSinking::registerPressureSetExceedsLimit(
11291150 return false ;
11301151}
11311152
1153+ // Recalculate RP and check if any pressure set exceeds the set limit.
1154+ bool MachineSinking::registerPressureExceedsLimit (
1155+ const MachineBasicBlock &MBB) {
1156+ std::vector<unsigned > BBRegisterPressure = getBBRegisterPressure (MBB, false );
1157+
1158+ for (unsigned PS = 0 ; PS < BBRegisterPressure.size (); ++PS) {
1159+ if (BBRegisterPressure[PS] >=
1160+ TRI->getRegPressureSetLimit (*MBB.getParent (), PS)) {
1161+ return true ;
1162+ }
1163+ }
1164+
1165+ return false ;
1166+ }
1167+
11321168// / isProfitableToSinkTo - Return true if it is profitable to sink MI.
11331169bool MachineSinking::isProfitableToSinkTo (Register Reg, MachineInstr &MI,
11341170 MachineBasicBlock *MBB,
@@ -1656,10 +1692,6 @@ bool MachineSinking::aggressivelySinkIntoCycle(
16561692 if (I.getNumDefs () > 1 )
16571693 return false ;
16581694
1659- // Only sink instructions which the target considers to be low latency
1660- if (!TII->isLowLatencyInstruction (I))
1661- return false ;
1662-
16631695 LLVM_DEBUG (dbgs () << " AggressiveCycleSink: Finding sink block for: " << I);
16641696 MachineBasicBlock *Preheader = Cycle->getCyclePreheader ();
16651697 assert (Preheader && " Cycle sink needs a preheader block" );
@@ -1741,86 +1773,6 @@ bool MachineSinking::aggressivelySinkIntoCycle(
17411773 return true ;
17421774}
17431775
1744- // / Sink instructions into cycles if profitable. This especially tries to
1745- // / prevent register spills caused by register pressure if there is little to no
1746- // / overhead moving instructions into cycles.
1747- bool MachineSinking::SinkIntoCycle (MachineCycle *Cycle, MachineInstr &I) {
1748- LLVM_DEBUG (dbgs () << " CycleSink: Finding sink block for: " << I);
1749- MachineBasicBlock *Preheader = Cycle->getCyclePreheader ();
1750- assert (Preheader && " Cycle sink needs a preheader block" );
1751- MachineBasicBlock *SinkBlock = nullptr ;
1752- bool CanSink = true ;
1753- const MachineOperand &MO = I.getOperand (0 );
1754-
1755- for (MachineInstr &MI : MRI->use_instructions (MO.getReg ())) {
1756- LLVM_DEBUG (dbgs () << " CycleSink: Analysing use: " << MI);
1757- if (!Cycle->contains (MI.getParent ())) {
1758- LLVM_DEBUG (dbgs () << " CycleSink: Use not in cycle, can't sink.\n " );
1759- CanSink = false ;
1760- break ;
1761- }
1762-
1763- // FIXME: Come up with a proper cost model that estimates whether sinking
1764- // the instruction (and thus possibly executing it on every cycle
1765- // iteration) is more expensive than a register.
1766- // For now assumes that copies are cheap and thus almost always worth it.
1767- if (!MI.isCopy ()) {
1768- LLVM_DEBUG (dbgs () << " CycleSink: Use is not a copy\n " );
1769- CanSink = false ;
1770- break ;
1771- }
1772- if (!SinkBlock) {
1773- SinkBlock = MI.getParent ();
1774- LLVM_DEBUG (dbgs () << " CycleSink: Setting sink block to: "
1775- << printMBBReference (*SinkBlock) << " \n " );
1776- continue ;
1777- }
1778- SinkBlock = DT->findNearestCommonDominator (SinkBlock, MI.getParent ());
1779- if (!SinkBlock) {
1780- LLVM_DEBUG (dbgs () << " CycleSink: Can't find nearest dominator\n " );
1781- CanSink = false ;
1782- break ;
1783- }
1784- LLVM_DEBUG (dbgs () << " CycleSink: Setting nearest common dom block: "
1785- << printMBBReference (*SinkBlock) << " \n " );
1786- }
1787-
1788- if (!CanSink) {
1789- LLVM_DEBUG (dbgs () << " CycleSink: Can't sink instruction.\n " );
1790- return false ;
1791- }
1792- if (!SinkBlock) {
1793- LLVM_DEBUG (dbgs () << " CycleSink: Not sinking, can't find sink block.\n " );
1794- return false ;
1795- }
1796- if (SinkBlock == Preheader) {
1797- LLVM_DEBUG (
1798- dbgs () << " CycleSink: Not sinking, sink block is the preheader\n " );
1799- return false ;
1800- }
1801- if (SinkBlock->sizeWithoutDebugLargerThan (SinkLoadInstsPerBlockThreshold)) {
1802- LLVM_DEBUG (
1803- dbgs () << " CycleSink: Not Sinking, block too large to analyse.\n " );
1804- return false ;
1805- }
1806-
1807- LLVM_DEBUG (dbgs () << " CycleSink: Sinking instruction!\n " );
1808- SinkBlock->splice (SinkBlock->SkipPHIsAndLabels (SinkBlock->begin ()), Preheader,
1809- I);
1810-
1811- // Conservatively clear any kill flags on uses of sunk instruction
1812- for (MachineOperand &MO : I.operands ()) {
1813- if (MO.isReg () && MO.readsReg ())
1814- RegsToClearKillFlags.insert (MO.getReg ());
1815- }
1816-
1817- // The instruction is moved from its basic block, so do not retain the
1818- // debug information.
1819- assert (!I.isDebugInstr () && " Should not sink debug inst" );
1820- I.setDebugLoc (DebugLoc ());
1821- return true ;
1822- }
1823-
18241776// / SinkInstruction - Determine whether it is safe to sink the specified machine
18251777// / instruction out of its current block into a successor.
18261778bool MachineSinking::SinkInstruction (MachineInstr &MI, bool &SawStore,
0 commit comments