 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSchedule.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/DebugInfoMetadata.h"
@@ -113,6 +114,8 @@ STATISTIC(NumSplit, "Number of critical edges split");
 STATISTIC(NumCoalesces, "Number of copies coalesced");
 STATISTIC(NumPostRACopySink, "Number of copies sunk after RA");
 
+using RegSubRegPair = TargetInstrInfo::RegSubRegPair;
+
 namespace {
 
 class MachineSinking : public MachineFunctionPass {
@@ -128,6 +131,7 @@ class MachineSinking : public MachineFunctionPass {
   const MachineBranchProbabilityInfo *MBPI = nullptr;
   AliasAnalysis *AA = nullptr;
   RegisterClassInfo RegClassInfo;
+  TargetSchedModel SchedModel;
 
   // Remember which edges have been considered for breaking.
   SmallSet<std::pair<MachineBasicBlock *, MachineBasicBlock *>, 8>
@@ -161,6 +165,8 @@ class MachineSinking : public MachineFunctionPass {
   /// would re-order assignments.
   using SeenDbgUser = PointerIntPair<MachineInstr *, 1>;
 
+  using SinkItem = std::pair<MachineInstr *, MachineBasicBlock *>;
+
   /// Record of DBG_VALUE uses of vregs in a block, so that we can identify
   /// debug instructions to sink.
   SmallDenseMap<unsigned, TinyPtrVector<SeenDbgUser>> SeenDbgUsers;
@@ -255,7 +261,10 @@ class MachineSinking : public MachineFunctionPass {
 
   void FindCycleSinkCandidates(MachineCycle *Cycle, MachineBasicBlock *BB,
                                SmallVectorImpl<MachineInstr *> &Candidates);
-  bool SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I);
+
+  bool
+  aggressivelySinkIntoCycle(MachineCycle *Cycle, MachineInstr &I,
+                            DenseMap<SinkItem, MachineInstr *> &SunkInstrs);
 
   bool isProfitableToSinkTo(Register Reg, MachineInstr &MI,
                             MachineBasicBlock *MBB,
@@ -271,11 +280,14 @@ class MachineSinking : public MachineFunctionPass {
   GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
                          AllSuccsCache &AllSuccessors) const;
 
-  std::vector<unsigned> &getBBRegisterPressure(const MachineBasicBlock &MBB);
+  std::vector<unsigned> &getBBRegisterPressure(const MachineBasicBlock &MBB,
+                                               bool UseCache = true);
 
   bool registerPressureSetExceedsLimit(unsigned NRegs,
                                        const TargetRegisterClass *RC,
                                        const MachineBasicBlock &MBB);
+
+  bool registerPressureExceedsLimit(const MachineBasicBlock &MBB);
 };
 
 } // end anonymous namespace
@@ -680,6 +692,10 @@ void MachineSinking::FindCycleSinkCandidates(
     SmallVectorImpl<MachineInstr *> &Candidates) {
   for (auto &MI : *BB) {
     LLVM_DEBUG(dbgs() << "CycleSink: Analysing candidate: " << MI);
+    if (MI.isMetaInstruction()) {
+      LLVM_DEBUG(dbgs() << "CycleSink: not sinking meta instruction\n");
+      continue;
+    }
     if (!TII->shouldSink(MI)) {
       LLVM_DEBUG(dbgs() << "CycleSink: Instruction not a candidate for this "
                            "target\n");
@@ -775,31 +791,62 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
 
   if (SinkInstsIntoCycle) {
     SmallVector<MachineCycle *, 8> Cycles(CI->toplevel_cycles());
-    for (auto *Cycle : Cycles) {
-      MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
-      if (!Preheader) {
-        LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n");
-        continue;
-      }
-      SmallVector<MachineInstr *, 8> Candidates;
-      FindCycleSinkCandidates(Cycle, Preheader, Candidates);
-
-      // Walk the candidates in reverse order so that we start with the use
-      // of a def-use chain, if there is any.
-      // TODO: Sort the candidates using a cost-model.
-      unsigned i = 0;
-      for (MachineInstr *I : llvm::reverse(Candidates)) {
-        if (i++ == SinkIntoCycleLimit) {
-          LLVM_DEBUG(dbgs() << "CycleSink: Limit reached of instructions to "
-                               "be analysed.");
-          break;
+    SchedModel.init(STI);
+    bool HasHighPressure;
+
+    DenseMap<SinkItem, MachineInstr *> SunkInstrs;
+
+    enum CycleSinkStage { COPY, LOW_LATENCY, AGGRESSIVE, END };
+    for (unsigned Stage = CycleSinkStage::COPY; Stage != CycleSinkStage::END;
+         ++Stage, SunkInstrs.clear()) {
+      HasHighPressure = false;
+
+      for (auto *Cycle : Cycles) {
+        MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
+        if (!Preheader) {
+          LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n");
+          continue;
         }
+        SmallVector<MachineInstr *, 8> Candidates;
+        FindCycleSinkCandidates(Cycle, Preheader, Candidates);
+
+        unsigned i = 0;
+
+        // Walk the candidates in reverse order so that we start with the use
+        // of a def-use chain, if there is any.
+        // TODO: Sort the candidates using a cost-model.
+        for (MachineInstr *I : llvm::reverse(Candidates)) {
+          // CycleSinkStage::COPY: Sink a limited number of copies
+          if (Stage == CycleSinkStage::COPY) {
+            if (i++ == SinkIntoCycleLimit) {
+              LLVM_DEBUG(dbgs()
+                         << "CycleSink: Limit reached of instructions to "
+                            "be analyzed.");
+              break;
+            }
+
+            if (!I->isCopy())
+              continue;
+          }
 
-        if (!SinkIntoCycle(Cycle, *I))
-          break;
-        EverMadeChange = true;
-        ++NumCycleSunk;
+          // CycleSinkStage::LOW_LATENCY: sink unlimited number of instructions
+          // which the target specifies as low-latency
+          if (Stage == CycleSinkStage::LOW_LATENCY &&
+              !TII->hasLowDefLatency(SchedModel, *I, 0))
+            continue;
+
+          if (!aggressivelySinkIntoCycle(Cycle, *I, SunkInstrs))
+            break;
+          EverMadeChange = true;
+          ++NumCycleSunk;
+        }
+
+        // Recalculate the pressure after sinking
+        if (!HasHighPressure)
+          HasHighPressure = registerPressureExceedsLimit(*Preheader);
       }
+      if (!HasHighPressure)
+        break;
     }
   }
 
@@ -1055,13 +1102,15 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
 }
 
 std::vector<unsigned> &
-MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB) {
+MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB,
+                                      bool UseCache) {
   // Currently to save compiling time, MBB's register pressure will not change
   // in one ProcessBlock iteration because of CachedRegisterPressure. but MBB's
   // register pressure is changed after sinking any instructions into it.
   // FIXME: need a accurate and cheap register pressure estiminate model here.
+
   auto RP = CachedRegisterPressure.find(&MBB);
-  if (RP != CachedRegisterPressure.end())
+  if (UseCache && RP != CachedRegisterPressure.end())
     return RP->second;
 
   RegionPressure Pressure;
@@ -1085,6 +1134,12 @@ MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB) {
   }
 
   RPTracker.closeRegion();
+
+  if (RP != CachedRegisterPressure.end()) {
+    CachedRegisterPressure[&MBB] = RPTracker.getPressure().MaxSetPressure;
+    return CachedRegisterPressure[&MBB];
+  }
+
   auto It = CachedRegisterPressure.insert(
       std::make_pair(&MBB, RPTracker.getPressure().MaxSetPressure));
   return It.first->second;
@@ -1103,6 +1158,21 @@ bool MachineSinking::registerPressureSetExceedsLimit(
   return false;
 }
 
+// Recalculate RP and check if any pressure set exceeds the set limit.
+bool MachineSinking::registerPressureExceedsLimit(
+    const MachineBasicBlock &MBB) {
+  std::vector<unsigned> BBRegisterPressure = getBBRegisterPressure(MBB, false);
+
+  for (unsigned PS = 0; PS < BBRegisterPressure.size(); ++PS) {
+    if (BBRegisterPressure[PS] >=
+        TRI->getRegPressureSetLimit(*MBB.getParent(), PS)) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
 /// isProfitableToSinkTo - Return true if it is profitable to sink MI.
 bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
                                           MachineBasicBlock *MBB,
@@ -1581,83 +1651,98 @@ bool MachineSinking::hasStoreBetween(MachineBasicBlock *From,
   return HasAliasedStore;
 }
 
-/// Sink instructions into cycles if profitable. This especially tries to
-/// prevent register spills caused by register pressure if there is little to no
-/// overhead moving instructions into cycles.
-bool MachineSinking::SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I) {
-  LLVM_DEBUG(dbgs() << "CycleSink: Finding sink block for: " << I);
+/// Aggressively sink instructions into cycles. This will aggressively try to
+/// sink all instructions in the top-most preheaders in an attempt to reduce RP.
+/// In particular, it will sink into multiple successor blocks without limits
+/// based on the amount of sinking, or the type of ops being sunk (so long as
+/// they are safe to sink).
+bool MachineSinking::aggressivelySinkIntoCycle(
+    MachineCycle *Cycle, MachineInstr &I,
+    DenseMap<SinkItem, MachineInstr *> &SunkInstrs) {
+  // TODO: support instructions with multiple defs
+  if (I.getNumDefs() > 1)
+    return false;
+
+  LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Finding sink block for: " << I);
   MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
   assert(Preheader && "Cycle sink needs a preheader block");
-  MachineBasicBlock *SinkBlock = nullptr;
-  bool CanSink = true;
-  const MachineOperand &MO = I.getOperand(0);
-
-  for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
-    LLVM_DEBUG(dbgs() << "CycleSink: Analysing use: " << MI);
-    if (!Cycle->contains(MI.getParent())) {
-      LLVM_DEBUG(dbgs() << "CycleSink: Use not in cycle, can't sink.\n");
-      CanSink = false;
-      break;
-    }
+  SmallVector<std::pair<RegSubRegPair, MachineInstr *>> Uses;
 
-    // FIXME: Come up with a proper cost model that estimates whether sinking
-    // the instruction (and thus possibly executing it on every cycle
-    // iteration) is more expensive than a register.
-    // For now assumes that copies are cheap and thus almost always worth it.
-    if (!MI.isCopy()) {
-      LLVM_DEBUG(dbgs() << "CycleSink: Use is not a copy\n");
-      CanSink = false;
-      break;
+  MachineOperand &DefMO = I.getOperand(0);
+  for (MachineInstr &MI : MRI->use_instructions(DefMO.getReg())) {
+    Uses.push_back({{DefMO.getReg(), DefMO.getSubReg()}, &MI});
+  }
+
+  for (std::pair<RegSubRegPair, MachineInstr *> Entry : Uses) {
+    MachineInstr *MI = Entry.second;
+    LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Analysing use: " << MI);
+    if (MI->isPHI()) {
+      LLVM_DEBUG(
+          dbgs() << "AggressiveCycleSink: Not attempting to sink for PHI.\n");
+      continue;
     }
-    if (!SinkBlock) {
-      SinkBlock = MI.getParent();
-      LLVM_DEBUG(dbgs() << "CycleSink: Setting sink block to: "
-                        << printMBBReference(*SinkBlock) << "\n");
+    // We cannot sink before the prologue
+    if (MI->isPosition() || TII->isBasicBlockPrologue(*MI)) {
+      LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Use is BasicBlock prologue, "
+                           "can't sink.\n");
       continue;
     }
-    SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent());
-    if (!SinkBlock) {
-      LLVM_DEBUG(dbgs() << "CycleSink: Can't find nearest dominator\n");
-      CanSink = false;
-      break;
+    if (!Cycle->contains(MI->getParent())) {
+      LLVM_DEBUG(
+          dbgs() << "AggressiveCycleSink: Use not in cycle, can't sink.\n");
+      continue;
     }
-    LLVM_DEBUG(dbgs() << "CycleSink: Setting nearest common dom block: "
-                      << printMBBReference(*SinkBlock) << "\n");
-  }
 
-  if (!CanSink) {
-    LLVM_DEBUG(dbgs() << "CycleSink: Can't sink instruction.\n");
-    return false;
-  }
-  if (!SinkBlock) {
-    LLVM_DEBUG(dbgs() << "CycleSink: Not sinking, can't find sink block.\n");
-    return false;
-  }
-  if (SinkBlock == Preheader) {
-    LLVM_DEBUG(
-        dbgs() << "CycleSink: Not sinking, sink block is the preheader\n");
-    return false;
-  }
-  if (SinkBlock->sizeWithoutDebugLargerThan(SinkLoadInstsPerBlockThreshold)) {
-    LLVM_DEBUG(
-        dbgs() << "CycleSink: Not Sinking, block too large to analyse.\n");
-    return false;
-  }
+    MachineBasicBlock *SinkBlock = MI->getParent();
+    MachineInstr *NewMI = nullptr;
+    SinkItem MapEntry(&I, SinkBlock);
+
+    auto SI = SunkInstrs.find(MapEntry);
+
+    // Check for the case in which we have already sunk a copy of this
+    // instruction into the user block.
+    if (SI != SunkInstrs.end()) {
+      LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Already sunk to block: "
+                        << printMBBReference(*SinkBlock) << "\n");
+      NewMI = SI->second;
+    }
 
-  LLVM_DEBUG(dbgs() << "CycleSink: Sinking instruction!\n");
-  SinkBlock->splice(SinkBlock->SkipPHIsAndLabels(SinkBlock->begin()), Preheader,
-                    I);
+    // Create a copy of the instruction in the use block.
+    if (!NewMI) {
+      LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Sinking instruction to block: "
+                        << printMBBReference(*SinkBlock) << "\n");
+
+      NewMI = I.getMF()->CloneMachineInstr(&I);
+      if (DefMO.getReg().isVirtual()) {
+        const TargetRegisterClass *TRC = MRI->getRegClass(DefMO.getReg());
+        Register DestReg = MRI->createVirtualRegister(TRC);
+        NewMI->substituteRegister(DefMO.getReg(), DestReg, DefMO.getSubReg(),
+                                  *TRI);
+      }
+      SinkBlock->insert(SinkBlock->SkipPHIsAndLabels(SinkBlock->begin()),
+                        NewMI);
+      SunkInstrs.insert({MapEntry, NewMI});
+    }
 
-  // Conservatively clear any kill flags on uses of sunk instruction
-  for (MachineOperand &MO : I.operands()) {
-    if (MO.isReg() && MO.readsReg())
+    // Conservatively clear any kill flags on uses of sunk instruction
+    for (MachineOperand &MO : NewMI->all_uses()) {
+      assert(MO.isReg() && MO.isUse());
       RegsToClearKillFlags.insert(MO.getReg());
-  }
+    }
 
-  // The instruction is moved from its basic block, so do not retain the
-  // debug information.
-  assert(!I.isDebugInstr() && "Should not sink debug inst");
-  I.setDebugLoc(DebugLoc());
+    // The instruction is moved from its basic block, so do not retain the
+    // debug information.
+    assert(!NewMI->isDebugInstr() && "Should not sink debug inst");
+    NewMI->setDebugLoc(DebugLoc());
+
+    // Replace the use with the newly created virtual register.
+    RegSubRegPair &UseReg = Entry.first;
+    MI->substituteRegister(UseReg.Reg, NewMI->getOperand(0).getReg(),
+                           UseReg.SubReg, *TRI);
+  }
+  // If we have replaced all uses, then delete the dead instruction
+  if (I.isDead(*MRI))
+    I.eraseFromParent();
   return true;
 }
 
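For orientation, below is a minimal, self-contained C++ sketch of the staged driver loop the patch adds around the cycle-sinking candidates in runOnMachineFunction. It is not LLVM code: Instr, sinkIntoCycle, pressureExceedsLimit, and the constant SinkIntoCycleLimit are illustrative stand-ins for MachineInstr, aggressivelySinkIntoCycle, registerPressureExceedsLimit, and the corresponding cl::opt. It only shows the control flow: stage 1 keeps the old bounded copy-sinking, stage 2 sinks anything the target reports as low-latency, stage 3 sinks whatever remains legal, and later stages run only while preheader pressure still exceeds the limit.

// Illustrative model of the staged cycle-sinking loop; all names are stand-ins.
#include <vector>

struct Instr { bool Copy; bool LowLatency; };          // stand-in for MachineInstr

static bool sinkIntoCycle(const Instr &) { return true; }  // stub for the sinking call
static bool pressureExceedsLimit() { return false; }       // stub for the RP re-check
static const unsigned SinkIntoCycleLimit = 50;             // stand-in for the cl::opt limit

void stagedCycleSink(std::vector<Instr> &Candidates) {
  enum CycleSinkStage { COPY, LOW_LATENCY, AGGRESSIVE, END };
  for (unsigned Stage = COPY; Stage != END; ++Stage) {
    bool HasHighPressure = false;
    unsigned I = 0;
    for (Instr &MI : Candidates) {
      // Stage 1: sink only a bounded number of copies (the pre-patch behaviour).
      if (Stage == COPY) {
        if (I++ == SinkIntoCycleLimit)
          break;
        if (!MI.Copy)
          continue;
      }
      // Stage 2: no count limit, but only low-latency definitions.
      if (Stage == LOW_LATENCY && !MI.LowLatency)
        continue;
      // Stage 3 (AGGRESSIVE): no filter; sink whatever is still safe to sink.
      if (!sinkIntoCycle(MI))
        break;
    }
    // Each stage ends by re-measuring preheader pressure; more aggressive
    // stages only run while pressure still exceeds the set limit.
    if (!HasHighPressure)
      HasHighPressure = pressureExceedsLimit();
    if (!HasHighPressure)
      break;
  }
}

int main() {
  std::vector<Instr> Candidates = {{true, false}, {false, true}, {false, false}};
  stagedCycleSink(Candidates);
  return 0;
}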