Skip to content

Commit 48ecba3

Browse files
author
Sjoerd Meijer
committed
[MachineLICM][MachineSink] Move SinkIntoLoop to MachineSink.
This moves SinkIntoLoop from MachineLICM to MachineSink. The motivation for this work is that hoisting is a canonicalisation transformation, but we do not really have a good story to sink instructions back if that is better, e.g. to reduce live-ranges, register pressure and spilling. This has been discussed a few times on the list, the latest thread is: https://lists.llvm.org/pipermail/llvm-dev/2020-December/147184.html There it was pointed out that we have the LoopSink IR pass, but that works on IR, lacks register pressure informatiom, and is focused on profile guided optimisations, and then we have MachineLICM and MachineSink that both perform sinking. MachineLICM is more about hoisting and CSE'ing of hoisted instructions. It also contained a very incomplete and disabled-by-default SinkIntoLoop feature, which we now move to MachineSink. Getting loop-sinking to do something useful is going to be at least a 3-step approach: 1) This is just moving the code and is almost a NFC, but contains a bug fix. This uses helper function `isLoopInvariant` that was factored out in D94082 and added to MachineLoop. 2) A first functional change to make loop-sink a little bit less restrictive, which it really is at the moment, is the change in D94308. This lets it do more (alias) analysis using functions in MachineSink, making it a bit more powerful. Nothing changes much: still off by default. But it shows that MachineSink is a better home for this, and it starts using its functionality like `hasStoreBetween`, and in the next step we can use `isProfitableToSinkTo`. 3) This is the going to be he interesting step: decision making when and how many instructions to sink. This will be driven by the register pressure, and deciding if reducing live-ranges and loop sinking will help in better performance. 4) Once we are happy with 3), this should be enabled by default, that should be the end goal of this exercise. Differential Revision: https://reviews.llvm.org/D93694
1 parent 0175cd0 commit 48ecba3

File tree

5 files changed

+1555
-371
lines changed

5 files changed

+1555
-371
lines changed

llvm/lib/CodeGen/MachineLICM.cpp

Lines changed: 0 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -69,11 +69,6 @@ HoistCheapInsts("hoist-cheap-insts",
6969
cl::init(false), cl::Hidden);
7070

7171
static cl::opt<bool>
72-
SinkInstsToAvoidSpills("sink-insts-to-avoid-spills",
73-
cl::desc("MachineLICM should sink instructions into "
74-
"loops to avoid register spills"),
75-
cl::init(false), cl::Hidden);
76-
static cl::opt<bool>
7772
HoistConstStores("hoist-const-stores",
7873
cl::desc("Hoist invariant stores"),
7974
cl::init(true), cl::Hidden);
@@ -246,8 +241,6 @@ namespace {
246241

247242
void HoistOutOfLoop(MachineDomTreeNode *HeaderN);
248243

249-
void SinkIntoLoop();
250-
251244
void InitRegPressure(MachineBasicBlock *BB);
252245

253246
DenseMap<unsigned, int> calcRegisterCost(const MachineInstr *MI,
@@ -395,9 +388,6 @@ bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) {
395388
FirstInLoop = true;
396389
HoistOutOfLoop(N);
397390
CSEMap.clear();
398-
399-
if (SinkInstsToAvoidSpills)
400-
SinkIntoLoop();
401391
}
402392
}
403393

@@ -787,88 +777,6 @@ void MachineLICMBase::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
787777
}
788778
}
789779

790-
/// Sink instructions into loops if profitable. This especially tries to prevent
791-
/// register spills caused by register pressure if there is little to no
792-
/// overhead moving instructions into loops.
793-
void MachineLICMBase::SinkIntoLoop() {
794-
MachineBasicBlock *Preheader = getCurPreheader();
795-
if (!Preheader)
796-
return;
797-
798-
SmallVector<MachineInstr *, 8> Candidates;
799-
for (MachineBasicBlock::instr_iterator I = Preheader->instr_begin();
800-
I != Preheader->instr_end(); ++I) {
801-
// We need to ensure that we can safely move this instruction into the loop.
802-
// As such, it must not have side-effects, e.g. such as a call has.
803-
LLVM_DEBUG(dbgs() << "LICM: Analysing sink candidate: " << *I);
804-
if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(&*I)) {
805-
LLVM_DEBUG(dbgs() << "LICM: Added as sink candidate.\n");
806-
Candidates.push_back(&*I);
807-
continue;
808-
}
809-
LLVM_DEBUG(dbgs() << "LICM: Not added as sink candidate.\n");
810-
}
811-
812-
for (MachineInstr *I : Candidates) {
813-
const MachineOperand &MO = I->getOperand(0);
814-
if (!MO.isDef() || !MO.isReg() || !MO.getReg())
815-
continue;
816-
if (!MRI->hasOneDef(MO.getReg()))
817-
continue;
818-
bool CanSink = true;
819-
MachineBasicBlock *SinkBlock = nullptr;
820-
LLVM_DEBUG(dbgs() << "LICM: Try sinking: " << *I);
821-
822-
for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
823-
LLVM_DEBUG(dbgs() << "LICM: Analysing use: "; MI.dump());
824-
// FIXME: Come up with a proper cost model that estimates whether sinking
825-
// the instruction (and thus possibly executing it on every loop
826-
// iteration) is more expensive than a register.
827-
// For now assumes that copies are cheap and thus almost always worth it.
828-
if (!MI.isCopy()) {
829-
CanSink = false;
830-
break;
831-
}
832-
if (!SinkBlock) {
833-
SinkBlock = MI.getParent();
834-
LLVM_DEBUG(dbgs() << "LICM: Setting sink block to: "
835-
<< printMBBReference(*SinkBlock) << "\n");
836-
continue;
837-
}
838-
SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent());
839-
if (!SinkBlock) {
840-
LLVM_DEBUG(dbgs() << "LICM: Can't find nearest dominator\n");
841-
CanSink = false;
842-
break;
843-
}
844-
LLVM_DEBUG(dbgs() << "LICM: Setting nearest common dom block: " <<
845-
printMBBReference(*SinkBlock) << "\n");
846-
}
847-
if (!CanSink) {
848-
LLVM_DEBUG(dbgs() << "LICM: Can't sink instruction.\n");
849-
continue;
850-
}
851-
if (!SinkBlock) {
852-
LLVM_DEBUG(dbgs() << "LICM: Not sinking, can't find sink block.\n");
853-
continue;
854-
}
855-
if (SinkBlock == Preheader) {
856-
LLVM_DEBUG(dbgs() << "LICM: Not sinking, sink block is the preheader\n");
857-
continue;
858-
}
859-
860-
LLVM_DEBUG(dbgs() << "LICM: Sinking to " << printMBBReference(*SinkBlock)
861-
<< " from " << printMBBReference(*I->getParent())
862-
<< ": " << *I);
863-
SinkBlock->splice(SinkBlock->getFirstNonPHI(), Preheader, I);
864-
865-
// The instruction is moved from its basic block, so do not retain the
866-
// debug information.
867-
assert(!I->isDebugInstr() && "Should not sink debug inst");
868-
I->setDebugLoc(DebugLoc());
869-
}
870-
}
871-
872780
static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
873781
return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg());
874782
}

llvm/lib/CodeGen/MachineSink.cpp

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,14 @@ static cl::opt<unsigned> SinkLoadBlocksThreshold(
9191
"the straight line is higher than this threshold."),
9292
cl::init(20), cl::Hidden);
9393

94+
static cl::opt<bool>
95+
SinkInstsIntoLoop("sink-insts-to-avoid-spills",
96+
cl::desc("Sink instructions into loops to avoid "
97+
"register spills"),
98+
cl::init(false), cl::Hidden);
99+
94100
STATISTIC(NumSunk, "Number of machine instructions sunk");
101+
STATISTIC(NumLoopSunk, "Number of machine instructions sunk into a loop");
95102
STATISTIC(NumSplit, "Number of critical edges split");
96103
STATISTIC(NumCoalesces, "Number of copies coalesced");
97104
STATISTIC(NumPostRACopySink, "Number of copies sunk after RA");
@@ -216,6 +223,11 @@ namespace {
216223
bool &LocalUse) const;
217224
MachineBasicBlock *FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
218225
bool &BreakPHIEdge, AllSuccsCache &AllSuccessors);
226+
227+
void FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB,
228+
SmallVectorImpl<MachineInstr *> &Candidates);
229+
bool SinkIntoLoop(MachineLoop *L, MachineInstr &I);
230+
219231
bool isProfitableToSinkTo(Register Reg, MachineInstr &MI,
220232
MachineBasicBlock *MBB,
221233
MachineBasicBlock *SuccToSinkTo,
@@ -340,6 +352,60 @@ bool MachineSinking::AllUsesDominatedByBlock(Register Reg,
340352
return true;
341353
}
342354

355+
/// Return true if this machine instruction loads from global offset table or
356+
/// constant pool.
357+
static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
358+
assert(MI.mayLoad() && "Expected MI that loads!");
359+
360+
// If we lost memory operands, conservatively assume that the instruction
361+
// reads from everything..
362+
if (MI.memoperands_empty())
363+
return true;
364+
365+
for (MachineMemOperand *MemOp : MI.memoperands())
366+
if (const PseudoSourceValue *PSV = MemOp->getPseudoValue())
367+
if (PSV->isGOT() || PSV->isConstantPool())
368+
return true;
369+
370+
return false;
371+
}
372+
373+
void MachineSinking::FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB,
374+
SmallVectorImpl<MachineInstr *> &Candidates) {
375+
for (auto &MI : *BB) {
376+
LLVM_DEBUG(dbgs() << "LoopSink: Analysing candidate: " << MI);
377+
if (!TII->shouldSink(MI)) {
378+
LLVM_DEBUG(dbgs() << "LoopSink: Instruction not a candidate for this "
379+
"target\n");
380+
continue;
381+
}
382+
if (!L->isLoopInvariant(MI)) {
383+
LLVM_DEBUG(dbgs() << "LoopSink: Instruction is not loop invariant\n");
384+
continue;
385+
}
386+
bool DontMoveAcrossStore = true;
387+
if (!MI.isSafeToMove(AA, DontMoveAcrossStore)) {
388+
LLVM_DEBUG(dbgs() << "LoopSink: Instruction not safe to move.\n");
389+
continue;
390+
}
391+
if (MI.mayLoad() && !mayLoadFromGOTOrConstantPool(MI)) {
392+
LLVM_DEBUG(dbgs() << "LoopSink: Dont sink GOT or constant pool loads\n");
393+
continue;
394+
}
395+
if (MI.isConvergent())
396+
continue;
397+
398+
const MachineOperand &MO = MI.getOperand(0);
399+
if (!MO.isReg() || !MO.getReg() || !MO.isDef())
400+
continue;
401+
if (!MRI->hasOneDef(MO.getReg()))
402+
continue;
403+
404+
LLVM_DEBUG(dbgs() << "LoopSink: Instruction added as candidate.\n");
405+
Candidates.push_back(&MI);
406+
}
407+
}
408+
343409
bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
344410
if (skipFunction(MF.getFunction()))
345411
return false;
@@ -389,6 +455,29 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
389455
EverMadeChange = true;
390456
}
391457

458+
if (SinkInstsIntoLoop) {
459+
SmallVector<MachineLoop *, 8> Loops(LI->begin(), LI->end());
460+
for (auto *L : Loops) {
461+
MachineBasicBlock *Preheader = LI->findLoopPreheader(L);
462+
if (!Preheader) {
463+
LLVM_DEBUG(dbgs() << "LoopSink: Can't find preheader\n");
464+
continue;
465+
}
466+
SmallVector<MachineInstr *, 8> Candidates;
467+
FindLoopSinkCandidates(L, Preheader, Candidates);
468+
469+
// Walk the candidates in reverse order so that we start with the use
470+
// of a def-use chain, if there is any.
471+
for (auto It = Candidates.rbegin(); It != Candidates.rend(); ++It) {
472+
MachineInstr *I = *It;
473+
if (!SinkIntoLoop(L, *I))
474+
break;
475+
EverMadeChange = true;
476+
++NumLoopSunk;
477+
}
478+
}
479+
}
480+
392481
HasStoreCache.clear();
393482
StoreInstrCache.clear();
394483

@@ -1098,6 +1187,73 @@ bool MachineSinking::hasStoreBetween(MachineBasicBlock *From,
10981187
return HasAliasedStore;
10991188
}
11001189

1190+
/// Sink instructions into loops if profitable. This especially tries to prevent
1191+
/// register spills caused by register pressure if there is little to no
1192+
/// overhead moving instructions into loops.
1193+
bool MachineSinking::SinkIntoLoop(MachineLoop *L, MachineInstr &I) {
1194+
LLVM_DEBUG(dbgs() << "LoopSink: Finding sink block for: " << I);
1195+
MachineBasicBlock *Preheader = L->getLoopPreheader();
1196+
assert(Preheader && "Loop sink needs a preheader block");
1197+
MachineBasicBlock *SinkBlock = nullptr;
1198+
bool CanSink = true;
1199+
const MachineOperand &MO = I.getOperand(0);
1200+
1201+
for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
1202+
LLVM_DEBUG(dbgs() << "LoopSink: Analysing use: " << MI);
1203+
if (!L->contains(&MI)) {
1204+
LLVM_DEBUG(dbgs() << "LoopSink: Use not in loop, can't sink.\n");
1205+
CanSink = false;
1206+
break;
1207+
}
1208+
1209+
// FIXME: Come up with a proper cost model that estimates whether sinking
1210+
// the instruction (and thus possibly executing it on every loop
1211+
// iteration) is more expensive than a register.
1212+
// For now assumes that copies are cheap and thus almost always worth it.
1213+
if (!MI.isCopy()) {
1214+
LLVM_DEBUG(dbgs() << "LoopSink: Use is not a copy\n");
1215+
CanSink = false;
1216+
break;
1217+
}
1218+
if (!SinkBlock) {
1219+
SinkBlock = MI.getParent();
1220+
LLVM_DEBUG(dbgs() << "LoopSink: Setting sink block to: "
1221+
<< printMBBReference(*SinkBlock) << "\n");
1222+
continue;
1223+
}
1224+
SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent());
1225+
if (!SinkBlock) {
1226+
LLVM_DEBUG(dbgs() << "LoopSink: Can't find nearest dominator\n");
1227+
CanSink = false;
1228+
break;
1229+
}
1230+
LLVM_DEBUG(dbgs() << "LoopSink: Setting nearest common dom block: " <<
1231+
printMBBReference(*SinkBlock) << "\n");
1232+
}
1233+
1234+
if (!CanSink) {
1235+
LLVM_DEBUG(dbgs() << "LoopSink: Can't sink instruction.\n");
1236+
return false;
1237+
}
1238+
if (!SinkBlock) {
1239+
LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, can't find sink block.\n");
1240+
return false;
1241+
}
1242+
if (SinkBlock == Preheader) {
1243+
LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, sink block is the preheader\n");
1244+
return false;
1245+
}
1246+
1247+
LLVM_DEBUG(dbgs() << "LoopSink: Sinking instruction!\n");
1248+
SinkBlock->splice(SinkBlock->getFirstNonPHI(), Preheader, I);
1249+
1250+
// The instruction is moved from its basic block, so do not retain the
1251+
// debug information.
1252+
assert(!I.isDebugInstr() && "Should not sink debug inst");
1253+
I.setDebugLoc(DebugLoc());
1254+
return true;
1255+
}
1256+
11011257
/// SinkInstruction - Determine whether it is safe to sink the specified machine
11021258
/// instruction out of its current block into a successor.
11031259
bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,

0 commit comments

Comments
 (0)