Skip to content

Commit b32aa25

Browse files
committed
[MachineSink] Add option for aggressive loop sinking
Change-Id: I62a6c6fc2c372523ce9ec98d084a434548609ead
1 parent 1eaa179 commit b32aa25

File tree

4 files changed

+703
-69
lines changed

4 files changed

+703
-69
lines changed

llvm/lib/CodeGen/MachineSink.cpp

Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,12 @@ static cl::opt<bool>
100100
"register spills"),
101101
cl::init(false), cl::Hidden);
102102

103+
/// Command-line switch for the aggressive cycle-sinking mode. It is a hidden
/// option and defaults to off.
static cl::opt<bool> AggressivelySinkInstsIntoCycle(
    "aggressively-sink-insts-to-avoid-spills", cl::Hidden, cl::init(false),
    cl::desc("Aggressively sink instructions into cycles to avoid "
             "register spills"));
108+
103109
static cl::opt<unsigned> SinkIntoCycleLimit(
104110
"machine-sink-cycle-limit",
105111
cl::desc(
@@ -256,6 +262,13 @@ class MachineSinking : public MachineFunctionPass {
256262
SmallVectorImpl<MachineInstr *> &Candidates);
257263
bool SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I);
258264

265+
bool isDead(const MachineInstr *MI) const;
266+
bool AggressivelySinkIntoCycle(
267+
MachineCycle *Cycle, MachineInstr &I,
268+
DenseMap<MachineInstr *,
269+
std::list<std::pair<MachineBasicBlock *, MachineInstr *>>>
270+
SunkInstrs);
271+
259272
bool isProfitableToSinkTo(Register Reg, MachineInstr &MI,
260273
MachineBasicBlock *MBB,
261274
MachineBasicBlock *SuccToSinkTo,
@@ -679,6 +692,10 @@ void MachineSinking::FindCycleSinkCandidates(
679692
SmallVectorImpl<MachineInstr *> &Candidates) {
680693
for (auto &MI : *BB) {
681694
LLVM_DEBUG(dbgs() << "CycleSink: Analysing candidate: " << MI);
695+
if (MI.isDebugInstr()) {
696+
LLVM_DEBUG(dbgs() << "CycleSink: Dont sink debug instructions\n");
697+
continue;
698+
}
682699
if (!TII->shouldSink(MI)) {
683700
LLVM_DEBUG(dbgs() << "CycleSink: Instruction not a candidate for this "
684701
"target\n");
@@ -799,6 +816,30 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
799816
}
800817
}
801818

819+
// Optional aggressive mode: for every top-level cycle that has a preheader,
// try to sink each sink candidate found in that preheader into the cycle.
if (AggressivelySinkInstsIntoCycle) {
  SmallVector<MachineCycle *, 8> Cycles(CI->toplevel_cycles());
  DenseMap<MachineInstr *,
           std::list<std::pair<MachineBasicBlock *, MachineInstr *>>>
      SunkInstrs;
  for (auto *Cycle : Cycles) {
    MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
    if (!Preheader) {
      LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Can't find preheader\n");
      continue;
    }
    SmallVector<MachineInstr *, 8> Candidates;
    FindCycleSinkCandidates(Cycle, Preheader, Candidates);

    // Walk the candidates in reverse order so that we start with the use
    // of a def-use chain, if there is any.
    for (MachineInstr *I : llvm::reverse(Candidates)) {
      // Only report a change (and bump the statistic) when the helper says it
      // handled the candidate; it returns false for candidates it rejects.
      if (AggressivelySinkIntoCycle(Cycle, *I, SunkInstrs)) {
        EverMadeChange = true;
        ++NumCycleSunk;
      }
    }
  }
}
842+
802843
HasStoreCache.clear();
803844
StoreInstrCache.clear();
804845

@@ -1574,6 +1615,149 @@ bool MachineSinking::hasStoreBetween(MachineBasicBlock *From,
15741615
return HasAliasedStore;
15751616
}
15761617

1618+
/// Copy paste from DeadMachineInstructionElimImpl
1619+
1620+
bool MachineSinking::isDead(const MachineInstr *MI) const {
1621+
// Instructions without side-effects are dead iff they only define dead regs.
1622+
// This function is hot and this loop returns early in the common case,
1623+
// so only perform additional checks before this if absolutely necessary.
1624+
for (const MachineOperand &MO : MI->all_defs()) {
1625+
Register Reg = MO.getReg();
1626+
if (Reg.isPhysical()) {
1627+
return false;
1628+
} else {
1629+
if (MO.isDead()) {
1630+
#ifndef NDEBUG
1631+
// Basic check on the register. All of them should be 'undef'.
1632+
for (auto &U : MRI->use_nodbg_operands(Reg))
1633+
assert(U.isUndef() && "'Undef' use on a 'dead' register is found!");
1634+
#endif
1635+
continue;
1636+
}
1637+
for (const MachineInstr &Use : MRI->use_nodbg_instructions(Reg)) {
1638+
if (&Use != MI)
1639+
// This def has a non-debug use. Don't delete the instruction!
1640+
return false;
1641+
}
1642+
}
1643+
}
1644+
1645+
// Technically speaking inline asm without side effects and no defs can still
1646+
// be deleted. But there is so much bad inline asm code out there, we should
1647+
// let them be.
1648+
if (MI->isInlineAsm())
1649+
return false;
1650+
1651+
// FIXME: See issue #105950 for why LIFETIME markers are considered dead here.
1652+
if (MI->isLifetimeMarker())
1653+
return true;
1654+
1655+
// If there are no defs with uses, the instruction might be dead.
1656+
return MI->wouldBeTriviallyDead();
1657+
}
1658+
1659+
/// Aggressively sink instructions into cycles. This will aggressively try to
1660+
/// sink all instructions in the top-most preheaders in an attempt to reduce RP.
1661+
/// In particular, it will sink into multiple successor blocks without limits
1662+
/// based on the amount of sinking, or the type of ops being sunk (so long as
1663+
/// they are safe to sink).
1664+
bool MachineSinking::AggressivelySinkIntoCycle(
1665+
MachineCycle *Cycle, MachineInstr &I,
1666+
DenseMap<MachineInstr *,
1667+
std::list<std::pair<MachineBasicBlock *, MachineInstr *>>>
1668+
SunkInstrs) {
1669+
LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Finding sink block for: " << I);
1670+
MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
1671+
assert(Preheader && "Cycle sink needs a preheader block");
1672+
SmallVector<std::pair<MachineOperand, MachineInstr *>> Uses;
1673+
// TODO: support instructions with multiple defs
1674+
if (I.getNumDefs() > 1)
1675+
return false;
1676+
1677+
MachineOperand DefMO = I.getOperand(0);
1678+
for (MachineInstr &MI : MRI->use_instructions(DefMO.getReg())) {
1679+
Uses.push_back({DefMO, &MI});
1680+
}
1681+
1682+
for (std::pair<MachineOperand, MachineInstr *> Entry : Uses) {
1683+
MachineInstr *MI = Entry.second;
1684+
LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Analysing use: " << MI);
1685+
if (MI->isPHI()) {
1686+
LLVM_DEBUG(
1687+
dbgs() << "AggressiveCycleSink: Not attempting to sink for PHI.\n");
1688+
continue;
1689+
}
1690+
// We cannot sink before the prologue
1691+
if (TII->isBasicBlockPrologue(*MI) || MI->isPosition()) {
1692+
LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Use is BasicBlock prologue, "
1693+
"can't sink.\n");
1694+
continue;
1695+
}
1696+
if (!Cycle->contains(MI->getParent())) {
1697+
LLVM_DEBUG(
1698+
dbgs() << "AggressiveCycleSink: Use not in cycle, can't sink.\n");
1699+
continue;
1700+
}
1701+
1702+
MachineBasicBlock *SinkBlock = MI->getParent();
1703+
MachineInstr *NewMI = nullptr;
1704+
1705+
// Check for the case in which we have already sunk a copy of this
1706+
// instruction into the user block.
1707+
if (SunkInstrs.contains(&I)) {
1708+
auto SunkBlocks = SunkInstrs[&I];
1709+
auto Match = std::find_if(
1710+
SunkBlocks.begin(), SunkBlocks.end(),
1711+
[&SinkBlock](
1712+
std::pair<MachineBasicBlock *, MachineInstr *> SunkEntry) {
1713+
return SunkEntry.first == SinkBlock;
1714+
});
1715+
if (Match != SunkBlocks.end()) {
1716+
LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Already sunk to block: "
1717+
<< printMBBReference(*SinkBlock) << "\n");
1718+
NewMI = Match->second;
1719+
}
1720+
}
1721+
1722+
// Create a copy of the instruction in the use block.
1723+
if (!NewMI) {
1724+
LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Sinking instruction to block: "
1725+
<< printMBBReference(*SinkBlock) << "\n");
1726+
1727+
NewMI = I.getMF()->CloneMachineInstr(&I);
1728+
if (DefMO.getReg().isVirtual()) {
1729+
const TargetRegisterClass *TRC = MRI->getRegClass(DefMO.getReg());
1730+
Register DestReg = MRI->createVirtualRegister(TRC);
1731+
NewMI->substituteRegister(DefMO.getReg(), DestReg, DefMO.getSubReg(),
1732+
*TRI);
1733+
}
1734+
SinkBlock->insert(SinkBlock->SkipPHIsAndLabels(SinkBlock->begin()),
1735+
NewMI);
1736+
SunkInstrs[&I].push_back({SinkBlock, NewMI});
1737+
}
1738+
1739+
// Conservatively clear any kill flags on uses of sunk instruction
1740+
for (MachineOperand &MO : NewMI->operands()) {
1741+
if (MO.isReg() && MO.readsReg())
1742+
RegsToClearKillFlags.insert(MO.getReg());
1743+
}
1744+
1745+
// The instruction is moved from its basic block, so do not retain the
1746+
// debug information.
1747+
assert(!NewMI->isDebugInstr() && "Should not sink debug inst");
1748+
NewMI->setDebugLoc(DebugLoc());
1749+
1750+
// Replace the use with the newly created virtual register.
1751+
MachineOperand UseMO = Entry.first;
1752+
MI->substituteRegister(UseMO.getReg(), NewMI->getOperand(0).getReg(),
1753+
UseMO.getSubReg(), *TRI);
1754+
}
1755+
// If we have replaced all uses, then delete the dead instruction
1756+
if (isDead(&I))
1757+
I.eraseFromParent();
1758+
return true;
1759+
}
1760+
15771761
/// Sink instructions into cycles if profitable. This especially tries to
15781762
/// prevent register spills caused by register pressure if there is little to no
15791763
/// overhead moving instructions into cycles.
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 --aggressively-sink-insts-to-avoid-spills=1 < %s | FileCheck -check-prefix=SUNK %s
2+
3+
; Check that various edge cases do not crash the compiler
4+
5+
; Multiple uses of sunk valu, chain of sink candidates
6+
7+
define half @global_agent_atomic_fmin_ret_f16__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, half %val) {
8+
; SUNK-LABEL: global_agent_atomic_fmin_ret_f16__amdgpu_no_fine_grained_memory:
9+
%result = atomicrmw fmin ptr addrspace(1) %ptr, half %val syncscope("agent") seq_cst
10+
ret half %result
11+
}
12+
13+
; Sink candidates with multiple defs
14+
15+
define void @memmove_p5_p5(ptr addrspace(5) align 1 %dst, ptr addrspace(5) align 1 readonly %src, i64 %sz) {
16+
; SUNK-LABEL: memmove_p5_p5:
17+
entry:
18+
tail call void @llvm.memmove.p5.p5.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 %sz, i1 false)
19+
ret void
20+
}

0 commit comments

Comments
 (0)