Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions llvm/include/llvm/CodeGen/TargetInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1806,11 +1806,6 @@ class TargetInstrInfo : public MCInstrInfo {
unsigned defaultDefLatency(const MCSchedModel &SchedModel,
const MachineInstr &DefMI) const;

/// Return true if this instruction is considered low latency.
/// Targets may override this so passes (e.g. MachineSink) can treat the
/// instruction as cheap enough to sink into cycles; the base implementation
/// conservatively classifies no instruction as low latency.
virtual bool isLowLatencyInstruction(const MachineInstr &MI) const {
return false;
}

/// Return true if this opcode has high latency to its result.
virtual bool isHighLatencyDef(int opc) const { return false; }

Expand Down
208 changes: 80 additions & 128 deletions llvm/lib/CodeGen/MachineSink.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DebugInfoMetadata.h"
Expand Down Expand Up @@ -100,12 +101,6 @@ static cl::opt<bool>
"register spills"),
cl::init(false), cl::Hidden);

static cl::opt<bool> AggressivelySinkInstsIntoCycle(
"aggressive-sink-insts-into-cycles",
cl::desc("Aggressively sink instructions into cycles to avoid "
"register spills"),
cl::init(false), cl::Hidden);

static cl::opt<unsigned> SinkIntoCycleLimit(
"machine-sink-cycle-limit",
cl::desc(
Expand Down Expand Up @@ -135,6 +130,7 @@ class MachineSinking : public MachineFunctionPass {
const MachineBranchProbabilityInfo *MBPI = nullptr;
AliasAnalysis *AA = nullptr;
RegisterClassInfo RegClassInfo;
TargetSchedModel SchedModel;

// Remember which edges have been considered for breaking.
SmallSet<std::pair<MachineBasicBlock *, MachineBasicBlock *>, 8>
Expand Down Expand Up @@ -262,7 +258,6 @@ class MachineSinking : public MachineFunctionPass {

void FindCycleSinkCandidates(MachineCycle *Cycle, MachineBasicBlock *BB,
SmallVectorImpl<MachineInstr *> &Candidates);
bool SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I);

bool isDead(const MachineInstr *MI) const;
bool aggressivelySinkIntoCycle(
Expand All @@ -284,11 +279,14 @@ class MachineSinking : public MachineFunctionPass {
GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
AllSuccsCache &AllSuccessors) const;

std::vector<unsigned> &getBBRegisterPressure(const MachineBasicBlock &MBB);
std::vector<unsigned> &getBBRegisterPressure(const MachineBasicBlock &MBB,
bool UseCache = true);

bool registerPressureSetExceedsLimit(unsigned NRegs,
const TargetRegisterClass *RC,
const MachineBasicBlock &MBB);

bool registerPressureExceedsLimit(const MachineBasicBlock &MBB);
};

} // end anonymous namespace
Expand Down Expand Up @@ -787,48 +785,63 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
EverMadeChange = true;
}

if (SinkInstsIntoCycle || AggressivelySinkInstsIntoCycle) {
if (SinkInstsIntoCycle) {
SmallVector<MachineCycle *, 8> Cycles(CI->toplevel_cycles());
SchedModel.init(STI);
enum CycleSinkStage { COPY, LOW_LATENCY, AGGRESSIVE, END };

DenseMap<std::pair<MachineInstr *, MachineBasicBlock *>, MachineInstr *>
SunkInstrs;
for (auto *Cycle : Cycles) {
MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
if (!Preheader) {
LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n");
continue;
}
SmallVector<MachineInstr *, 8> Candidates;
FindCycleSinkCandidates(Cycle, Preheader, Candidates);

// Walk the candidates in reverse order so that we start with the use
// of a def-use chain, if there is any.
// TODO: Sort the candidates using a cost-model.
unsigned i = 0;

for (MachineInstr *I : llvm::reverse(Candidates)) {
// AggressivelySinkInstsIntoCycle sinks a superset of instructions
// relative to regular cycle sinking. Thus, this option supersedes and
// captures all sinking opportunities done by regular cycle sinking.
if (AggressivelySinkInstsIntoCycle) {
aggressivelySinkIntoCycle(Cycle, *I, SunkInstrs);
EverMadeChange = true;
++NumCycleSunk;
CycleSinkStage Stage = CycleSinkStage::COPY;
bool HasHighPressure;
do {
HasHighPressure = false;
DenseMap<std::pair<MachineInstr *, MachineBasicBlock *>, MachineInstr *>
SunkInstrs;
for (auto *Cycle : Cycles) {
MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
if (!Preheader) {
LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n");
continue;
}
SmallVector<MachineInstr *, 8> Candidates;
FindCycleSinkCandidates(Cycle, Preheader, Candidates);

unsigned i = 0;

// Walk the candidates in reverse order so that we start with the use
// of a def-use chain, if there is any.
// TODO: Sort the candidates using a cost-model.
for (MachineInstr *I : llvm::reverse(Candidates)) {
// CycleSinkStage::COPY: Sink a limited number of copies
if (Stage == CycleSinkStage::COPY) {
if (i++ == SinkIntoCycleLimit) {
LLVM_DEBUG(dbgs()
<< "CycleSink: Limit reached of instructions to "
"be analysed.");
break;
}

if (!I->isCopy())
continue;
}

if (i++ == SinkIntoCycleLimit) {
LLVM_DEBUG(dbgs() << "CycleSink: Limit reached of instructions to "
"be analysed.");
break;
// CycleSinkStage::LOW_LATENCY: sink unlimited number of instructions
// which the target specifies as low-latency
if (Stage == CycleSinkStage::LOW_LATENCY &&
!TII->hasLowDefLatency(SchedModel, *I, 0))
continue;

if (!aggressivelySinkIntoCycle(Cycle, *I, SunkInstrs))
break;
EverMadeChange = true;
++NumCycleSunk;
}

if (!SinkIntoCycle(Cycle, *I))
break;
EverMadeChange = true;
++NumCycleSunk;
// Recalculate the pressure after sinking
if (!HasHighPressure)
HasHighPressure = registerPressureExceedsLimit(*Preheader);
}
}
Stage = (CycleSinkStage)(Stage + 1);
} while (HasHighPressure && Stage < CycleSinkStage::END);
}

HasStoreCache.clear();
Expand Down Expand Up @@ -1081,13 +1094,15 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
}

std::vector<unsigned> &
MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB) {
MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB,
bool UseCache) {
// Currently to save compiling time, MBB's register pressure will not change
// in one ProcessBlock iteration because of CachedRegisterPressure, but MBB's
// register pressure is changed after sinking any instructions into it.
// FIXME: need an accurate and cheap register pressure estimate model here.

auto RP = CachedRegisterPressure.find(&MBB);
if (RP != CachedRegisterPressure.end())
if (UseCache && RP != CachedRegisterPressure.end())
return RP->second;

RegionPressure Pressure;
Expand All @@ -1111,6 +1126,12 @@ MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB) {
}

RPTracker.closeRegion();

if (RP != CachedRegisterPressure.end()) {
CachedRegisterPressure[&MBB] = RPTracker.getPressure().MaxSetPressure;
return CachedRegisterPressure[&MBB];
}

auto It = CachedRegisterPressure.insert(
std::make_pair(&MBB, RPTracker.getPressure().MaxSetPressure));
return It.first->second;
Expand All @@ -1129,6 +1150,21 @@ bool MachineSinking::registerPressureSetExceedsLimit(
return false;
}

// Recalculate RP and check if any pressure set exceeds the set limit.
bool MachineSinking::registerPressureExceedsLimit(
const MachineBasicBlock &MBB) {
// Bind by reference: getBBRegisterPressure returns a reference to the cached
// vector, so taking it by value copied every pressure-set entry on each call.
// UseCache=false forces a fresh recomputation after sinking changed the block.
const std::vector<unsigned> &BBRegisterPressure =
getBBRegisterPressure(MBB, /*UseCache=*/false);

for (unsigned PS = 0; PS < BBRegisterPressure.size(); ++PS) {
// '>=' is used here: a set already at its limit is treated as exceeding,
// i.e. there are no spare registers left in that pressure set.
if (BBRegisterPressure[PS] >=
TRI->getRegPressureSetLimit(*MBB.getParent(), PS))
return true;
}

return false;
}

/// isProfitableToSinkTo - Return true if it is profitable to sink MI.
bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
MachineBasicBlock *MBB,
Expand Down Expand Up @@ -1656,10 +1692,6 @@ bool MachineSinking::aggressivelySinkIntoCycle(
if (I.getNumDefs() > 1)
return false;

// Only sink instructions which the target considers to be low latency
if (!TII->isLowLatencyInstruction(I))
return false;

LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Finding sink block for: " << I);
MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
assert(Preheader && "Cycle sink needs a preheader block");
Expand Down Expand Up @@ -1741,86 +1773,6 @@ bool MachineSinking::aggressivelySinkIntoCycle(
return true;
}

/// Sink instructions into cycles if profitable. This especially tries to
/// prevent register spills caused by register pressure if there is little to no
/// overhead moving instructions into cycles.
bool MachineSinking::SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I) {
LLVM_DEBUG(dbgs() << "CycleSink: Finding sink block for: " << I);
MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
assert(Preheader && "Cycle sink needs a preheader block");
MachineBasicBlock *SinkBlock = nullptr;
bool CanSink = true;
// Operand 0 is the def whose uses decide where (and whether) to sink.
// Candidates are assumed to have been pre-filtered by FindCycleSinkCandidates
// so that operand 0 is a register def — TODO confirm against the caller.
const MachineOperand &MO = I.getOperand(0);

// Every use must live inside the cycle and be a copy; SinkBlock is narrowed
// to the nearest common dominator of all use blocks as we go.
for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
LLVM_DEBUG(dbgs() << "CycleSink: Analysing use: " << MI);
if (!Cycle->contains(MI.getParent())) {
LLVM_DEBUG(dbgs() << "CycleSink: Use not in cycle, can't sink.\n");
CanSink = false;
break;
}

// FIXME: Come up with a proper cost model that estimates whether sinking
// the instruction (and thus possibly executing it on every cycle
// iteration) is more expensive than a register.
// For now assumes that copies are cheap and thus almost always worth it.
if (!MI.isCopy()) {
LLVM_DEBUG(dbgs() << "CycleSink: Use is not a copy\n");
CanSink = false;
break;
}
// First use seen: its block is the initial sink candidate.
if (!SinkBlock) {
SinkBlock = MI.getParent();
LLVM_DEBUG(dbgs() << "CycleSink: Setting sink block to: "
<< printMBBReference(*SinkBlock) << "\n");
continue;
}
// Subsequent uses: widen the candidate to a block dominating both.
SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent());
if (!SinkBlock) {
LLVM_DEBUG(dbgs() << "CycleSink: Can't find nearest dominator\n");
CanSink = false;
break;
}
LLVM_DEBUG(dbgs() << "CycleSink: Setting nearest common dom block: "
<< printMBBReference(*SinkBlock) << "\n");
}

// Bail out on any disqualifying use, no uses at all, a sink target that is
// the preheader itself (no point), or a block too big to analyse cheaply.
if (!CanSink) {
LLVM_DEBUG(dbgs() << "CycleSink: Can't sink instruction.\n");
return false;
}
if (!SinkBlock) {
LLVM_DEBUG(dbgs() << "CycleSink: Not sinking, can't find sink block.\n");
return false;
}
if (SinkBlock == Preheader) {
LLVM_DEBUG(
dbgs() << "CycleSink: Not sinking, sink block is the preheader\n");
return false;
}
if (SinkBlock->sizeWithoutDebugLargerThan(SinkLoadInstsPerBlockThreshold)) {
LLVM_DEBUG(
dbgs() << "CycleSink: Not Sinking, block too large to analyse.\n");
return false;
}

LLVM_DEBUG(dbgs() << "CycleSink: Sinking instruction!\n");
// Move I from the preheader into SinkBlock, after any PHIs/labels.
SinkBlock->splice(SinkBlock->SkipPHIsAndLabels(SinkBlock->begin()), Preheader,
I);

// Conservatively clear any kill flags on uses of sunk instruction
for (MachineOperand &MO : I.operands()) {
if (MO.isReg() && MO.readsReg())
RegsToClearKillFlags.insert(MO.getReg());
}

// The instruction is moved from its basic block, so do not retain the
// debug information.
assert(!I.isDebugInstr() && "Should not sink debug inst");
I.setDebugLoc(DebugLoc());
return true;
}

/// SinkInstruction - Determine whether it is safe to sink the specified machine
/// instruction out of its current block into a successor.
bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
Expand Down
8 changes: 1 addition & 7 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8676,13 +8676,7 @@ uint64_t SIInstrInfo::getScratchRsrcWords23() const {
// NOTE(review): this span is a merged diff view (per the hunk header, 1
// addition and 7 deletions). The copy/sched-model checks below are the
// removed pre-change body; the final `return isSMRD(Opc);` is the added
// post-change body, which classifies only SMRD instructions as low latency.
bool SIInstrInfo::isLowLatencyInstruction(const MachineInstr &MI) const {
unsigned Opc = MI.getOpcode();

if (MI.isCopy() || isSMRD(Opc))
return true;

if (SchedModel.hasInstrSchedModel())
return SchedModel.computeInstrLatency(Opc) < 4;

return false;
return isSMRD(Opc);
}

bool SIInstrInfo::isHighLatencyDef(int Opc) const {
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/SIInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1291,7 +1291,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
uint64_t getDefaultRsrcDataFormat() const;
uint64_t getScratchRsrcWords23() const;

bool isLowLatencyInstruction(const MachineInstr &MI) const override;
bool isLowLatencyInstruction(const MachineInstr &MI) const;
bool isHighLatencyDef(int Opc) const override;

/// Return the descriptor of the target-specific machine instruction
Expand Down
Loading
Loading