Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions llvm/include/llvm/CodeGen/MachineCycleAnalysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,15 @@ class LLVM_ABI MachineCycleInfoWrapperPass : public MachineFunctionPass {
// version.
LLVM_ABI bool isCycleInvariant(const MachineCycle *Cycle, MachineInstr &I);

/// Returns true if this machine instruction loads from the global offset
/// table or a constant pool. If the instruction has lost its memory operands,
/// it is conservatively assumed to read from anywhere and this returns true.
bool mayLoadFromGOTOrConstantPool(MachineInstr &MI);

/// Returns true if this machine instruction can be a sink candidate for
/// \p Cycle: it is not a meta instruction, the target allows sinking it, it is
/// cycle-invariant, safe to move across stores, not convergent, and its first
/// operand defines a register with a single definition in \p MRI.
bool isSinkIntoCycleCandidate(MachineInstr &MI, MachineCycle *Cycle,
                              MachineRegisterInfo *MRI,
                              const TargetInstrInfo *TII);

class MachineCycleAnalysis : public AnalysisInfoMixin<MachineCycleAnalysis> {
friend AnalysisInfoMixin<MachineCycleAnalysis>;
LLVM_ABI static AnalysisKey Key;
Expand Down
45 changes: 45 additions & 0 deletions llvm/lib/CodeGen/MachineCycleAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,3 +167,48 @@ bool llvm::isCycleInvariant(const MachineCycle *Cycle, MachineInstr &I) {
// If we got this far, the instruction is cycle invariant!
return true;
}

/// Query whether \p MI reads from the global offset table or a constant pool.
/// Instructions without memory operands are conservatively treated as if they
/// could read from anywhere, so this returns true for them as well.
bool llvm::mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
  assert(MI.mayLoad() && "Expected MI that loads!");

  // No memory operands left on the instruction: conservatively assume it may
  // read from anything, including the GOT or a constant pool.
  if (MI.memoperands_empty())
    return true;

  // Otherwise, report true only if some memory operand is backed by a pseudo
  // source value identifying the GOT or a constant pool.
  for (const MachineMemOperand *MMO : MI.memoperands()) {
    const PseudoSourceValue *PSV = MMO->getPseudoValue();
    if (PSV && (PSV->isGOT() || PSV->isConstantPool()))
      return true;
  }

  return false;
}

/// Decide whether \p MI, currently outside \p Cycle, is a legal candidate for
/// sinking into the cycle. Returns false at the first disqualifying property.
bool llvm::isSinkIntoCycleCandidate(MachineInstr &MI, MachineCycle *Cycle,
                                    MachineRegisterInfo *MRI,
                                    const TargetInstrInfo *TII) {
  // Never sink meta instructions (they carry no machine semantics to sink).
  if (MI.isMetaInstruction())
    return false;
  // Instruction not a candidate for this target.
  if (!TII->shouldSink(MI))
    return false;
  // Instruction is not cycle invariant.
  if (!isCycleInvariant(Cycle, MI))
    return false;
  // Instruction not safe to move (conservatively assume any store aliases).
  bool DontMoveAcrossStore = true;
  if (!MI.isSafeToMove(DontMoveAcrossStore))
    return false;
  // Among loads, only GOT/constant-pool loads may be sunk; every other load
  // is rejected here. (The predicate returns true for GOT/CP loads, so the
  // negation keeps exactly those.)
  if (MI.mayLoad() && !mayLoadFromGOTOrConstantPool(MI))
    return false;
  // Convergent instructions must not change their control dependencies.
  if (MI.isConvergent())
    return false;
  // The first operand must define a register ...
  // TODO(review): only operand 0 is inspected; candidates with more than one
  // non-dead def are not rejected — confirm downstream sinking handles that.
  const MachineOperand &MO = MI.getOperand(0);
  if (!MO.isReg() || !MO.getReg() || !MO.isDef())
    return false;
  // ... and that register must have exactly one definition.
  if (!MRI->hasOneDef(MO.getReg()))
    return false;
  return true;
}
135 changes: 117 additions & 18 deletions llvm/lib/CodeGen/MachineLICM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,12 @@ DisableHoistingToHotterBlocks("disable-hoisting-to-hotter-blocks",
clEnumValN(UseBFI::All, "all",
"enable the feature with/wo profile data")));

// Experimental knob (off by default): before pre-RA MachineLICM runs, sink
// cycle-invariant instructions from a cycle's preheader back into the cycle
// to reduce register pressure caused by earlier middle-end hoisting.
static cl::opt<bool> SinkInstsIntoCycleBeforeLICM(
    "sink-insts-before-licm",
    cl::desc("Sink instructions into cycles to avoid "
             "register spills"),
    cl::init(false), cl::Hidden);

STATISTIC(NumHoisted,
"Number of machine instructions hoisted out of loops");
STATISTIC(NumLowRP,
Expand Down Expand Up @@ -287,6 +293,8 @@ namespace {
bool isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
MachineBasicBlock *TgtBlock);
MachineBasicBlock *getOrCreatePreheader(MachineLoop *CurLoop);

bool rematerializeIntoCycle(MachineCycle *Cycle, MachineInstr &I);
};

class MachineLICMBase : public MachineFunctionPass {
Expand All @@ -304,7 +312,11 @@ namespace {
AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
AU.addRequired<MachineDominatorTreeWrapperPass>();
AU.addRequired<AAResultsWrapperPass>();
if (PreRegAlloc)
AU.addRequired<MachineCycleInfoWrapperPass>();
AU.addPreserved<MachineLoopInfoWrapperPass>();
if (PreRegAlloc)
AU.addPreserved<MachineCycleInfoWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
};
Expand Down Expand Up @@ -348,6 +360,7 @@ INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass)
INITIALIZE_PASS_END(EarlyMachineLICM, "early-machinelicm",
"Early Machine Loop Invariant Code Motion", false, false)

Expand Down Expand Up @@ -396,6 +409,26 @@ bool MachineLICMImpl::run(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << MF.getName() << " ********\n");

if (PreRegAlloc) {
if (SinkInstsIntoCycleBeforeLICM) {
auto *CI = GET_RESULT(MachineCycle, getCycleInfo, Info);
SmallVector<MachineCycle *, 8> Cycles(CI->toplevel_cycles());
for (auto *Cycle : Cycles) {
MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
if (!Preheader) {
LLVM_DEBUG(dbgs() << "Rematerialization: Can't find preheader\n");
continue;
}
SmallVector<MachineInstr *, 8> Candidates;
for (auto &MI : *Preheader)
if (isSinkIntoCycleCandidate(MI, Cycle, MRI, TII))
Candidates.push_back(&MI);
// Walk the candidates in reverse order so that we start with the use
// of a def-use chain, if there is any.
for (MachineInstr *I : llvm::reverse(Candidates))
if (rematerializeIntoCycle(Cycle, *I))
Changed = true;
}
}
// Estimate register pressure during pre-regalloc pass.
unsigned NumRPS = TRI->getNumRegPressureSets();
RegPressure.resize(NumRPS);
Expand Down Expand Up @@ -1005,24 +1038,6 @@ MachineLICMImpl::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,
return Cost;
}

/// Return true if this machine instruction loads from global offset table or
/// constant pool.
static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
assert(MI.mayLoad() && "Expected MI that loads!");

// If we lost memory operands, conservatively assume that the instruction
// reads from everything..
if (MI.memoperands_empty())
return true;

for (MachineMemOperand *MemOp : MI.memoperands())
if (const PseudoSourceValue *PSV = MemOp->getPseudoValue())
if (PSV->isGOT() || PSV->isConstantPool())
return true;

return false;
}

// This function iterates through all the operands of the input store MI and
// checks that each register operand satisfies isCallerPreservedPhysReg.
// This means, the value being stored and the address where it is being stored
Expand Down Expand Up @@ -1744,13 +1759,97 @@ bool MachineLICMImpl::isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
return Ratio > BlockFrequencyRatioThreshold;
}

/// Sink instruction \p I from the cycle preheader back into \p Cycle before
/// Machine LICM runs, since LICM in the middle-end may have hoisted
/// instructions without considering register pressure.
/// Returns true if \p I was moved into a block of the cycle.
bool MachineLICMImpl::rematerializeIntoCycle(MachineCycle *Cycle,
                                             MachineInstr &I) {
  LLVM_DEBUG(dbgs() << "Rematerialization: Finding sink block for: " << I);
  MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
  assert(Preheader && "Cycle sink needs a preheader block");
  MachineBasicBlock *SinkBlock = nullptr;
  // NOTE(review): only the first def of I is considered here; candidates with
  // multiple non-dead defs are not filtered out earlier — confirm that such
  // instructions cannot reach this point or handle their other defs too.
  const MachineOperand &MO = I.getOperand(0);
  // Find the nearest common dominator of all uses inside the cycle; that is
  // where the instruction can be placed so every use still sees the def.
  for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
    LLVM_DEBUG(dbgs() << "Rematerialization: Analysing use: " << MI);
    if (!Cycle->contains(MI.getParent())) {
      LLVM_DEBUG(
          dbgs() << "Rematerialization: Use not in cycle, can't sink.\n");
      return false;
    }
    if (!SinkBlock) {
      SinkBlock = MI.getParent();
      LLVM_DEBUG(dbgs() << "Rematerialization: Setting sink block to: "
                        << printMBBReference(*SinkBlock) << "\n");
      continue;
    }
    if (MI.isPHI()) {
      // For a PHI use, the value must dominate the incoming edge's source
      // block, not the PHI's own block. (Renamed loop index from I to Idx so
      // it no longer shadows the MachineInstr parameter I.)
      for (unsigned Idx = 1; Idx != MI.getNumOperands(); Idx += 2) {
        Register SrcReg = MI.getOperand(Idx).getReg();
        if (TRI->regsOverlap(SrcReg, MO.getReg())) {
          MachineBasicBlock *SrcBB = MI.getOperand(Idx + 1).getMBB();
          if (SrcBB != SinkBlock) {
            SinkBlock =
                MDTU->getDomTree().findNearestCommonDominator(SinkBlock, SrcBB);
            if (!SinkBlock)
              break;
          }
        }
      }
    } else {
      SinkBlock = MDTU->getDomTree().findNearestCommonDominator(SinkBlock,
                                                                MI.getParent());
    }
    if (!SinkBlock) {
      LLVM_DEBUG(
          dbgs() << "Rematerialization: Can't find nearest dominator\n");
      return false;
    }
    LLVM_DEBUG(
        dbgs() << "Rematerialization: Setting nearest common dom block: "
               << printMBBReference(*SinkBlock) << "\n");
  }
  if (!SinkBlock) {
    LLVM_DEBUG(
        dbgs() << "Rematerialization: Not sinking, can't find sink block.\n");
    return false;
  }
  if (SinkBlock == Preheader) {
    LLVM_DEBUG(
        dbgs()
        << "Rematerialization: Not sinking, sink block is the preheader\n");
    return false;
  }
  // A PHI in the sink block cannot use a value defined in that same block.
  for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
    if (MI.isPHI() && MI.getParent() == SinkBlock) {
      LLVM_DEBUG(dbgs() << "Rematerialization: Not sinking, sink block is "
                           "using it on PHI.\n");
      return false;
    }
  }
  LLVM_DEBUG(dbgs() << "Rematerialization: Sinking instruction!\n");
  SinkBlock->splice(SinkBlock->SkipPHIsAndLabels(SinkBlock->begin()), Preheader,
                    I);
  // Conservatively clear any kill flags on uses of sunk instruction. (Renamed
  // from MO to Op to avoid shadowing the def operand reference above.)
  for (MachineOperand &Op : I.operands()) {
    if (Op.isReg() && Op.readsReg())
      MRI->clearKillFlags(Op.getReg());
  }
  // The instruction is moved from its basic block, so do not retain the
  // debug information.
  assert(!I.isDebugInstr() && "Should not sink debug inst");
  I.setDebugLoc(DebugLoc());
  return true;
}

/// New-PM entry point: run the shared MachineLICM implementation and report
/// which analyses survive the transformation.
template <typename DerivedT, bool PreRegAlloc>
PreservedAnalyses MachineLICMBasePass<DerivedT, PreRegAlloc>::run(
    MachineFunction &MF, MachineFunctionAnalysisManager &MFAM) {
  MachineLICMImpl Impl(PreRegAlloc, nullptr, &MFAM);
  if (!Impl.run(MF))
    return PreservedAnalyses::all();
  // The function changed: start from the common machine-function set and add
  // the analyses this pass keeps up to date.
  PreservedAnalyses PA = getMachineFunctionPassPreservedAnalyses();
  if constexpr (PreRegAlloc)
    PA.preserve<MachineCycleAnalysis>();
  PA.preserve<MachineLoopAnalysis>();
  return PA;
}
Expand Down
Loading
Loading