From 8cb8bb00cce43634330626e5224cefb46696919c Mon Sep 17 00:00:00 2001 From: dianqk Date: Sat, 13 Sep 2025 21:40:55 +0800 Subject: [PATCH 1/2] [MachineLICM] Rematerialize instructions that may be hoisted before LICM --- .../llvm/CodeGen/MachineCycleAnalysis.h | 9 ++ llvm/lib/CodeGen/MachineCycleAnalysis.cpp | 45 ++++++ llvm/lib/CodeGen/MachineLICM.cpp | 135 +++++++++++++++--- llvm/lib/CodeGen/MachineSink.cpp | 67 +-------- 4 files changed, 175 insertions(+), 81 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h b/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h index 622f529939bcb..8382df36eed18 100644 --- a/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h +++ b/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h @@ -48,6 +48,15 @@ class LLVM_ABI MachineCycleInfoWrapperPass : public MachineFunctionPass { // version. LLVM_ABI bool isCycleInvariant(const MachineCycle *Cycle, MachineInstr &I); +/// Returns true if this machine instruction loads from global offset table or +/// constant pool. +bool mayLoadFromGOTOrConstantPool(MachineInstr &MI); + +/// Returns true if this machine instruction can be a sink candidate. +bool isSinkIntoCycleCandidate(MachineInstr &MI, MachineCycle *Cycle, + MachineRegisterInfo *MRI, + const TargetInstrInfo *TII); + class MachineCycleAnalysis : public AnalysisInfoMixin { friend AnalysisInfoMixin; LLVM_ABI static AnalysisKey Key; diff --git a/llvm/lib/CodeGen/MachineCycleAnalysis.cpp b/llvm/lib/CodeGen/MachineCycleAnalysis.cpp index 33a5b664826b3..6ffdb1e59a18d 100644 --- a/llvm/lib/CodeGen/MachineCycleAnalysis.cpp +++ b/llvm/lib/CodeGen/MachineCycleAnalysis.cpp @@ -167,3 +167,48 @@ bool llvm::isCycleInvariant(const MachineCycle *Cycle, MachineInstr &I) { // If we got this far, the instruction is cycle invariant! 
return true; } + +bool llvm::mayLoadFromGOTOrConstantPool(MachineInstr &MI) { + assert(MI.mayLoad() && "Expected MI that loads!"); + + // If we lost memory operands, conservatively assume that the instruction + // reads from everything. + if (MI.memoperands_empty()) + return true; + + for (MachineMemOperand *MemOp : MI.memoperands()) + if (const PseudoSourceValue *PSV = MemOp->getPseudoValue()) + if (PSV->isGOT() || PSV->isConstantPool()) + return true; + + return false; +} + +bool llvm::isSinkIntoCycleCandidate(MachineInstr &MI, MachineCycle *Cycle, + MachineRegisterInfo *MRI, + const TargetInstrInfo *TII) { + // Not sinking meta instruction. + if (MI.isMetaInstruction()) + return false; + // Instruction not a candidate for this target. + if (!TII->shouldSink(MI)) + return false; + // Instruction is not cycle invariant. + if (!isCycleInvariant(Cycle, MI)) + return false; + // Instruction not safe to move. + bool DontMoveAcrossStore = true; + if (!MI.isSafeToMove(DontMoveAcrossStore)) + return false; + // Only sink loads that may come from the GOT or the constant pool. 
+ if (MI.mayLoad() && !mayLoadFromGOTOrConstantPool(MI)) + return false; + if (MI.isConvergent()) + return false; + const MachineOperand &MO = MI.getOperand(0); + if (!MO.isReg() || !MO.getReg() || !MO.isDef()) + return false; + if (!MRI->hasOneDef(MO.getReg())) + return false; + return true; +} diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp index 4f164e2d53460..804a40612e85d 100644 --- a/llvm/lib/CodeGen/MachineLICM.cpp +++ b/llvm/lib/CodeGen/MachineLICM.cpp @@ -98,6 +98,12 @@ DisableHoistingToHotterBlocks("disable-hoisting-to-hotter-blocks", clEnumValN(UseBFI::All, "all", "enable the feature with/wo profile data"))); +static cl::opt SinkInstsIntoCycleBeforeLICM( + "sink-insts-before-licm", + cl::desc("Sink instructions into cycles to avoid " + "register spills"), + cl::init(true), cl::Hidden); + STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops"); STATISTIC(NumLowRP, @@ -287,6 +293,8 @@ namespace { bool isTgtHotterThanSrc(MachineBasicBlock *SrcBlock, MachineBasicBlock *TgtBlock); MachineBasicBlock *getOrCreatePreheader(MachineLoop *CurLoop); + + bool rematerializeIntoCycle(MachineCycle *Cycle, MachineInstr &I); }; class MachineLICMBase : public MachineFunctionPass { @@ -304,7 +312,11 @@ namespace { AU.addRequired(); AU.addRequired(); AU.addRequired(); + if (PreRegAlloc) + AU.addRequired(); AU.addPreserved(); + if (PreRegAlloc) + AU.addPreserved(); MachineFunctionPass::getAnalysisUsage(AU); } }; @@ -348,6 +360,7 @@ INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass) INITIALIZE_PASS_END(EarlyMachineLICM, "early-machinelicm", "Early Machine Loop Invariant Code Motion", false, false) @@ -396,6 +409,26 @@ bool MachineLICMImpl::run(MachineFunction &MF) { LLVM_DEBUG(dbgs() << 
MF.getName() << " ********\n"); if (PreRegAlloc) { + if (SinkInstsIntoCycleBeforeLICM) { + auto *CI = GET_RESULT(MachineCycle, getCycleInfo, Info); + SmallVector Cycles(CI->toplevel_cycles()); + for (auto *Cycle : Cycles) { + MachineBasicBlock *Preheader = Cycle->getCyclePreheader(); + if (!Preheader) { + LLVM_DEBUG(dbgs() << "Rematerialization: Can't find preheader\n"); + continue; + } + SmallVector Candidates; + for (auto &MI : *Preheader) + if (isSinkIntoCycleCandidate(MI, Cycle, MRI, TII)) + Candidates.push_back(&MI); + // Walk the candidates in reverse order so that we start with the use + // of a def-use chain, if there is any. + for (MachineInstr *I : llvm::reverse(Candidates)) + if (rematerializeIntoCycle(Cycle, *I)) + Changed = true; + } + } // Estimate register pressure during pre-regalloc pass. unsigned NumRPS = TRI->getNumRegPressureSets(); RegPressure.resize(NumRPS); @@ -1005,24 +1038,6 @@ MachineLICMImpl::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen, return Cost; } -/// Return true if this machine instruction loads from global offset table or -/// constant pool. -static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) { - assert(MI.mayLoad() && "Expected MI that loads!"); - - // If we lost memory operands, conservatively assume that the instruction - // reads from everything.. - if (MI.memoperands_empty()) - return true; - - for (MachineMemOperand *MemOp : MI.memoperands()) - if (const PseudoSourceValue *PSV = MemOp->getPseudoValue()) - if (PSV->isGOT() || PSV->isConstantPool()) - return true; - - return false; -} - // This function iterates through all the operands of the input store MI and // checks that each register operand statisfies isCallerPreservedPhysReg. 
// This means, the value being stored and the address where it is being stored @@ -1744,6 +1759,88 @@ bool MachineLICMImpl::isTgtHotterThanSrc(MachineBasicBlock *SrcBlock, return Ratio > BlockFrequencyRatioThreshold; } +/// Rematerialize instructions into cycles before Machine LICM, +/// since LICM in the middle-end hoisted every instruction without considering +/// register pressure. +bool MachineLICMImpl::rematerializeIntoCycle(MachineCycle *Cycle, + MachineInstr &I) { + LLVM_DEBUG(dbgs() << "Rematerialization: Finding sink block for: " << I); + MachineBasicBlock *Preheader = Cycle->getCyclePreheader(); + assert(Preheader && "Cycle sink needs a preheader block"); + MachineBasicBlock *SinkBlock = nullptr; + const MachineOperand &MO = I.getOperand(0); + for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) { + LLVM_DEBUG(dbgs() << "Rematerialization: Analysing use: " << MI); + if (!Cycle->contains(MI.getParent())) { + LLVM_DEBUG( + dbgs() << "Rematerialization: Use not in cycle, can't sink.\n"); + return false; + } + if (!SinkBlock) { + SinkBlock = MI.getParent(); + LLVM_DEBUG(dbgs() << "Rematerialization: Setting sink block to: " + << printMBBReference(*SinkBlock) << "\n"); + continue; + } + if (MI.isPHI()) { + for (unsigned I = 1; I != MI.getNumOperands(); I += 2) { + Register SrcReg = MI.getOperand(I).getReg(); + if (TRI->regsOverlap(SrcReg, MO.getReg())) { + MachineBasicBlock *SrcBB = MI.getOperand(I + 1).getMBB(); + if (SrcBB != SinkBlock) { + SinkBlock = + MDTU->getDomTree().findNearestCommonDominator(SinkBlock, SrcBB); + if (!SinkBlock) + break; + } + } + } + } else { + SinkBlock = MDTU->getDomTree().findNearestCommonDominator(SinkBlock, + MI.getParent()); + } + if (!SinkBlock) { + LLVM_DEBUG( + dbgs() << "Rematerialization: Can't find nearest dominator\n"); + return false; + } + LLVM_DEBUG( + dbgs() << "Rematerialization: Setting nearest common dom block: " + << printMBBReference(*SinkBlock) << "\n"); + } + if (!SinkBlock) { + LLVM_DEBUG( + dbgs() << 
"Rematerialization: Not sinking, can't find sink block.\n"); + return false; + } + if (SinkBlock == Preheader) { + LLVM_DEBUG( + dbgs() + << "Rematerialization: Not sinking, sink block is the preheader\n"); + return false; + } + for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) { + if (MI.isPHI() && MI.getParent() == SinkBlock) { + LLVM_DEBUG(dbgs() << "Rematerialization: Not sinking, sink block is " + "using it on PHI.\n"); + return false; + } + } + LLVM_DEBUG(dbgs() << "Rematerialization: Sinking instruction!\n"); + SinkBlock->splice(SinkBlock->SkipPHIsAndLabels(SinkBlock->begin()), Preheader, + I); + // Conservatively clear any kill flags on uses of sunk instruction + for (MachineOperand &MO : I.operands()) { + if (MO.isReg() && MO.readsReg()) + MRI->clearKillFlags(MO.getReg()); + } + // The instruction is moved from its basic block, so do not retain the + // debug information. + assert(!I.isDebugInstr() && "Should not sink debug inst"); + I.setDebugLoc(DebugLoc()); + return true; +} + template PreservedAnalyses MachineLICMBasePass::run( MachineFunction &MF, MachineFunctionAnalysisManager &MFAM) { @@ -1751,6 +1848,8 @@ PreservedAnalyses MachineLICMBasePass::run( if (!Changed) return PreservedAnalyses::all(); auto PA = getMachineFunctionPassPreservedAnalyses(); + if (PreRegAlloc) + PA.preserve(); PA.preserve(); return PA; } diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index 9ec5151a039b7..615edf6b414da 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -39,6 +39,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineLoopUtils.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -258,9 +259,6 @@ class MachineSinking { bool &BreakPHIEdge, AllSuccsCache &AllSuccessors); - void 
FindCycleSinkCandidates(MachineCycle *Cycle, MachineBasicBlock *BB, - SmallVectorImpl &Candidates); - bool aggressivelySinkIntoCycle(MachineCycle *Cycle, MachineInstr &I, DenseMap &SunkInstrs); @@ -694,65 +692,6 @@ bool MachineSinking::AllUsesDominatedByBlock(Register Reg, return true; } -/// Return true if this machine instruction loads from global offset table or -/// constant pool. -static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) { - assert(MI.mayLoad() && "Expected MI that loads!"); - - // If we lost memory operands, conservatively assume that the instruction - // reads from everything.. - if (MI.memoperands_empty()) - return true; - - for (MachineMemOperand *MemOp : MI.memoperands()) - if (const PseudoSourceValue *PSV = MemOp->getPseudoValue()) - if (PSV->isGOT() || PSV->isConstantPool()) - return true; - - return false; -} - -void MachineSinking::FindCycleSinkCandidates( - MachineCycle *Cycle, MachineBasicBlock *BB, - SmallVectorImpl &Candidates) { - for (auto &MI : *BB) { - LLVM_DEBUG(dbgs() << "CycleSink: Analysing candidate: " << MI); - if (MI.isMetaInstruction()) { - LLVM_DEBUG(dbgs() << "CycleSink: not sinking meta instruction\n"); - continue; - } - if (!TII->shouldSink(MI)) { - LLVM_DEBUG(dbgs() << "CycleSink: Instruction not a candidate for this " - "target\n"); - continue; - } - if (!isCycleInvariant(Cycle, MI)) { - LLVM_DEBUG(dbgs() << "CycleSink: Instruction is not cycle invariant\n"); - continue; - } - bool DontMoveAcrossStore = true; - if (!MI.isSafeToMove(DontMoveAcrossStore)) { - LLVM_DEBUG(dbgs() << "CycleSink: Instruction not safe to move.\n"); - continue; - } - if (MI.mayLoad() && !mayLoadFromGOTOrConstantPool(MI)) { - LLVM_DEBUG(dbgs() << "CycleSink: Dont sink GOT or constant pool loads\n"); - continue; - } - if (MI.isConvergent()) - continue; - - const MachineOperand &MO = MI.getOperand(0); - if (!MO.isReg() || !MO.getReg() || !MO.isDef()) - continue; - if (!MRI->hasOneDef(MO.getReg())) - continue; - - LLVM_DEBUG(dbgs() << 
"CycleSink: Instruction added as candidate.\n"); - Candidates.push_back(&MI); - } -} - PreservedAnalyses MachineSinkingPass::run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM) { @@ -892,7 +831,9 @@ bool MachineSinking::run(MachineFunction &MF) { continue; } SmallVector Candidates; - FindCycleSinkCandidates(Cycle, Preheader, Candidates); + for (auto &MI : *Preheader) + if (isSinkIntoCycleCandidate(MI, Cycle, MRI, TII)) + Candidates.push_back(&MI); unsigned i = 0; From 9ffa46a073111b09fe95ce8a1aaf7ced4d5e8377 Mon Sep 17 00:00:00 2001 From: dianqk Date: Sun, 14 Sep 2025 19:53:35 +0800 Subject: [PATCH 2/2] [MachineSink][Experiment] Rematerialize instructions that may be hoisted in LICM --- llvm/lib/CodeGen/MachineLICM.cpp | 2 +- llvm/lib/CodeGen/MachineSink.cpp | 134 +++++++++++++++++++++++++++---- 2 files changed, 118 insertions(+), 18 deletions(-) diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp index 804a40612e85d..7300f092edacd 100644 --- a/llvm/lib/CodeGen/MachineLICM.cpp +++ b/llvm/lib/CodeGen/MachineLICM.cpp @@ -102,7 +102,7 @@ static cl::opt SinkInstsIntoCycleBeforeLICM( "sink-insts-before-licm", cl::desc("Sink instructions into cycles to avoid " "register spills"), - cl::init(true), cl::Hidden); + cl::init(false), cl::Hidden); STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops"); diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index 615edf6b414da..2f2de3b42c7f9 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -106,7 +106,13 @@ static cl::opt SinkInstsIntoCycle("sink-insts-to-avoid-spills", cl::desc("Sink instructions into cycles to avoid " "register spills"), - cl::init(false), cl::Hidden); + cl::init(true), cl::Hidden); + +static cl::opt AggressivelySinkInstsIntoCycle( + "aggressively-sink-insts-to-avoid-spills", + cl::desc("Aggressively sink instructions into cycles to avoid " + "register spills"), + cl::init(false), 
cl::Hidden); static cl::opt SinkIntoCycleLimit( "machine-sink-cycle-limit", @@ -263,6 +269,8 @@ class MachineSinking { aggressivelySinkIntoCycle(MachineCycle *Cycle, MachineInstr &I, DenseMap &SunkInstrs); + bool rematerializeIntoCycle(MachineCycle *Cycle, MachineInstr &I); + bool isProfitableToSinkTo(Register Reg, MachineInstr &MI, MachineBasicBlock *MBB, MachineBasicBlock *SuccToSinkTo, @@ -815,21 +823,28 @@ bool MachineSinking::run(MachineFunction &MF) { if (SinkInstsIntoCycle) { SmallVector Cycles(CI->toplevel_cycles()); SchedModel.init(STI); - bool HasHighPressure; DenseMap SunkInstrs; - enum CycleSinkStage { COPY, LOW_LATENCY, AGGRESSIVE, END }; - for (unsigned Stage = CycleSinkStage::COPY; Stage != CycleSinkStage::END; - ++Stage, SunkInstrs.clear()) { - HasHighPressure = false; + enum CycleSinkStage { + COPY, + LOW_LATENCY, + REMATERIALIZATION, + AGGRESSIVE, + END + }; + for (auto *Cycle : Cycles) { + MachineBasicBlock *Preheader = Cycle->getCyclePreheader(); + if (!Preheader) { + LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n"); + continue; + } + bool HasHighPressure = registerPressureExceedsLimit(*Preheader); + if (!HasHighPressure) + continue; + for (unsigned Stage = CycleSinkStage::COPY; Stage != CycleSinkStage::END; + ++Stage, SunkInstrs.clear()) { - for (auto *Cycle : Cycles) { - MachineBasicBlock *Preheader = Cycle->getCyclePreheader(); - if (!Preheader) { - LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n"); - continue; - } SmallVector Candidates; for (auto &MI : *Preheader) if (isSinkIntoCycleCandidate(MI, Cycle, MRI, TII)) @@ -860,18 +875,23 @@ bool MachineSinking::run(MachineFunction &MF) { !TII->hasLowDefLatency(SchedModel, *I, 0)) continue; - if (!aggressivelySinkIntoCycle(Cycle, *I, SunkInstrs)) - continue; + if (Stage == CycleSinkStage::AGGRESSIVE && + AggressivelySinkInstsIntoCycle) { + if (!aggressivelySinkIntoCycle(Cycle, *I, SunkInstrs)) + continue; + } else { + if (!rematerializeIntoCycle(Cycle, *I)) + continue; + } 
EverMadeChange = true; ++NumCycleSunk; } // Recalculate the pressure after sinking + HasHighPressure = registerPressureExceedsLimit(*Preheader); if (!HasHighPressure) - HasHighPressure = registerPressureExceedsLimit(*Preheader); + break; } - if (!HasHighPressure) - break; } } @@ -1771,6 +1791,86 @@ bool MachineSinking::aggressivelySinkIntoCycle( return true; } +/// Rematerialize instructions into cycles, +/// since LICM in the middle-end hoisted every instruction without considering +/// register pressure. +bool MachineSinking::rematerializeIntoCycle(MachineCycle *Cycle, + MachineInstr &I) { + LLVM_DEBUG(dbgs() << "Rematerialization: Finding sink block for: " << I); + MachineBasicBlock *Preheader = Cycle->getCyclePreheader(); + assert(Preheader && "Cycle sink needs a preheader block"); + MachineBasicBlock *SinkBlock = nullptr; + const MachineOperand &MO = I.getOperand(0); + for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) { + LLVM_DEBUG(dbgs() << "Rematerialization: Analysing use: " << MI); + if (!Cycle->contains(MI.getParent())) { + LLVM_DEBUG( + dbgs() << "Rematerialization: Use not in cycle, can't sink.\n"); + return false; + } + if (!SinkBlock) { + SinkBlock = MI.getParent(); + LLVM_DEBUG(dbgs() << "Rematerialization: Setting sink block to: " + << printMBBReference(*SinkBlock) << "\n"); + continue; + } + if (MI.isPHI()) { + for (unsigned I = 1; I != MI.getNumOperands(); I += 2) { + Register SrcReg = MI.getOperand(I).getReg(); + if (TRI->regsOverlap(SrcReg, MO.getReg())) { + MachineBasicBlock *SrcBB = MI.getOperand(I + 1).getMBB(); + if (SrcBB != SinkBlock) { + SinkBlock = DT->findNearestCommonDominator(SinkBlock, SrcBB); + if (!SinkBlock) + break; + } + } + } + } else { + SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent()); + } + if (!SinkBlock) { + LLVM_DEBUG( + dbgs() << "Rematerialization: Can't find nearest dominator\n"); + return false; + } + LLVM_DEBUG( + dbgs() << "Rematerialization: Setting nearest common dom block: " + << 
printMBBReference(*SinkBlock) << "\n"); + } + if (!SinkBlock) { + LLVM_DEBUG( + dbgs() << "Rematerialization: Not sinking, can't find sink block.\n"); + return false; + } + if (SinkBlock == Preheader) { + LLVM_DEBUG( + dbgs() + << "Rematerialization: Not sinking, sink block is the preheader\n"); + return false; + } + for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) { + if (MI.isPHI() && MI.getParent() == SinkBlock) { + LLVM_DEBUG(dbgs() << "Rematerialization: Not sinking, sink block is " + "using it on PHI.\n"); + return false; + } + } + LLVM_DEBUG(dbgs() << "Rematerialization: Sinking instruction!\n"); + SinkBlock->splice(SinkBlock->SkipPHIsAndLabels(SinkBlock->begin()), Preheader, + I); + // Conservatively clear any kill flags on uses of sunk instruction + for (MachineOperand &MO : I.operands()) { + if (MO.isReg() && MO.readsReg()) + MRI->clearKillFlags(MO.getReg()); + } + // The instruction is moved from its basic block, so do not retain the + // debug information. + assert(!I.isDebugInstr() && "Should not sink debug inst"); + I.setDebugLoc(DebugLoc()); + return true; +} + /// SinkInstruction - Determine whether it is safe to sink the specified machine /// instruction out of its current block into a successor. bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,