1515#include " GCNSubtarget.h"
1616#include " MCTargetDesc/AMDGPUMCTargetDesc.h"
1717#include " llvm/CodeGen/MachineFunctionPass.h"
18+ #include " llvm/CodeGen/TargetSchedule.h"
19+ #include " llvm/Support/BranchProbability.h"
1820
1921using namespace llvm ;
2022
2123#define DEBUG_TYPE " si-pre-emit-peephole"
2224
23- static unsigned SkipThreshold;
24-
25- static cl::opt<unsigned , true > SkipThresholdFlag (
26- " amdgpu-skip-threshold" , cl::Hidden,
27- cl::desc (
28- " Number of instructions before jumping over divergent control flow" ),
29- cl::location(SkipThreshold), cl::init(12 ));
30-
3125namespace {
3226
3327class SIPreEmitPeephole : public MachineFunctionPass {
@@ -41,7 +35,8 @@ class SIPreEmitPeephole : public MachineFunctionPass {
4135 MachineBasicBlock *&TrueMBB,
4236 MachineBasicBlock *&FalseMBB,
4337 SmallVectorImpl<MachineOperand> &Cond);
44- bool mustRetainExeczBranch (const MachineBasicBlock &From,
38+ bool mustRetainExeczBranch (const MachineBasicBlock &Head,
39+ const MachineBasicBlock &From,
4540 const MachineBasicBlock &To) const ;
4641 bool removeExeczBranch (MachineInstr &MI, MachineBasicBlock &SrcMBB);
4742
@@ -304,11 +299,67 @@ bool SIPreEmitPeephole::getBlockDestinations(
304299 return true ;
305300}
306301
302+ namespace {
303+ class BranchWeightCostModel {
304+ const SIInstrInfo &TII;
305+ const TargetSchedModel &SchedModel;
306+ BranchProbability BranchProb;
307+ uint64_t BranchCost;
308+ uint64_t ThenCyclesCost = 0 ;
309+
310+ public:
311+ BranchWeightCostModel (const SIInstrInfo &TII, const MachineInstr &Branch,
312+ const MachineBasicBlock &Succ)
313+ : TII(TII), SchedModel(TII.getSchedModel()) {
314+ assert (SchedModel.hasInstrSchedModelOrItineraries ());
315+
316+ const MachineBasicBlock &Head = *Branch.getParent ();
317+ const auto *FromIt = find (Head.successors (), &Succ);
318+ assert (FromIt != Head.succ_end ());
319+
320+ BranchProb = Head.getSuccProbability (FromIt);
321+ if (BranchProb.isUnknown ())
322+ return ;
323+
324+ BranchCost = SchedModel.computeInstrLatency (&Branch, false );
325+ }
326+
327+ bool isUnknown () const { return BranchProb.isUnknown (); }
328+
329+ bool isProfitable (const MachineInstr &MI) {
330+ assert (!isUnknown ());
331+
332+ if (TII.isWaitcnt (MI.getOpcode ()))
333+ return false ;
334+
335+ ThenCyclesCost += SchedModel.computeInstrLatency (&MI, false );
336+
337+ // Consider `P = N/D` to be the probability of execnz being true
338+ // The transformation is profitable if always executing the 'then' block
339+ // is cheaper than executing sometimes 'then' and always
340+ // executing s_cbranch_execnz:
341+ // * ThenCost <= P*ThenCost + BranchCost
342+ // * (1-P) * ThenCost <= BranchCost
343+ // * (D-N)/D * ThenCost <= BranchCost
344+ uint64_t Numerator = BranchProb.getNumerator ();
345+ uint64_t Denominator = BranchProb.getDenominator ();
346+ return (Denominator - Numerator) * ThenCyclesCost <=
347+ Denominator * BranchCost;
348+ }
349+ };
350+
307351bool SIPreEmitPeephole::mustRetainExeczBranch (
308- const MachineBasicBlock &From, const MachineBasicBlock &To) const {
309- unsigned NumInstr = 0 ;
310- const MachineFunction *MF = From.getParent ();
352+ const MachineBasicBlock &Head, const MachineBasicBlock &From,
353+ const MachineBasicBlock &To) const {
354+
355+ const auto *FromIt = find (Head.successors (), &From);
356+ assert (FromIt != Head.succ_end ());
311357
358+ BranchWeightCostModel CostModel{*TII, *Head.getFirstTerminator (), From};
359+ if (CostModel.isUnknown ())
360+ return true ;
361+
362+ const MachineFunction *MF = From.getParent ();
312363 for (MachineFunction::const_iterator MBBI (&From), ToI (&To), End = MF->end ();
313364 MBBI != End && MBBI != ToI; ++MBBI) {
314365 const MachineBasicBlock &MBB = *MBBI;
@@ -326,23 +377,22 @@ bool SIPreEmitPeephole::mustRetainExeczBranch(
326377 if (TII->hasUnwantedEffectsWhenEXECEmpty (MI))
327378 return true ;
328379
329- // These instructions are potentially expensive even if EXEC = 0.
330- if (TII->isSMRD (MI) || TII->isVMEM (MI) || TII->isFLAT (MI) ||
331- TII->isDS (MI) || TII->isWaitcnt (MI.getOpcode ()))
332- return true ;
333-
334- ++NumInstr;
335- if (NumInstr >= SkipThreshold)
380+ if (!CostModel.isProfitable (MI))
336381 return true ;
337382 }
338383 }
339384
340385 return false ;
341386}
387+ } // namespace
342388
343389// Returns true if the skip branch instruction is removed.
344390bool SIPreEmitPeephole::removeExeczBranch (MachineInstr &MI,
345391 MachineBasicBlock &SrcMBB) {
392+
393+ if (!TII->getSchedModel ().hasInstrSchedModelOrItineraries ())
394+ return false ;
395+
346396 MachineBasicBlock *TrueMBB = nullptr ;
347397 MachineBasicBlock *FalseMBB = nullptr ;
348398 SmallVector<MachineOperand, 1 > Cond;
@@ -351,8 +401,11 @@ bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI,
351401 return false ;
352402
353403 // Consider only the forward branches.
354- if ((SrcMBB.getNumber () >= TrueMBB->getNumber ()) ||
355- mustRetainExeczBranch (*FalseMBB, *TrueMBB))
404+ if (SrcMBB.getNumber () >= TrueMBB->getNumber ())
405+ return false ;
406+
407+ // Consider only when it is legal and profitable
408+ if (mustRetainExeczBranch (SrcMBB, *FalseMBB, *TrueMBB))
356409 return false ;
357410
358411 LLVM_DEBUG (dbgs () << " Removing the execz branch: " << MI);
0 commit comments