1515#include " GCNSubtarget.h"
1616#include " MCTargetDesc/AMDGPUMCTargetDesc.h"
1717#include " llvm/CodeGen/MachineFunctionPass.h"
18+ #include " llvm/CodeGen/TargetSchedule.h"
19+ #include " llvm/Support/BranchProbability.h"
1820
1921using namespace llvm ;
2022
2123#define DEBUG_TYPE " si-pre-emit-peephole"
2224
23- static unsigned SkipThreshold;
24-
25- static cl::opt<unsigned , true > SkipThresholdFlag (
26- " amdgpu-skip-threshold" , cl::Hidden,
27- cl::desc (
28- " Number of instructions before jumping over divergent control flow" ),
29- cl::location(SkipThreshold), cl::init(12 ));
30-
3125namespace {
3226
3327class SIPreEmitPeephole : public MachineFunctionPass {
@@ -41,7 +35,8 @@ class SIPreEmitPeephole : public MachineFunctionPass {
4135 MachineBasicBlock *&TrueMBB,
4236 MachineBasicBlock *&FalseMBB,
4337 SmallVectorImpl<MachineOperand> &Cond);
44- bool mustRetainExeczBranch (const MachineBasicBlock &From,
38+ bool mustRetainExeczBranch (const MachineInstr &Branch,
39+ const MachineBasicBlock &From,
4540 const MachineBasicBlock &To) const ;
4641 bool removeExeczBranch (MachineInstr &MI, MachineBasicBlock &SrcMBB);
4742
@@ -304,11 +299,61 @@ bool SIPreEmitPeephole::getBlockDestinations(
304299 return true ;
305300}
306301
302+ namespace {
303+ class BranchWeightCostModel {
304+ const SIInstrInfo &TII;
305+ const TargetSchedModel &SchedModel;
306+ BranchProbability BranchProb;
307+ static constexpr uint64_t BranchNotTakenCost = 1 ;
308+ uint64_t BranchTakenCost;
309+ uint64_t ThenCyclesCost = 0 ;
310+
311+ public:
312+ BranchWeightCostModel (const SIInstrInfo &TII, const MachineInstr &Branch,
313+ const MachineBasicBlock &Succ)
314+ : TII(TII), SchedModel(TII.getSchedModel()) {
315+ const MachineBasicBlock &Head = *Branch.getParent ();
316+ const auto FromIt = find (Head.successors (), &Succ);
317+ assert (FromIt != Head.succ_end ());
318+
319+ BranchProb = Head.getSuccProbability (FromIt);
320+ if (BranchProb.isUnknown ())
321+ BranchProb = BranchProbability::getZero ();
322+ BranchTakenCost = SchedModel.computeInstrLatency (&Branch);
323+ }
324+
325+ bool isProfitable (const MachineInstr &MI) {
326+ if (TII.isWaitcnt (MI.getOpcode ()))
327+ return false ;
328+
329+ ThenCyclesCost += SchedModel.computeInstrLatency (&MI);
330+
331+ // Consider `P = N/D` to be the probability of execz being false (skipping
332+ // the then-block) The transformation is profitable if always executing the
333+ // 'then' block is cheaper than executing sometimes 'then' and always
334+ // executing s_cbranch_execz:
335+ // * ThenCost <= P*ThenCost + (1-P)*BranchTakenCost + P*BranchNotTakenCost
336+ // * (1-P) * ThenCost <= (1-P)*BranchTakenCost + P*BranchNotTakenCost
337+ // * (D-N)/D * ThenCost <= (D-N)/D * BranchTakenCost + N/D *
338+ // BranchNotTakenCost
339+ uint64_t Numerator = BranchProb.getNumerator ();
340+ uint64_t Denominator = BranchProb.getDenominator ();
341+ return (Denominator - Numerator) * ThenCyclesCost <=
342+ ((Denominator - Numerator) * BranchTakenCost +
343+ Numerator * BranchNotTakenCost);
344+ }
345+ };
346+
307347bool SIPreEmitPeephole::mustRetainExeczBranch (
308- const MachineBasicBlock &From, const MachineBasicBlock &To) const {
309- unsigned NumInstr = 0 ;
310- const MachineFunction *MF = From.getParent ();
348+ const MachineInstr &Branch, const MachineBasicBlock &From,
349+ const MachineBasicBlock &To) const {
350+
351+ const MachineBasicBlock &Head = *Branch.getParent ();
352+ assert (is_contained (Head.successors (), &From));
353+
354+ BranchWeightCostModel CostModel{*TII, Branch, From};
311355
356+ const MachineFunction *MF = From.getParent ();
312357 for (MachineFunction::const_iterator MBBI (&From), ToI (&To), End = MF->end ();
313358 MBBI != End && MBBI != ToI; ++MBBI) {
314359 const MachineBasicBlock &MBB = *MBBI;
@@ -326,23 +371,22 @@ bool SIPreEmitPeephole::mustRetainExeczBranch(
326371 if (TII->hasUnwantedEffectsWhenEXECEmpty (MI))
327372 return true ;
328373
329- // These instructions are potentially expensive even if EXEC = 0.
330- if (TII->isSMRD (MI) || TII->isVMEM (MI) || TII->isFLAT (MI) ||
331- TII->isDS (MI) || TII->isWaitcnt (MI.getOpcode ()))
332- return true ;
333-
334- ++NumInstr;
335- if (NumInstr >= SkipThreshold)
374+ if (!CostModel.isProfitable (MI))
336375 return true ;
337376 }
338377 }
339378
340379 return false ;
341380}
381+ } // namespace
342382
343383// Returns true if the skip branch instruction is removed.
344384bool SIPreEmitPeephole::removeExeczBranch (MachineInstr &MI,
345385 MachineBasicBlock &SrcMBB) {
386+
387+ if (!TII->getSchedModel ().hasInstrSchedModel ())
388+ return false ;
389+
346390 MachineBasicBlock *TrueMBB = nullptr ;
347391 MachineBasicBlock *FalseMBB = nullptr ;
348392 SmallVector<MachineOperand, 1 > Cond;
@@ -351,8 +395,11 @@ bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI,
351395 return false ;
352396
353397 // Consider only the forward branches.
354- if ((SrcMBB.getNumber () >= TrueMBB->getNumber ()) ||
355- mustRetainExeczBranch (*FalseMBB, *TrueMBB))
398+ if (SrcMBB.getNumber () >= TrueMBB->getNumber ())
399+ return false ;
400+
401+ // Consider only when it is legal and profitable
402+ if (mustRetainExeczBranch (MI, *FalseMBB, *TrueMBB))
356403 return false ;
357404
358405 LLVM_DEBUG (dbgs () << " Removing the execz branch: " << MI);
0 commit comments