1515#include " GCNSubtarget.h"
1616#include " MCTargetDesc/AMDGPUMCTargetDesc.h"
1717#include " llvm/CodeGen/MachineFunctionPass.h"
18+ #include " llvm/CodeGen/TargetSchedule.h"
19+ #include " llvm/Support/BranchProbability.h"
1820
1921using namespace llvm ;
2022
2123#define DEBUG_TYPE " si-pre-emit-peephole"
2224
23- static unsigned SkipThreshold;
24-
25- static cl::opt<unsigned , true > SkipThresholdFlag (
26- " amdgpu-skip-threshold" , cl::Hidden,
27- cl::desc (
28- " Number of instructions before jumping over divergent control flow" ),
29- cl::location(SkipThreshold), cl::init(12 ));
30-
3125namespace {
3226
3327class SIPreEmitPeephole : public MachineFunctionPass {
@@ -41,7 +35,8 @@ class SIPreEmitPeephole : public MachineFunctionPass {
4135 MachineBasicBlock *&TrueMBB,
4236 MachineBasicBlock *&FalseMBB,
4337 SmallVectorImpl<MachineOperand> &Cond);
44- bool mustRetainExeczBranch (const MachineBasicBlock &From,
38+ bool mustRetainExeczBranch (const MachineInstr &Branch,
39+ const MachineBasicBlock &From,
4540 const MachineBasicBlock &To) const ;
4641 bool removeExeczBranch (MachineInstr &MI, MachineBasicBlock &SrcMBB);
4742
@@ -304,11 +299,60 @@ bool SIPreEmitPeephole::getBlockDestinations(
304299 return true ;
305300}
306301
302+ namespace {
303+ class BranchWeightCostModel {
304+ const SIInstrInfo &TII;
305+ const TargetSchedModel &SchedModel;
306+ BranchProbability BranchProb;
307+ static constexpr uint64_t BranchNotTakenCost = 1 ;
308+ uint64_t BranchTakenCost;
309+ uint64_t ThenCyclesCost = 0 ;
310+
311+ public:
312+ BranchWeightCostModel (const SIInstrInfo &TII, const MachineInstr &Branch,
313+ const MachineBasicBlock &Succ)
314+ : TII(TII), SchedModel(TII.getSchedModel()) {
315+ const MachineBasicBlock &Head = *Branch.getParent ();
316+ const auto *FromIt = find (Head.successors (), &Succ);
317+ assert (FromIt != Head.succ_end ());
318+
319+ BranchProb = Head.getSuccProbability (FromIt);
320+ assert (!BranchProb.isUnknown ());
321+ BranchTakenCost = SchedModel.computeInstrLatency (&Branch, false );
322+ }
323+
324+ bool isProfitable (const MachineInstr &MI) {
325+ if (TII.isWaitcnt (MI.getOpcode ()))
326+ return false ;
327+
328+ ThenCyclesCost += SchedModel.computeInstrLatency (&MI, false );
329+
330+ // Consider `P = N/D` to be the probability of execz being false (skipping
331+ // the then-block) The transformation is profitable if always executing the
332+ // 'then' block is cheaper than executing sometimes 'then' and always
333+ // executing s_cbranch_execz:
334+ // * ThenCost <= P*ThenCost + (1-P)*BranchTakenCost + P*BranchNonTakenCost
335+ // * (1-P) * ThenCost <= (1-P)*BranchTakenCost + P*BranchNonTakenCost
336+ // * (D-N)/D * ThenCost <= (D-N)/D * BranchTakenCost + N/D *
337+ // BranchNonTakenCost
338+ uint64_t Numerator = BranchProb.getNumerator ();
339+ uint64_t Denominator = BranchProb.getDenominator ();
340+ return (Denominator - Numerator) * ThenCyclesCost <=
341+ ((Denominator - Numerator) * BranchTakenCost +
342+ Numerator * BranchNotTakenCost);
343+ }
344+ };
345+
307346bool SIPreEmitPeephole::mustRetainExeczBranch (
308- const MachineBasicBlock &From, const MachineBasicBlock &To) const {
309- unsigned NumInstr = 0 ;
310- const MachineFunction *MF = From.getParent ();
347+ const MachineInstr &Branch, const MachineBasicBlock &From,
348+ const MachineBasicBlock &To) const {
349+
350+ const MachineBasicBlock &Head = *Branch.getParent ();
351+ assert (is_contained (Head.successors (), &From));
352+
353+ BranchWeightCostModel CostModel{*TII, Branch, From};
311354
355+ const MachineFunction *MF = From.getParent ();
312356 for (MachineFunction::const_iterator MBBI (&From), ToI (&To), End = MF->end ();
313357 MBBI != End && MBBI != ToI; ++MBBI) {
314358 const MachineBasicBlock &MBB = *MBBI;
@@ -326,23 +370,22 @@ bool SIPreEmitPeephole::mustRetainExeczBranch(
326370 if (TII->hasUnwantedEffectsWhenEXECEmpty (MI))
327371 return true ;
328372
329- // These instructions are potentially expensive even if EXEC = 0.
330- if (TII->isSMRD (MI) || TII->isVMEM (MI) || TII->isFLAT (MI) ||
331- TII->isDS (MI) || TII->isWaitcnt (MI.getOpcode ()))
332- return true ;
333-
334- ++NumInstr;
335- if (NumInstr >= SkipThreshold)
373+ if (!CostModel.isProfitable (MI))
336374 return true ;
337375 }
338376 }
339377
340378 return false ;
341379}
380+ } // namespace
342381
343382// Returns true if the skip branch instruction is removed.
344383bool SIPreEmitPeephole::removeExeczBranch (MachineInstr &MI,
345384 MachineBasicBlock &SrcMBB) {
385+
386+ if (!TII->getSchedModel ().hasInstrSchedModelOrItineraries ())
387+ return false ;
388+
346389 MachineBasicBlock *TrueMBB = nullptr ;
347390 MachineBasicBlock *FalseMBB = nullptr ;
348391 SmallVector<MachineOperand, 1 > Cond;
@@ -351,8 +394,11 @@ bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI,
351394 return false ;
352395
353396 // Consider only the forward branches.
354- if ((SrcMBB.getNumber () >= TrueMBB->getNumber ()) ||
355- mustRetainExeczBranch (*FalseMBB, *TrueMBB))
397+ if (SrcMBB.getNumber () >= TrueMBB->getNumber ())
398+ return false ;
399+
400+ // Consider only when it is legal and profitable
401+ if (mustRetainExeczBranch (MI, *FalseMBB, *TrueMBB))
356402 return false ;
357403
358404 LLVM_DEBUG (dbgs () << " Removing the execz branch: " << MI);
0 commit comments