1515#include " GCNSubtarget.h"
1616#include " MCTargetDesc/AMDGPUMCTargetDesc.h"
1717#include " llvm/CodeGen/MachineFunctionPass.h"
18+ #include " llvm/CodeGen/TargetSchedule.h"
19+ #include " llvm/Support/BranchProbability.h"
1820
1921using namespace llvm ;
2022
2123#define DEBUG_TYPE " si-pre-emit-peephole"
2224
23- static unsigned SkipThreshold;
24-
25- static cl::opt<unsigned , true > SkipThresholdFlag (
26- " amdgpu-skip-threshold" , cl::Hidden,
27- cl::desc (
28- " Number of instructions before jumping over divergent control flow" ),
29- cl::location(SkipThreshold), cl::init(12 ));
30-
3125namespace {
3226
3327class SIPreEmitPeephole : public MachineFunctionPass {
@@ -41,7 +35,8 @@ class SIPreEmitPeephole : public MachineFunctionPass {
4135 MachineBasicBlock *&TrueMBB,
4236 MachineBasicBlock *&FalseMBB,
4337 SmallVectorImpl<MachineOperand> &Cond);
44- bool mustRetainExeczBranch (const MachineBasicBlock &From,
38+ bool mustRetainExeczBranch (const MachineBasicBlock &Head,
39+ const MachineBasicBlock &From,
4540 const MachineBasicBlock &To) const ;
4641 bool removeExeczBranch (MachineInstr &MI, MachineBasicBlock &SrcMBB);
4742
@@ -304,11 +299,58 @@ bool SIPreEmitPeephole::getBlockDestinations(
304299 return true ;
305300}
306301
302+ namespace {
303+ class BranchWeightCostModel {
304+ const SIInstrInfo &TII;
305+ const TargetSchedModel &SchedModel;
306+ BranchProbability BranchProb;
307+ uint64_t BranchCost;
308+ uint64_t ThenCyclesCost = 0 ;
309+
310+ public:
311+ BranchWeightCostModel (const SIInstrInfo &TII, const MachineInstr &Branch,
312+ const MachineBasicBlock &Succ)
313+ : TII(TII), SchedModel(TII.getSchedModel()) {
314+ assert (SchedModel.hasInstrSchedModelOrItineraries ());
315+
316+ const MachineBasicBlock &Head = *Branch.getParent ();
317+ const auto *FromIt = find (Head.successors (), &Succ);
318+ assert (FromIt != Head.succ_end ());
319+
320+ BranchProb = Head.getSuccProbability (FromIt);
321+ assert (!BranchProb.isUnknown ());
322+ BranchCost = SchedModel.computeInstrLatency (&Branch, false );
323+ }
324+
325+ bool isProfitable (const MachineInstr &MI) {
326+ if (TII.isWaitcnt (MI.getOpcode ()))
327+ return false ;
328+
329+ ThenCyclesCost += SchedModel.computeInstrLatency (&MI, false );
330+
331+ // Consider `P = N/D` to be the probability of execz being true
332+ // The transformation is profitable if always executing the 'then' block
333+ // is cheaper than executing sometimes 'then' and always
334+ // executing s_cbranch_execz:
335+ // * ThenCost <= P*ThenCost + BranchCost
336+ // * (1-P) * ThenCost <= BranchCost
337+ // * (D-N)/D * ThenCost <= BranchCost
338+ uint64_t Numerator = BranchProb.getNumerator ();
339+ uint64_t Denominator = BranchProb.getDenominator ();
340+ return (Denominator - Numerator) * ThenCyclesCost <=
341+ Denominator * BranchCost;
342+ }
343+ };
344+
307345bool SIPreEmitPeephole::mustRetainExeczBranch (
308- const MachineBasicBlock &From, const MachineBasicBlock &To) const {
309- unsigned NumInstr = 0 ;
310- const MachineFunction *MF = From.getParent ();
346+ const MachineBasicBlock &Head, const MachineBasicBlock &From,
347+ const MachineBasicBlock &To) const {
348+
349+ assert (is_contained (Head.successors (), &From));
350+
351+ BranchWeightCostModel CostModel{*TII, *Head.getFirstTerminator (), From};
311352
353+ const MachineFunction *MF = From.getParent ();
312354 for (MachineFunction::const_iterator MBBI (&From), ToI (&To), End = MF->end ();
313355 MBBI != End && MBBI != ToI; ++MBBI) {
314356 const MachineBasicBlock &MBB = *MBBI;
@@ -326,23 +368,22 @@ bool SIPreEmitPeephole::mustRetainExeczBranch(
326368 if (TII->hasUnwantedEffectsWhenEXECEmpty (MI))
327369 return true ;
328370
329- // These instructions are potentially expensive even if EXEC = 0.
330- if (TII->isSMRD (MI) || TII->isVMEM (MI) || TII->isFLAT (MI) ||
331- TII->isDS (MI) || TII->isWaitcnt (MI.getOpcode ()))
332- return true ;
333-
334- ++NumInstr;
335- if (NumInstr >= SkipThreshold)
371+ if (!CostModel.isProfitable (MI))
336372 return true ;
337373 }
338374 }
339375
340376 return false ;
341377}
378+ } // namespace
342379
343380// Returns true if the skip branch instruction is removed.
344381bool SIPreEmitPeephole::removeExeczBranch (MachineInstr &MI,
345382 MachineBasicBlock &SrcMBB) {
383+
384+ if (!TII->getSchedModel ().hasInstrSchedModelOrItineraries ())
385+ return false ;
386+
346387 MachineBasicBlock *TrueMBB = nullptr ;
347388 MachineBasicBlock *FalseMBB = nullptr ;
348389 SmallVector<MachineOperand, 1 > Cond;
@@ -351,8 +392,11 @@ bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI,
351392 return false ;
352393
353394 // Consider only the forward branches.
354- if ((SrcMBB.getNumber () >= TrueMBB->getNumber ()) ||
355- mustRetainExeczBranch (*FalseMBB, *TrueMBB))
395+ if (SrcMBB.getNumber () >= TrueMBB->getNumber ())
396+ return false ;
397+
398+ // Consider only when it is legal and profitable
399+ if (mustRetainExeczBranch (SrcMBB, *FalseMBB, *TrueMBB))
356400 return false ;
357401
358402 LLVM_DEBUG (dbgs () << " Removing the execz branch: " << MI);
0 commit comments