1515#include " GCNSubtarget.h"
1616#include " MCTargetDesc/AMDGPUMCTargetDesc.h"
1717#include " llvm/CodeGen/MachineFunctionPass.h"
18+ #include " llvm/CodeGen/MachineInstr.h"
19+ #include " llvm/CodeGen/MachineTraceMetrics.h"
1820#include " llvm/CodeGen/TargetSchedule.h"
21+ #include " llvm/InitializePasses.h"
1922#include " llvm/Support/BranchProbability.h"
2023
2124using namespace llvm ;
@@ -29,6 +32,13 @@ class SIPreEmitPeephole : public MachineFunctionPass {
2932 const SIInstrInfo *TII = nullptr ;
3033 const SIRegisterInfo *TRI = nullptr ;
3134
35+ // Trace metrics analysis result, used to estimate the number of cycles it
36+ // takes to execute a block. For simplicity it is obtained with the TS_Local
37+ // strategy so that each trace consists of a single block; getCriticalPath and
38+ // getResourceDepth then give the results for that single block (instead of
39+ // for a whole trace).
// Initialized to nullptr like TII/TRI; assigned in runOnMachineFunction.
40+ MachineTraceMetrics::Ensemble *Traces = nullptr ;
41+
3242 bool optimizeVccBranch (MachineInstr &MI) const ;
3343 bool optimizeSetGPR (MachineInstr &First, MachineInstr &MI) const ;
3444 bool getBlockDestinations (MachineBasicBlock &SrcMBB,
@@ -37,9 +47,14 @@ class SIPreEmitPeephole : public MachineFunctionPass {
3747 SmallVectorImpl<MachineOperand> &Cond);
3848 bool mustRetainExeczBranch (const MachineInstr &Branch,
3949 const MachineBasicBlock &From,
40- const MachineBasicBlock &To) const ;
50+ const MachineBasicBlock &To);
4151 bool removeExeczBranch (MachineInstr &MI, MachineBasicBlock &SrcMBB);
4252
// Declares the analyses this pass depends on: MachineTraceMetrics is marked
// as required, then the MachineFunctionPass base implementation is invoked to
// add its own requirements.
53+ void getAnalysisUsage (AnalysisUsage &AU) const override {
54+ AU.addRequired <MachineTraceMetrics>();
55+ MachineFunctionPass::getAnalysisUsage (AU);
56+ }
57+
4358public:
4459 static char ID;
4560
@@ -52,8 +67,11 @@ class SIPreEmitPeephole : public MachineFunctionPass {
5267
5368} // End anonymous namespace.
5469
55- INITIALIZE_PASS (SIPreEmitPeephole, DEBUG_TYPE,
56- " SI peephole optimizations" , false , false )
// Pass registration. The BEGIN/END pair brackets the list of pass
// dependencies; MachineTraceMetrics is listed here because
// getAnalysisUsage() marks it as a required analysis.
70+ INITIALIZE_PASS_BEGIN (SIPreEmitPeephole, DEBUG_TYPE,
71+ " SI peephole optimizations" , false , false )
72+ INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
73+ INITIALIZE_PASS_END(SIPreEmitPeephole, DEBUG_TYPE, " SI peephole optimizations" ,
74+ false , false )
5775
5876char SIPreEmitPeephole::ID = 0;
5977
@@ -299,60 +317,23 @@ bool SIPreEmitPeephole::getBlockDestinations(
299317 return true ;
300318}
301319
302- namespace {
303- class BranchWeightCostModel {
304- const SIInstrInfo &TII;
305- const TargetSchedModel &SchedModel;
306- BranchProbability BranchProb;
307- static constexpr uint64_t BranchNotTakenCost = 1 ;
308- uint64_t BranchTakenCost;
309- uint64_t ThenCyclesCost = 0 ;
310-
311- public:
312- BranchWeightCostModel (const SIInstrInfo &TII, const MachineInstr &Branch,
313- const MachineBasicBlock &Succ)
314- : TII(TII), SchedModel(TII.getSchedModel()) {
315- const MachineBasicBlock &Head = *Branch.getParent ();
316- const auto *FromIt = find (Head.successors (), &Succ);
317- assert (FromIt != Head.succ_end ());
318-
319- BranchProb = Head.getSuccProbability (FromIt);
320- if (BranchProb.isUnknown ())
321- BranchProb = BranchProbability::getZero ();
322- BranchTakenCost = SchedModel.computeInstrLatency (&Branch, false );
323- }
324-
325- bool isProfitable (const MachineInstr &MI) {
326- if (TII.isWaitcnt (MI.getOpcode ()))
327- return false ;
328-
329- ThenCyclesCost += SchedModel.computeInstrLatency (&MI, false );
330-
331- // Consider `P = N/D` to be the probability of execz being false (skipping
332- // the then-block) The transformation is profitable if always executing the
333- // 'then' block is cheaper than executing sometimes 'then' and always
334- // executing s_cbranch_execz:
335- // * ThenCost <= P*ThenCost + (1-P)*BranchTakenCost + P*BranchNotTakenCost
336- // * (1-P) * ThenCost <= (1-P)*BranchTakenCost + P*BranchNotTakenCost
337- // * (D-N)/D * ThenCost <= (D-N)/D * BranchTakenCost + N/D *
338- // BranchNotTakenCost
339- uint64_t Numerator = BranchProb.getNumerator ();
340- uint64_t Denominator = BranchProb.getDenominator ();
341- return (Denominator - Numerator) * ThenCyclesCost <=
342- ((Denominator - Numerator) * BranchTakenCost +
343- Numerator * BranchNotTakenCost);
344- }
345- };
346-
347- bool SIPreEmitPeephole::mustRetainExeczBranch (
348- const MachineInstr &Branch, const MachineBasicBlock &From,
349- const MachineBasicBlock &To) const {
// Decides whether the execz skip branch `Branch` has to be kept.
//
// Walks the blocks from `From` up to (but not including) `To` and returns
// true (the branch must stay) as soon as one of them contains an instruction
// with unwanted effects when EXEC is empty, contains a waitcnt, or makes the
// accumulated cycle estimate of the skipped blocks exceed what the cost model
// below considers profitable. Returns false when the branch may be removed.
//
// Not const: it queries the MachineTraceMetrics ensemble stored in `Traces`.
320+ bool SIPreEmitPeephole::mustRetainExeczBranch (const MachineInstr &Branch,
321+ const MachineBasicBlock &From,
322+ const MachineBasicBlock &To) {
350323
351324 const MachineBasicBlock &Head = *Branch.getParent ();
352- assert (is_contained (Head.successors (), &From));
325+ const auto *FromIt = find (Head.successors (), &From);
326+ assert (FromIt != Head.succ_end ());
327+
// An unknown probability is treated as zero (as the previous cost model did)
// rather than returning early, so that the per-instruction safety checks in
// the loop below are never bypassed.
328+ auto BranchProb = Head.getSuccProbability (FromIt);
329+ if (BranchProb.isUnknown ())
330+ BranchProb = BranchProbability::getZero ();
353331
354- BranchWeightCostModel CostModel{*TII, Branch, From};
332+ uint64_t BranchTakenCost =
333+ TII->getSchedModel ().computeInstrLatency (&Branch, false );
334+ constexpr uint64_t BranchNotTakenCost = 1 ;
355335
// Running cycle estimate of the blocks visited so far.
336+ unsigned ThenCyclesCost = 0 ;
356337 const MachineFunction *MF = From.getParent ();
357338 for (MachineFunction::const_iterator MBBI (&From), ToI (&To), End = MF->end ();
358339 MBBI != End && MBBI != ToI; ++MBBI) {
@@ -371,14 +352,33 @@ bool SIPreEmitPeephole::mustRetainExeczBranch(
371352 if (TII->hasUnwantedEffectsWhenEXECEmpty (MI))
372353 return true ;
373354
374- if (!CostModel. isProfitable (MI))
355+ if (TII-> isWaitcnt (MI. getOpcode () ))
375356 return true ;
376357 }
358+
// Query the trace of the block currently being visited (not of From on every
// iteration) so that each block in the range contributes its own estimate.
359+ MachineTraceMetrics::Trace Trace = Traces->getTrace (&*MBBI);
360+ ThenCyclesCost +=
361+ std::max (Trace.getCriticalPath (), Trace.getResourceDepth (true ));
362+
363+ // Consider `P = N/D` to be the probability of execz being false (i.e. of
364+ // the then-block being executed). The transformation is profitable if always
365+ // executing the 'then' block is cheaper than executing sometimes 'then' and
366+ // always executing s_cbranch_execz:
367+ // * ThenCost <= P*ThenCost + (1-P)*BranchTakenCost + P*BranchNotTakenCost
368+ // * (1-P) * ThenCost <= (1-P)*BranchTakenCost + P*BranchNotTakenCost
369+ // * (D-N)/D * ThenCost <= (D-N)/D * BranchTakenCost + N/D *
370+ // BranchNotTakenCost
371+ uint64_t Numerator = BranchProb.getNumerator ();
372+ uint64_t Denominator = BranchProb.getDenominator ();
373+ bool IsProfitable = (Denominator - Numerator) * ThenCyclesCost <=
374+ ((Denominator - Numerator) * BranchTakenCost +
375+ Numerator * BranchNotTakenCost);
// Checked after every block: once the accumulated cost is too high there is
// no need to look at the remaining blocks.
376+ if (!IsProfitable)
377+ return true ;
377378 }
378379
379380 return false ;
380381}
381- } // namespace
382382
383383// Returns true if the skip branch instruction is removed.
384384bool SIPreEmitPeephole::removeExeczBranch (MachineInstr &MI,
@@ -413,6 +413,8 @@ bool SIPreEmitPeephole::runOnMachineFunction(MachineFunction &MF) {
413413 const GCNSubtarget &ST = MF.getSubtarget <GCNSubtarget>();
414414 TII = ST.getInstrInfo ();
415415 TRI = &TII->getRegisterInfo ();
416+ Traces = getAnalysis<MachineTraceMetrics>().getEnsemble (
417+ llvm::MachineTraceStrategy::TS_Local);
416418 bool Changed = false ;
417419
418420 MF.RenumberBlocks ();
0 commit comments