5353#include " llvm/ADT/STLExtras.h"
5454#include " llvm/ADT/SmallPtrSet.h"
5555#include " llvm/ADT/Statistic.h"
56+ #include " llvm/Analysis/BlockFrequencyInfo.h"
5657#include " llvm/Analysis/DomTreeUpdater.h"
5758#include " llvm/Analysis/GlobalsModRef.h"
5859#include " llvm/Analysis/InstructionSimplify.h"
7576#include " llvm/IR/Module.h"
7677#include " llvm/InitializePasses.h"
7778#include " llvm/Pass.h"
79+ #include " llvm/Support/CommandLine.h"
7880#include " llvm/Support/Debug.h"
7981#include " llvm/Support/raw_ostream.h"
8082#include " llvm/Transforms/Scalar.h"
@@ -87,6 +89,11 @@ STATISTIC(NumEliminated, "Number of tail calls removed");
8789STATISTIC (NumRetDuped, " Number of return duplicated" );
8890STATISTIC (NumAccumAdded, " Number of accumulators introduced" );
8991
// Hidden boolean command-line option, false by default. When it is set,
// TailCallElimPass::run does not request BlockFrequencyAnalysis and passes a
// null BFI instead, so the function entry count is not recomputed after a
// successful tail recursion elimination.
92+ static cl::opt<bool > ForceDisableBFI (
93+ " tre-disable-entrycount-recompute" , cl::init(false ), cl::Hidden,
94+ cl::desc(" Force disabling recomputing of function entry count, on "
95+ " successful tail recursion elimination." ));
96+
9097// / Scan the specified function for alloca instructions.
9198// / If it contains any dynamic allocas, returns false.
9299static bool canTRE (Function &F) {
@@ -409,6 +416,8 @@ class TailRecursionEliminator {
409416 AliasAnalysis *AA;
410417 OptimizationRemarkEmitter *ORE;
411418 DomTreeUpdater &DTU;
419+ const uint64_t OrigEntryBBFreq;
420+ DenseMap<const BasicBlock *, uint64_t > OriginalBBFreqs;
412421
413422 // The below are shared state we want to have available when eliminating any
414423 // calls in the function. These values should be populated by
@@ -438,8 +447,23 @@ class TailRecursionEliminator {
438447
// Constructs the per-function eliminator state.
//
// BFI may be nullptr. In that case OrigEntryBBFreq is initialized to 0 and no
// block frequencies are recorded. When BFI is non-null, the function is
// asserted to have a non-zero entry count and a non-zero entry-block
// frequency, and the frequency of every basic block of F is stored in
// OriginalBBFreqs.
439448 TailRecursionEliminator (Function &F, const TargetTransformInfo *TTI,
440449 AliasAnalysis *AA, OptimizationRemarkEmitter *ORE,
441- DomTreeUpdater &DTU)
442- : F(F), TTI(TTI), AA(AA), ORE(ORE), DTU(DTU) {}
450+ DomTreeUpdater &DTU, BlockFrequencyInfo *BFI)
451+ : F(F), TTI(TTI), AA(AA), ORE(ORE), DTU(DTU),
452+ OrigEntryBBFreq (
453+ BFI ? BFI->getBlockFreq (&F.getEntryBlock()).getFrequency() : 0U) {
454+ if (BFI) {
// EC is only consumed by the assert below; the (void) cast marks it as used.
455+ auto EC = F.getEntryCount ();
456+ (void )EC;
457+ assert (
458+ (EC.has_value () && EC->getCount () != 0 && OrigEntryBBFreq) &&
459+ " If the function has an entry count, its entry basic block should "
460+ " have a non-zero frequency. Pass a nullptr BFI if the function has "
461+ " no entry count" );
462+
// Snapshot every block's frequency now; eliminateCall looks these values up
// by block when adjusting the entry count.
// NOTE(review): presumably captured up front because BFI is not kept in sync
// while the CFG is rewritten — confirm.
463+ for (const auto &BB : F)
464+ OriginalBBFreqs.insert ({&BB, BFI->getBlockFreq (&BB).getFrequency ()});
465+ }
466+ }
443467
444468 CallInst *findTRECandidate (BasicBlock *BB);
445469
@@ -460,7 +484,7 @@ class TailRecursionEliminator {
460484public:
461485 static bool eliminate (Function &F, const TargetTransformInfo *TTI,
462486 AliasAnalysis *AA, OptimizationRemarkEmitter *ORE,
463- DomTreeUpdater &DTU);
487+ DomTreeUpdater &DTU, BlockFrequencyInfo *BFI );
464488};
465489} // namespace
466490
@@ -746,6 +770,17 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) {
746770 CI->eraseFromParent (); // Remove call.
747771 DTU.applyUpdates ({{DominatorTree::Insert, BB, HeaderBB}});
748772 ++NumEliminated;
773+ if (OrigEntryBBFreq) {
774+ assert (F.getEntryCount ().has_value ());
775+ auto It = OriginalBBFreqs.find (BB);
776+ assert (It != OriginalBBFreqs.end ());
777+ auto RelativeBBFreq =
778+ static_cast <double >(It->second ) / static_cast <double >(OrigEntryBBFreq);
779+ auto OldEntryCount = F.getEntryCount ()->getCount ();
780+ auto ToSubtract = static_cast <uint64_t >(RelativeBBFreq * OldEntryCount);
781+ assert (OldEntryCount > ToSubtract);
782+ F.setEntryCount (OldEntryCount - ToSubtract, F.getEntryCount ()->getType ());
783+ }
749784 return true ;
750785}
751786
@@ -872,7 +907,8 @@ bool TailRecursionEliminator::eliminate(Function &F,
872907 const TargetTransformInfo *TTI,
873908 AliasAnalysis *AA,
874909 OptimizationRemarkEmitter *ORE,
875- DomTreeUpdater &DTU) {
910+ DomTreeUpdater &DTU,
911+ BlockFrequencyInfo *BFI) {
876912 if (F.getFnAttribute (" disable-tail-calls" ).getValueAsBool ())
877913 return false ;
878914
@@ -888,7 +924,7 @@ bool TailRecursionEliminator::eliminate(Function &F,
888924 return MadeChange;
889925
890926 // Change any tail recursive calls to loops.
891- TailRecursionEliminator TRE (F, TTI, AA, ORE, DTU);
927+ TailRecursionEliminator TRE (F, TTI, AA, ORE, DTU, BFI );
892928
893929 for (BasicBlock &BB : F)
894930 MadeChange |= TRE.processBlock (BB);
@@ -930,7 +966,8 @@ struct TailCallElim : public FunctionPass {
930966 return TailRecursionEliminator::eliminate (
931967 F, &getAnalysis<TargetTransformInfoWrapperPass>().getTTI (F),
932968 &getAnalysis<AAResultsWrapperPass>().getAAResults (),
933- &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE (), DTU);
969+ &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE (), DTU,
970+ nullptr );
934971 }
935972};
936973}
@@ -953,14 +990,22 @@ PreservedAnalyses TailCallElimPass::run(Function &F,
953990
954991 TargetTransformInfo &TTI = AM.getResult <TargetIRAnalysis>(F);
955992 AliasAnalysis &AA = AM.getResult <AAManager>(F);
993+ // Requesting BFI must come first. Computing it needs the 2 analyses whose
994+ // cached results are queried below; if this came after those queries and they
995+ // returned nullptr (likely for the PDT), updates to the trees would be missed.
996+ auto *BFI = (!ForceDisableBFI && UpdateFunctionEntryCount &&
997+ F.getEntryCount ().has_value () && F.getEntryCount ()->getCount ())
998+ ? &AM.getResult <BlockFrequencyAnalysis>(F)
999+ : nullptr ;
9561000 auto &ORE = AM.getResult <OptimizationRemarkEmitterAnalysis>(F);
9571001 auto *DT = AM.getCachedResult <DominatorTreeAnalysis>(F);
9581002 auto *PDT = AM.getCachedResult <PostDominatorTreeAnalysis>(F);
9591003 // There is no noticeable performance difference here between Lazy and Eager
9601004 // UpdateStrategy based on some test results. It is feasible to switch the
9611005 // UpdateStrategy to Lazy if we find it profitable later.
9621006 DomTreeUpdater DTU (DT, PDT, DomTreeUpdater::UpdateStrategy::Eager);
963- bool Changed = TailRecursionEliminator::eliminate (F, &TTI, &AA, &ORE, DTU);
1007+ bool Changed =
1008+ TailRecursionEliminator::eliminate (F, &TTI, &AA, &ORE, DTU, BFI);
9641009
9651010 if (!Changed)
9661011 return PreservedAnalyses::all ();
0 commit comments