5353#include " llvm/ADT/STLExtras.h"
5454#include " llvm/ADT/SmallPtrSet.h"
5555#include " llvm/ADT/Statistic.h"
56+ #include " llvm/Analysis/BlockFrequencyInfo.h"
5657#include " llvm/Analysis/DomTreeUpdater.h"
5758#include " llvm/Analysis/GlobalsModRef.h"
5859#include " llvm/Analysis/InstructionSimplify.h"
7576#include " llvm/IR/Module.h"
7677#include " llvm/InitializePasses.h"
7778#include " llvm/Pass.h"
79+ #include " llvm/Support/CommandLine.h"
7880#include " llvm/Support/Debug.h"
7981#include " llvm/Support/raw_ostream.h"
8082#include " llvm/Transforms/Scalar.h"
8183#include " llvm/Transforms/Utils/BasicBlockUtils.h"
84+ #include < cmath>
8285using namespace llvm ;
8386
8487#define DEBUG_TYPE " tailcallelim"
@@ -87,6 +90,11 @@ STATISTIC(NumEliminated, "Number of tail calls removed");
8790STATISTIC (NumRetDuped, " Number of return duplicated" );
8891STATISTIC (NumAccumAdded, " Number of accumulators introduced" );
8992
// Hidden command-line flag, false by default. When it is set,
// TailCallElimPass::run does not request BlockFrequencyAnalysis and hands a
// null BlockFrequencyInfo to the eliminator, so the function entry count is not
// adjusted after a tail recursive call is eliminated.
93+ static cl::opt<bool > ForceDisableBFI (
94+ " tre-disable-entrycount-recompute" , cl::init(false ), cl::Hidden,
95+ cl::desc(" Force disabling recomputing of function entry count, on "
96+ " successful tail recursion elimination." ));
97+
9098// / Scan the specified function for alloca instructions.
9199// / If it contains any dynamic allocas, returns false.
92100static bool canTRE (Function &F) {
@@ -399,6 +407,9 @@ class TailRecursionEliminator {
399407 AliasAnalysis *AA;
400408 OptimizationRemarkEmitter *ORE;
401409 DomTreeUpdater &DTU;
410+ BlockFrequencyInfo *const BFI;
411+ const uint64_t OrigEntryBBFreq;
412+ const uint64_t OrigEntryCount;
402413
403414 // The below are shared state we want to have available when eliminating any
404415 // calls in the function. Their values should be populated by
@@ -428,8 +439,19 @@ class TailRecursionEliminator {
428439
// Constructs the eliminator for function F.
//
// BFI may be null. When it is non-null, the frequency of F's entry block is
// recorded in OrigEntryBBFreq; otherwise OrigEntryBBFreq is 0. OrigEntryCount
// records F's entry count as it is at construction time, or 0 if F has none.
// Callers that pass a non-null BFI are expected to do so only when both of
// these values are non-zero; the assert below documents that expectation.
429440 TailRecursionEliminator (Function &F, const TargetTransformInfo *TTI,
430441 AliasAnalysis *AA, OptimizationRemarkEmitter *ORE,
431- DomTreeUpdater &DTU)
432- : F(F), TTI(TTI), AA(AA), ORE(ORE), DTU(DTU) {}
442+ DomTreeUpdater &DTU, BlockFrequencyInfo *BFI)
443+ : F(F), TTI(TTI), AA(AA), ORE(ORE), DTU(DTU), BFI(BFI),
444+ OrigEntryBBFreq (
445+ BFI ? BFI->getBlockFreq (&F.getEntryBlock()).getFrequency() : 0U),
446+ OrigEntryCount(F.getEntryCount() ? F.getEntryCount()->getCount() : 0) {
447+ if (BFI) {
448+ // The assert is meant as API documentation for the caller.
449+ assert ((OrigEntryCount != 0 && OrigEntryBBFreq != 0 ) &&
450+ " If a BFI was provided, the function should have both an entry "
451+ " count that is non-zero and an entry basic block with a non-zero "
452+ " frequency." );
453+ }
454+ }
433455
434456 CallInst *findTRECandidate (BasicBlock *BB);
435457
@@ -450,7 +472,7 @@ class TailRecursionEliminator {
450472public:
// Static entry point used by both pass wrappers; the bool result reports
// whether F was changed. BFI may be null (the legacy FunctionPass passes
// nullptr); with a null BFI the function entry count is not adjusted.
451473 static bool eliminate (Function &F, const TargetTransformInfo *TTI,
452474 AliasAnalysis *AA, OptimizationRemarkEmitter *ORE,
453- DomTreeUpdater &DTU);
475+ DomTreeUpdater &DTU, BlockFrequencyInfo *BFI );
454476};
455477} // namespace
456478
@@ -735,6 +757,28 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) {
735757 CI->eraseFromParent (); // Remove call.
736758 DTU.applyUpdates ({{DominatorTree::Insert, BB, HeaderBB}});
737759 ++NumEliminated;
760+ if (OrigEntryBBFreq) {
761+ assert (F.getEntryCount ().has_value ());
762+ // This pass is not expected to remove BBs, only add an entry BB. For that
763+ // reason, and because the BB here isn't the new entry BB, the BFI lookup is
764+ // expected to succeed.
765+ assert (&F.getEntryBlock () != BB);
766+ auto RelativeBBFreq =
767+ static_cast <double >(BFI->getBlockFreq (BB).getFrequency ()) /
768+ static_cast <double >(OrigEntryBBFreq);
769+ auto ToSubtract =
770+ static_cast <uint64_t >(std::round (RelativeBBFreq * OrigEntryCount));
771+ auto OldEntryCount = F.getEntryCount ()->getCount ();
772+ if (OldEntryCount <= ToSubtract) {
773+ LLVM_DEBUG (
774+ errs () << " [TRE] The entrycount attributable to the recursive call, "
775+ << ToSubtract
776+ << " , should be strictly lower than the function entry count, "
777+ << OldEntryCount << " \n " );
778+ } else {
779+ F.setEntryCount (OldEntryCount - ToSubtract, F.getEntryCount ()->getType ());
780+ }
781+ }
738782 return true ;
739783}
740784
@@ -861,7 +905,8 @@ bool TailRecursionEliminator::eliminate(Function &F,
861905 const TargetTransformInfo *TTI,
862906 AliasAnalysis *AA,
863907 OptimizationRemarkEmitter *ORE,
864- DomTreeUpdater &DTU) {
908+ DomTreeUpdater &DTU,
909+ BlockFrequencyInfo *BFI) {
865910 if (F.getFnAttribute (" disable-tail-calls" ).getValueAsBool ())
866911 return false ;
867912
@@ -877,7 +922,7 @@ bool TailRecursionEliminator::eliminate(Function &F,
877922 return MadeChange;
878923
879924 // Change any tail recursive calls to loops.
880- TailRecursionEliminator TRE (F, TTI, AA, ORE, DTU);
925+ TailRecursionEliminator TRE (F, TTI, AA, ORE, DTU, BFI );
881926
882927 for (BasicBlock &BB : F)
883928 MadeChange |= TRE.processBlock (BB);
@@ -919,7 +964,8 @@ struct TailCallElim : public FunctionPass {
919964 return TailRecursionEliminator::eliminate (
920965 F, &getAnalysis<TargetTransformInfoWrapperPass>().getTTI (F),
921966 &getAnalysis<AAResultsWrapperPass>().getAAResults (),
922- &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE (), DTU);
967+ &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE (), DTU,
968+ /* BFI=*/ nullptr );
923969 }
924970};
925971}
@@ -942,14 +988,22 @@ PreservedAnalyses TailCallElimPass::run(Function &F,
942988
943989 TargetTransformInfo &TTI = AM.getResult <TargetIRAnalysis>(F);
944990 AliasAnalysis &AA = AM.getResult <AAManager>(F);
991+ // This must come first. BFI needs the DT and PDT, so requesting it may
992+ // compute them. If it came after the getCachedResult calls below, those
993+ // could return nullptr (likely for the PDT) and tree updates would be missed.
994+ auto *BFI = (!ForceDisableBFI && UpdateFunctionEntryCount &&
995+ F.getEntryCount ().has_value () && F.getEntryCount ()->getCount ())
996+ ? &AM.getResult <BlockFrequencyAnalysis>(F)
997+ : nullptr ;
945998 auto &ORE = AM.getResult <OptimizationRemarkEmitterAnalysis>(F);
946999 auto *DT = AM.getCachedResult <DominatorTreeAnalysis>(F);
9471000 auto *PDT = AM.getCachedResult <PostDominatorTreeAnalysis>(F);
9481001 // There is no noticeable performance difference here between Lazy and Eager
9491002 // UpdateStrategy based on some test results. It is feasible to switch the
9501003 // UpdateStrategy to Lazy if we find it profitable later.
9511004 DomTreeUpdater DTU (DT, PDT, DomTreeUpdater::UpdateStrategy::Eager);
952- bool Changed = TailRecursionEliminator::eliminate (F, &TTI, &AA, &ORE, DTU);
1005+ bool Changed =
1006+ TailRecursionEliminator::eliminate (F, &TTI, &AA, &ORE, DTU, BFI);
9531007
9541008 if (!Changed)
9551009 return PreservedAnalyses::all ();
0 commit comments