5353#include " llvm/ADT/STLExtras.h"
5454#include " llvm/ADT/SmallPtrSet.h"
5555#include " llvm/ADT/Statistic.h"
56+ #include " llvm/Analysis/BlockFrequencyInfo.h"
5657#include " llvm/Analysis/DomTreeUpdater.h"
5758#include " llvm/Analysis/GlobalsModRef.h"
5859#include " llvm/Analysis/InstructionSimplify.h"
7576#include " llvm/IR/Module.h"
7677#include " llvm/InitializePasses.h"
7778#include " llvm/Pass.h"
79+ #include " llvm/Support/CommandLine.h"
7880#include " llvm/Support/Debug.h"
7981#include " llvm/Support/raw_ostream.h"
8082#include " llvm/Transforms/Scalar.h"
8183#include " llvm/Transforms/Utils/BasicBlockUtils.h"
84+ #include < cmath>
8285using namespace llvm ;
8386
8487#define DEBUG_TYPE " tailcallelim"
@@ -87,6 +90,11 @@ STATISTIC(NumEliminated, "Number of tail calls removed");
8790STATISTIC (NumRetDuped, " Number of return duplicated" );
8891STATISTIC (NumAccumAdded, " Number of accumulators introduced" );
8992
93+ static cl::opt<bool > ForceDisableBFI (
94+ " tre-disable-entrycount-recompute" , cl::init(false ), cl::Hidden,
95+ cl::desc(" Force disabling recomputing of function entry count, on "
96+ " successful tail recursion elimination." ));
97+
9098// / Scan the specified function for alloca instructions.
9199// / If it contains any dynamic allocas, returns false.
92100static bool canTRE (Function &F) {
@@ -399,6 +407,8 @@ class TailRecursionEliminator {
399407 AliasAnalysis *AA;
400408 OptimizationRemarkEmitter *ORE;
401409 DomTreeUpdater &DTU;
410+ BlockFrequencyInfo *const BFI;
411+ const uint64_t OrigEntryBBFreq;
402412
403413 // The below are shared state we want to have available when eliminating any
404414 // calls in the function. These values should be populated by
@@ -428,8 +438,20 @@ class TailRecursionEliminator {
428438
429439 TailRecursionEliminator (Function &F, const TargetTransformInfo *TTI,
430440 AliasAnalysis *AA, OptimizationRemarkEmitter *ORE,
431- DomTreeUpdater &DTU)
432- : F(F), TTI(TTI), AA(AA), ORE(ORE), DTU(DTU) {}
441+ DomTreeUpdater &DTU, BlockFrequencyInfo *BFI)
442+ : F(F), TTI(TTI), AA(AA), ORE(ORE), DTU(DTU), BFI(BFI),
443+ OrigEntryBBFreq (
444+ BFI ? BFI->getBlockFreq (&F.getEntryBlock()).getFrequency() : 0U) {
445+ if (BFI) {
446+ auto EC = F.getEntryCount ();
447+ (void )EC;
448+ assert (
449+ (EC.has_value () && EC->getCount () != 0 && OrigEntryBBFreq) &&
450+ " If the function has an entry count, its entry basic block should "
451+ " have a non-zero frequency. Pass a nullptr BFI if the function has "
452+ " no entry count" );
453+ }
454+ }
433455
434456 CallInst *findTRECandidate (BasicBlock *BB);
435457
@@ -450,7 +472,7 @@ class TailRecursionEliminator {
450472public:
451473 static bool eliminate (Function &F, const TargetTransformInfo *TTI,
452474 AliasAnalysis *AA, OptimizationRemarkEmitter *ORE,
453- DomTreeUpdater &DTU);
475+ DomTreeUpdater &DTU, BlockFrequencyInfo *BFI );
454476};
455477} // namespace
456478
@@ -735,6 +757,21 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) {
735757 CI->eraseFromParent (); // Remove call.
736758 DTU.applyUpdates ({{DominatorTree::Insert, BB, HeaderBB}});
737759 ++NumEliminated;
760+ if (OrigEntryBBFreq) {
761+ assert (F.getEntryCount ().has_value ());
762+ // This pass is not expected to remove BBs, only add an entry BB. For that
763+ // reason, and because the BB here isn't the new entry BB, the BFI lookup is
764+ // expected to succeed.
765+ assert (&F.getEntryBlock () != BB);
766+ auto RelativeBBFreq =
767+ static_cast <double >(BFI->getBlockFreq (BB).getFrequency ()) /
768+ static_cast <double >(OrigEntryBBFreq);
769+ auto OldEntryCount = F.getEntryCount ()->getCount ();
770+ auto ToSubtract =
771+ static_cast <uint64_t >(std::round (RelativeBBFreq * OldEntryCount));
772+ assert (OldEntryCount > ToSubtract);
773+ F.setEntryCount (OldEntryCount - ToSubtract, F.getEntryCount ()->getType ());
774+ }
738775 return true ;
739776}
740777
@@ -861,7 +898,8 @@ bool TailRecursionEliminator::eliminate(Function &F,
861898 const TargetTransformInfo *TTI,
862899 AliasAnalysis *AA,
863900 OptimizationRemarkEmitter *ORE,
864- DomTreeUpdater &DTU) {
901+ DomTreeUpdater &DTU,
902+ BlockFrequencyInfo *BFI) {
865903 if (F.getFnAttribute (" disable-tail-calls" ).getValueAsBool ())
866904 return false ;
867905
@@ -877,7 +915,7 @@ bool TailRecursionEliminator::eliminate(Function &F,
877915 return MadeChange;
878916
879917 // Change any tail recursive calls to loops.
880- TailRecursionEliminator TRE (F, TTI, AA, ORE, DTU);
918+ TailRecursionEliminator TRE (F, TTI, AA, ORE, DTU, BFI );
881919
882920 for (BasicBlock &BB : F)
883921 MadeChange |= TRE.processBlock (BB);
@@ -919,7 +957,8 @@ struct TailCallElim : public FunctionPass {
919957 return TailRecursionEliminator::eliminate (
920958 F, &getAnalysis<TargetTransformInfoWrapperPass>().getTTI (F),
921959 &getAnalysis<AAResultsWrapperPass>().getAAResults (),
922- &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE (), DTU);
960+ &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE (), DTU,
961+ nullptr );
923962 }
924963};
925964}
@@ -942,14 +981,22 @@ PreservedAnalyses TailCallElimPass::run(Function &F,
942981
943982 TargetTransformInfo &TTI = AM.getResult <TargetIRAnalysis>(F);
944983 AliasAnalysis &AA = AM.getResult <AAManager>(F);
984+ // This must come first. Computing BFI needs the 2 analyses (DT and PDT), so
985+ // if it ran after the getCachedResult lookups below and those had returned
986+ // nullptr (likely for the PDT), updates to the trees would be missed.
987+ auto *BFI = (!ForceDisableBFI && UpdateFunctionEntryCount &&
988+ F.getEntryCount ().has_value () && F.getEntryCount ()->getCount ())
989+ ? &AM.getResult <BlockFrequencyAnalysis>(F)
990+ : nullptr ;
945991 auto &ORE = AM.getResult <OptimizationRemarkEmitterAnalysis>(F);
946992 auto *DT = AM.getCachedResult <DominatorTreeAnalysis>(F);
947993 auto *PDT = AM.getCachedResult <PostDominatorTreeAnalysis>(F);
948994 // There is no noticeable performance difference here between Lazy and Eager
949995 // UpdateStrategy based on some test results. It is feasible to switch the
950996 // UpdateStrategy to Lazy if we find it profitable later.
951997 DomTreeUpdater DTU (DT, PDT, DomTreeUpdater::UpdateStrategy::Eager);
952- bool Changed = TailRecursionEliminator::eliminate (F, &TTI, &AA, &ORE, DTU);
998+ bool Changed =
999+ TailRecursionEliminator::eliminate (F, &TTI, &AA, &ORE, DTU, BFI);
9531000
9541001 if (!Changed)
9551002 return PreservedAnalyses::all ();
0 commit comments