diff --git a/llvm/include/llvm/Support/BranchProbability.h b/llvm/include/llvm/Support/BranchProbability.h index 42fe225709ef8..b15d6e1707afa 100644 --- a/llvm/include/llvm/Support/BranchProbability.h +++ b/llvm/include/llvm/Support/BranchProbability.h @@ -97,6 +97,9 @@ class BranchProbability { /// \return \c Num divided by \c this. LLVM_ABI uint64_t scaleByInverse(uint64_t Num) const; + /// Compute pow(Probability, N). + BranchProbability pow(unsigned N) const; + BranchProbability &operator+=(BranchProbability RHS) { assert(N != UnknownN && RHS.N != UnknownN && "Unknown probability cannot participate in arithmetics."); diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h index 2d2355d6be68a..86eb21389756c 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -365,6 +365,40 @@ LLVM_ABI bool setLoopEstimatedTripCount( Loop *L, unsigned EstimatedTripCount, std::optional EstimatedLoopInvocationWeight = std::nullopt); +/// Based on branch weight metadata, return either: +/// - An unknown probability if the implementation is unable to handle the loop +/// form of \p L (e.g., \p L must have a latch block that controls the loop +/// exit). +/// - The probability \c P that, at the end of any iteration, the latch of \p L +/// will start another iteration such that `1 - P` is the probability of +/// exiting the loop. +BranchProbability getLoopProbability(Loop *L); + +/// Set branch weight metadata for the latch of \p L to indicate that, at the +/// end of any iteration, \p P and `1 - P` are the probabilities of starting +/// another iteration and exiting the loop, respectively. Return false if the +/// implementation is unable to handle the loop form of \p L (e.g., \p L must +/// have a latch block that controls the loop exit). Otherwise, return true. +bool setLoopProbability(Loop *L, BranchProbability P); + +/// Based on branch weight metadata, return either: +/// - An unknown probability if the implementation cannot extract the +/// probability (e.g., \p B must have exactly two target labels, so it must be +/// a conditional branch). +/// - The probability \c P that control flows from \p B to its first target +/// label such that `1 - P` is the probability of control flowing to its +/// second target label, or vice-versa if \p ForFirstTarget is false. +BranchProbability getBranchProbability(BranchInst *B, bool ForFirstTarget); + +/// Set branch weight metadata for \p B to indicate that \p P and `1 - P` are +/// the probabilities of control flowing to its first and second target labels, +/// respectively, or vice-versa if \p ForFirstTarget is false. Return false if +/// the implementation cannot set the probability (e.g., \p B must have exactly +/// two target labels, so it must be a conditional branch). Otherwise, return +/// true. +bool setBranchProbability(BranchInst *B, BranchProbability P, + bool ForFirstTarget); + /// Check inner loop (L) backedge count is known to be invariant on all /// iterations of its outer loop. If the loop has no parent, this is trivially /// true. diff --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h index 871c13d972470..a3efc43c62dc3 100644 --- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h +++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h @@ -97,7 +97,9 @@ LLVM_ABI bool UnrollRuntimeLoopRemainder( LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI, bool PreserveLCSSA, unsigned SCEVExpansionBudget, bool RuntimeUnrollMultiExit, - Loop **ResultLoop = nullptr); + Loop **ResultLoop = nullptr, + std::optional OriginalTripCount = std::nullopt, + BranchProbability OriginalLoopProb = BranchProbability::getUnknown()); LLVM_ABI LoopUnrollResult UnrollAndJamLoop( Loop *L, unsigned Count, unsigned TripCount, unsigned TripMultiple, diff --git a/llvm/lib/Support/BranchProbability.cpp b/llvm/lib/Support/BranchProbability.cpp index e3763449d16cb..ea42f34b58645 100644 --- a/llvm/lib/Support/BranchProbability.cpp +++ b/llvm/lib/Support/BranchProbability.cpp @@ -111,3 +111,10 @@ uint64_t BranchProbability::scale(uint64_t Num) const { uint64_t BranchProbability::scaleByInverse(uint64_t Num) const { return ::scale<0>(Num, D, N); } + +BranchProbability BranchProbability::pow(unsigned N) const { + BranchProbability Res = BranchProbability::getOne(); + for (unsigned I = 0; I < N; ++I) + Res *= *this; + return Res; +} diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp index 0192ce6a960ff..d3cc53bd46273 100644 --- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -501,6 +501,7 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, const bool MaxOrZero = SE->isBackedgeTakenCountMaxOrZero(L); std::optional OriginalTripCount = llvm::getLoopEstimatedTripCount(L); + BranchProbability OriginalLoopProb = llvm::getLoopProbability(L); // Effectively "DCE" unrolled iterations that are beyond the max tripcount // and will never be executed. @@ -591,11 +592,11 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, : isEpilogProfitable(L); if (ULO.Runtime && - !UnrollRuntimeLoopRemainder(L, ULO.Count, ULO.AllowExpensiveTripCount, - EpilogProfitability, ULO.UnrollRemainder, - ULO.ForgetAllSCEV, LI, SE, DT, AC, TTI, - PreserveLCSSA, ULO.SCEVExpansionBudget, - ULO.RuntimeUnrollMultiExit, RemainderLoop)) { + !UnrollRuntimeLoopRemainder( + L, ULO.Count, ULO.AllowExpensiveTripCount, EpilogProfitability, + ULO.UnrollRemainder, ULO.ForgetAllSCEV, LI, SE, DT, AC, TTI, + PreserveLCSSA, ULO.SCEVExpansionBudget, ULO.RuntimeUnrollMultiExit, + RemainderLoop, OriginalTripCount, OriginalLoopProb)) { if (ULO.Force) ULO.Runtime = false; else { @@ -1130,13 +1131,13 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, LI->erase(L); // We shouldn't try to use `L` anymore. L = nullptr; - } else if (OriginalTripCount) { + } else { // Update metadata for the loop's branch weights and estimated trip count: // - If ULO.Runtime, UnrollRuntimeLoopRemainder sets the guard branch // weights, latch branch weights, and estimated trip count of the // remainder loop it creates. It also sets the branch weights for the // unrolled loop guard it creates. The branch weights for the unrolled - // loop latch are adjusted below. FIXME: Actually handle ULO.Runtime. + // loop latch are adjusted below. FIXME: Handle prologue loops. // - Otherwise, if unrolled loop iteration latches become unconditional, // branch weights are adjusted above. FIXME: Actually handle such // unconditional latches. @@ -1159,10 +1160,17 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, // the unrolled loop as a whole without considering the branch weights for // each unrolled iteration's latch within it, we store the new trip count as // separate metadata. - unsigned NewTripCount = *OriginalTripCount / ULO.Count; - if (!ULO.Runtime && *OriginalTripCount % ULO.Count) - NewTripCount += 1; - setLoopEstimatedTripCount(L, NewTripCount); + if (!OriginalLoopProb.isUnknown() && ULO.Runtime && EpilogProfitability) { + // Where p is always the probability of executing at least 1 more + // iteration, the probability for at least n more iterations is p^n. + setLoopProbability(L, OriginalLoopProb.pow(ULO.Count)); + } + if (OriginalTripCount) { + unsigned NewTripCount = *OriginalTripCount / ULO.Count; + if (!ULO.Runtime && *OriginalTripCount % ULO.Count) + NewTripCount += 1; + setLoopEstimatedTripCount(L, NewTripCount); + } } // LoopInfo should not be valid, confirm that. diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 6312831cf0ee0..a788a58624ec8 100644 --- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -40,6 +40,7 @@ #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" #include "llvm/Transforms/Utils/UnrollLoop.h" +#include using namespace llvm; @@ -195,6 +196,21 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, } } +/// Assume, due to our position in the remainder loop or its guard, anywhere +/// from 0 to \p N more iterations can possibly execute. Among such cases in +/// the original loop (with loop probability \p OriginalLoopProb), what is the +/// probability of executing at least one more iteration? +static BranchProbability +probOfNextInRemainder(BranchProbability OriginalLoopProb, unsigned N) { + // Each of these variables holds the original loop's probability that the + // number of iterations it will execute is some m in the specified range. + BranchProbability ProbOne = OriginalLoopProb; // 1 <= m + BranchProbability ProbTooMany = ProbOne.pow(N + 1); // N + 1 <= m + BranchProbability ProbNotTooMany = ProbTooMany.getCompl(); // 0 <= m <= N + BranchProbability ProbOneNotTooMany = ProbOne - ProbTooMany; // 1 <= m <= N + return ProbOneNotTooMany / ProbNotTooMany; +} + /// Connect the unrolling epilog code to the original loop. /// The unrolling epilog code contains code to execute the /// 'extra' iterations if the run-time trip count modulo the @@ -221,7 +237,8 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA, ScalarEvolution &SE, - unsigned Count, AssumptionCache &AC) { + unsigned Count, AssumptionCache &AC, + BranchProbability OriginalLoopProb) { BasicBlock *Latch = L->getLoopLatch(); assert(Latch && "Loop must have a latch"); BasicBlock *EpilogLatch = cast(VMap[Latch]); @@ -332,12 +349,19 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, PreserveLCSSA); // Add the branch to the exit block (around the epilog loop) MDNode *BranchWeights = nullptr; - if (hasBranchWeightMD(*Latch->getTerminator())) { + if (OriginalLoopProb.isUnknown() && + hasBranchWeightMD(*Latch->getTerminator())) { // Assume equal distribution in interval [0, Count). MDBuilder MDB(B.getContext()); BranchWeights = MDB.createBranchWeights(1, Count - 1); } - B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit, BranchWeights); + BranchInst *RemainderLoopGuard = + B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit, BranchWeights); + if (!OriginalLoopProb.isUnknown()) { + setBranchProbability(RemainderLoopGuard, + probOfNextInRemainder(OriginalLoopProb, Count - 1), + /*ForFirstTarget=*/true); + } InsertPt->eraseFromParent(); if (DT) { auto *NewDom = DT->findNearestCommonDominator(Exit, NewExit); @@ -357,14 +381,15 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, /// The cloned blocks should be inserted between InsertTop and InsertBot. /// InsertTop should be new preheader, InsertBot new loop exit. /// Returns the new cloned loop that is created. -static Loop * -CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder, - const bool UnrollRemainder, - BasicBlock *InsertTop, - BasicBlock *InsertBot, BasicBlock *Preheader, +static Loop *CloneLoopBlocks(Loop *L, Value *NewIter, + const bool UseEpilogRemainder, + const bool UnrollRemainder, BasicBlock *InsertTop, + BasicBlock *InsertBot, BasicBlock *Preheader, std::vector &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, - DominatorTree *DT, LoopInfo *LI, unsigned Count) { + DominatorTree *DT, LoopInfo *LI, unsigned Count, + std::optional OriginalTripCount, + BranchProbability OriginalLoopProb) { StringRef suffix = UseEpilogRemainder ? "epil" : "prol"; BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); @@ -419,7 +444,8 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder, Builder.CreateAdd(NewIdx, One, NewIdx->getName() + ".next"); Value *IdxCmp = Builder.CreateICmpNE(IdxNext, NewIter, NewIdx->getName() + ".cmp"); MDNode *BranchWeights = nullptr; - if (hasBranchWeightMD(*LatchBR)) { + if ((OriginalLoopProb.isUnknown() || !UseEpilogRemainder) && + hasBranchWeightMD(*LatchBR)) { uint32_t ExitWeight; uint32_t BackEdgeWeight; if (Count >= 3) { @@ -437,7 +463,29 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder, MDBuilder MDB(Builder.getContext()); BranchWeights = MDB.createBranchWeights(BackEdgeWeight, ExitWeight); } - Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot, BranchWeights); + BranchInst *RemainderLoopLatch = + Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot, BranchWeights); + if (!OriginalLoopProb.isUnknown() && UseEpilogRemainder) { + // Compute the total frequency of the original loop body from the + // remainder iterations. Once we've reached them, the first of them + // always executes, so its frequency and probability are 1. + double FreqRemIters = 1; + if (Count > 2) { + BranchProbability ProbReaching = BranchProbability::getOne(); + for (unsigned N = Count - 2; N >= 1; --N) { + ProbReaching *= probOfNextInRemainder(OriginalLoopProb, N); + FreqRemIters += double(ProbReaching.getNumerator()) / + ProbReaching.getDenominator(); + } + } + // Solve for the loop probability that would produce that frequency. + // Sum(i=0..inf)(Prob^i) = 1/(1-Prob) = FreqRemIters. + double ProbDouble = 1 - 1 / FreqRemIters; + BranchProbability Prob = BranchProbability::getBranchProbability( + std::round(ProbDouble * BranchProbability::getDenominator()), + BranchProbability::getDenominator()); + setBranchProbability(RemainderLoopLatch, Prob, /*ForFirstTarget=*/true); + } NewIdx->addIncoming(Zero, InsertTop); NewIdx->addIncoming(IdxNext, NewBB); LatchBR->eraseFromParent(); @@ -469,6 +517,8 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder, std::optional NewLoopID = makeFollowupLoopID( LoopID, {LLVMLoopUnrollFollowupAll, LLVMLoopUnrollFollowupRemainder}); + if (OriginalTripCount && UseEpilogRemainder) + setLoopEstimatedTripCount(NewLoop, *OriginalTripCount % Count); if (NewLoopID) { NewLoop->setLoopID(*NewLoopID); @@ -603,7 +653,8 @@ bool llvm::UnrollRuntimeLoopRemainder( LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI, bool PreserveLCSSA, unsigned SCEVExpansionBudget, bool RuntimeUnrollMultiExit, - Loop **ResultLoop) { + Loop **ResultLoop, std::optional OriginalTripCount, + BranchProbability OriginalLoopProb) { LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n"); LLVM_DEBUG(L->dump()); LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n" @@ -823,12 +874,23 @@ bool llvm::UnrollRuntimeLoopRemainder( BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit; // Branch to either remainder (extra iterations) loop or unrolling loop. MDNode *BranchWeights = nullptr; - if (hasBranchWeightMD(*Latch->getTerminator())) { + if ((OriginalLoopProb.isUnknown() || !UseEpilogRemainder) && + hasBranchWeightMD(*Latch->getTerminator())) { // Assume loop is nearly always entered. MDBuilder MDB(B.getContext()); BranchWeights = MDB.createBranchWeights(EpilogHeaderWeights); } - B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop, BranchWeights); + BranchInst *UnrollingLoopGuard = + B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop, BranchWeights); + if (!OriginalLoopProb.isUnknown() && UseEpilogRemainder) { + // The original loop's first iteration always happens. Compute the + // probability of the original loop executing Count-1 iterations after that + // to complete the first iteration of the unrolled loop. + BranchProbability ProbOne = OriginalLoopProb; + BranchProbability ProbRest = ProbOne.pow(Count - 1); + setBranchProbability(UnrollingLoopGuard, ProbRest, + /*ForFirstTarget=*/false); + } PreHeaderBR->eraseFromParent(); if (DT) { if (UseEpilogRemainder) @@ -855,9 +917,10 @@ bool llvm::UnrollRuntimeLoopRemainder( // iterations. This function adds the appropriate CFG connections. BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit; BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader; - Loop *remainderLoop = CloneLoopBlocks( - L, ModVal, UseEpilogRemainder, UnrollRemainder, InsertTop, InsertBot, - NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI, Count); + Loop *remainderLoop = + CloneLoopBlocks(L, ModVal, UseEpilogRemainder, UnrollRemainder, InsertTop, + InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, + LI, Count, OriginalTripCount, OriginalLoopProb); // Insert the cloned blocks into the function. F->splice(InsertBot->getIterator(), F, NewBlocks[0]->getIterator(), F->end()); @@ -956,7 +1019,8 @@ bool llvm::UnrollRuntimeLoopRemainder( // Connect the epilog code to the original loop and update the // PHI functions. ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader, EpilogPreHeader, - NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE, Count, *AC); + NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE, Count, *AC, + OriginalLoopProb); // Update counter in loop for unrolling. // Use an incrementing IV. Pre-incr/post-incr is backedge/trip count. diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index b6ba82288aeb4..a524385db4c20 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -972,6 +972,49 @@ bool llvm::setLoopEstimatedTripCount( return true; } +BranchProbability llvm::getLoopProbability(Loop *L) { + BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L); + if (!LatchBranch) + return BranchProbability::getUnknown(); + bool FirstTargetIsLoop = LatchBranch->getSuccessor(0) == L->getHeader(); + return getBranchProbability(LatchBranch, FirstTargetIsLoop); +} + +bool llvm::setLoopProbability(Loop *L, BranchProbability P) { + BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L); + if (!LatchBranch) + return false; + bool FirstTargetIsLoop = LatchBranch->getSuccessor(0) == L->getHeader(); + return setBranchProbability(LatchBranch, P, FirstTargetIsLoop); +} + +BranchProbability llvm::getBranchProbability(BranchInst *B, + bool ForFirstTarget) { + if (B->getNumSuccessors() != 2) + return BranchProbability::getUnknown(); + uint64_t Weight0, Weight1; + if (!extractBranchWeights(*B, Weight0, Weight1)) + return BranchProbability::getUnknown(); + if (!ForFirstTarget) + std::swap(Weight0, Weight1); + return BranchProbability::getBranchProbability(Weight0, Weight0 + Weight1); +} + +bool llvm::setBranchProbability(BranchInst *B, BranchProbability P, + bool ForFirstTarget) { + if (B->getNumSuccessors() != 2) + return false; + BranchProbability Prob0 = P; + BranchProbability Prob1 = P.getCompl(); + if (!ForFirstTarget) + std::swap(Prob0, Prob1); + MDBuilder MDB(B->getContext()); + B->setMetadata( + LLVMContext::MD_prof, + MDB.createBranchWeights(Prob0.getNumerator(), Prob1.getNumerator())); + return true; +} + bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop, ScalarEvolution &SE) { Loop *OuterL = InnerLoop->getParentLoop(); diff --git a/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-epilog.ll b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-epilog.ll new file mode 100644 index 0000000000000..96b31d801c2f9 --- /dev/null +++ b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-epilog.ll @@ -0,0 +1,160 @@ +; Test branch weight metadata, estimated trip count metadata, and block +; frequencies after loop unrolling with an epilogue. + +; ------------------------------------------------------------------------------ +; Define substitutions. +; +; Check original loop body frequency. +; DEFINE: %{bf-fc} = opt %s -S -passes='print' 2>&1 | \ +; DEFINE: FileCheck %s -check-prefixes +; +; Unroll loops and then check block frequency. The -implicit-check-not options +; make sure that no additional labels or @f calls show up. +; DEFINE: %{ur-bf} = opt %s -S -passes='loop-unroll,print' 2>&1 +; DEFINE: %{fc} = FileCheck %s \ +; DEFINE: -implicit-check-not='{{^( *- )?[^ ;]*:}}' \ +; DEFINE: -implicit-check-not='call void @f' -check-prefixes + +; ------------------------------------------------------------------------------ +; Check various interesting unroll count values relative to the original loop's +; estimated trip count of 11 (e.g., minimum and boundary values). +; +; RUN: %{bf-fc} ALL,ORIG +; RUN: %{ur-bf} -unroll-count=2 -unroll-runtime | %{fc} ALL,UR,UR2 +; RUN: %{ur-bf} -unroll-count=4 -unroll-runtime | %{fc} ALL,UR,UR4 +; RUN: %{ur-bf} -unroll-count=10 -unroll-runtime | %{fc} ALL,UR,UR10 +; RUN: %{ur-bf} -unroll-count=11 -unroll-runtime | %{fc} ALL,UR,UR11 +; RUN: %{ur-bf} -unroll-count=12 -unroll-runtime | %{fc} ALL,UR,UR12 + +; ------------------------------------------------------------------------------ +; Check the iteration frequencies, which, when each is multiplied by the number +; of original loop bodies that execute within it, should sum to almost exactly +; the original loop body frequency. +; +; ALL-LABEL: block-frequency-info: test +; +; ORIG: - [[ENTRY:.*]]: +; ORIG: - [[DO_BODY:.*]]: float = 11.0, +; ORIG: - [[DO_END:.*]]: +; +; UR: - [[ENTRY:.*]]: +; UR: - [[ENTRY_NEW:.*]]: +; UR2: - [[DO_BODY:.*]]: float = 5.2381, +; UR4: - [[DO_BODY:.*]]: float = 2.3702, +; UR10: - [[DO_BODY:.*]]: float = 0.6902, +; UR11: - [[DO_BODY:.*]]: float = 0.59359, +; UR12: - [[DO_BODY:.*]]: float = 0.5144, +; UR: - [[DO_END_UNR_LCSSA:.*]]: +; UR: - [[DO_BODY_EPIL_PREHEADER:.*]]: +; UR2: - [[DO_BODY_EPIL:.*]]: float = 0.52381, +; UR4: - [[DO_BODY_EPIL:.*]]: float = 1.5193, +; UR10: - [[DO_BODY_EPIL:.*]]: float = 4.098, +; UR11: - [[DO_BODY_EPIL:.*]]: float = 4.4705, +; UR12: - [[DO_BODY_EPIL:.*]]: float = 4.8272, +; UR4: - [[DO_END_EPILOG_LCSSA:.*]]: +; UR10: - [[DO_END_EPILOG_LCSSA:.*]]: +; UR11: - [[DO_END_EPILOG_LCSSA:.*]]: +; UR12: - [[DO_END_EPILOG_LCSSA:.*]]: +; UR: - [[DO_END:.*]]: + +; ------------------------------------------------------------------------------ +; Check the CFGs, including the number of original loop bodies that appear +; within each unrolled iteration. +; +; UR-LABEL: define void @test(i32 %{{.*}}) { +; UR: [[ENTRY]]: +; UR: br i1 %{{.*}}, label %[[DO_BODY_EPIL_PREHEADER]], label %[[ENTRY_NEW]], !prof ![[#PROF_UR_GUARD:]]{{$}} +; UR: [[ENTRY_NEW]]: +; UR: br label %[[DO_BODY]] +; UR: [[DO_BODY]]: +; UR2-COUNT-2: call void @f +; UR4-COUNT-4: call void @f +; UR10-COUNT-10: call void @f +; UR11-COUNT-11: call void @f +; UR12-COUNT-12: call void @f +; UR: br i1 %{{.*}}, label %[[DO_END_UNR_LCSSA]], label %[[DO_BODY]], !prof ![[#PROF_UR_LATCH:]], !llvm.loop ![[#LOOP_UR_LATCH:]]{{$}} +; UR: [[DO_END_UNR_LCSSA]]: +; UR: br i1 %{{.*}}, label %[[DO_BODY_EPIL_PREHEADER]], label %[[DO_END:.*]], !prof ![[#PROF_RM_GUARD:]]{{$}} +; UR: [[DO_BODY_EPIL_PREHEADER]]: +; UR: br label %[[DO_BODY_EPIL]] +; UR: [[DO_BODY_EPIL]]: +; UR: call void @f +; UR4: br i1 %{{.*}}, label %[[DO_BODY_EPIL]], label %[[DO_END_EPILOG_LCSSA]], !prof ![[#PROF_RM_LATCH:]], !llvm.loop ![[#LOOP_RM_LATCH:]]{{$}} +; UR10: br i1 %{{.*}}, label %[[DO_BODY_EPIL]], label %[[DO_END_EPILOG_LCSSA]], !prof ![[#PROF_RM_LATCH:]], !llvm.loop ![[#LOOP_RM_LATCH:]]{{$}} +; UR11: br i1 %{{.*}}, label %[[DO_BODY_EPIL]], label %[[DO_END_EPILOG_LCSSA]], !prof ![[#PROF_RM_LATCH:]], !llvm.loop ![[#LOOP_RM_LATCH:]]{{$}} +; UR12: br i1 %{{.*}}, label %[[DO_BODY_EPIL]], label %[[DO_END_EPILOG_LCSSA]], !prof ![[#PROF_RM_LATCH:]], !llvm.loop ![[#LOOP_RM_LATCH:]]{{$}} +; UR4: [[DO_END_EPILOG_LCSSA]]: +; UR10: [[DO_END_EPILOG_LCSSA]]: +; UR11: [[DO_END_EPILOG_LCSSA]]: +; UR12: [[DO_END_EPILOG_LCSSA]]: +; UR: br label %[[DO_END]] +; UR: [[DO_END]]: +; UR: ret void + +declare void @f(i32) + +define void @test(i32 %n) { +entry: + br label %do.body + +do.body: + %i = phi i32 [ 0, %entry ], [ %inc, %do.body ] + %inc = add i32 %i, 1 + call void @f(i32 %i) + %c = icmp sge i32 %inc, %n + br i1 %c, label %do.end, label %do.body, !prof !0 + +do.end: + ret void +} + +!0 = !{!"branch_weights", i32 1, i32 10} + +; ------------------------------------------------------------------------------ +; Check branch weight metadata and estimated trip count metadata. +; +; UR2: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 195225786, i32 1952257862} +; UR4: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 534047398, i32 1613436250} +; UR10: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 1236740947, i32 910742701} +; UR11: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 1319535738, i32 827947910} +; UR12: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 1394803730, i32 752679918} +; +; UR2: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 372703773, i32 1774779875} +; UR4: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 680723421, i32 1466760227} +; UR10: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 1319535738, i32 827947910} +; UR11: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 1394803730, i32 752679918} +; UR12: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 1463229177, i32 684254471} +; +; UR2: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]} +; UR4: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]} +; UR10: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]} +; UR11: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]} +; UR12: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]} +; +; UR2: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 5} +; UR4: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 2} +; UR10: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 1} +; UR11: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 1} +; UR12: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 0} +; UR: ![[#DISABLE]] = !{!"llvm.loop.unroll.disable"} +; +; UR2: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 1022611260, i32 1124872388} +; UR4: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 1531603292, i32 615880356} +; UR10: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 1829762672, i32 317720976} +; UR11: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 1846907894, i32 300575754} +; UR12: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 1860963812, i32 286519836} +; +; UR4: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 1038564635, i32 1108919013} +; UR10: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 1656332913, i32 491150735} +; UR11: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 1693034047, i32 454449601} +; UR12: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 1723419551, i32 424064097} + +; UR4: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_RM_TC:]], ![[#DISABLE:]]} +; UR10: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]} +; UR11: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_RM_TC:]], ![[#DISABLE:]]} +; UR12: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_RM_TC:]], ![[#DISABLE:]]} +; +; UR4: ![[#LOOP_RM_TC]] = !{!"llvm.loop.estimated_trip_count", i32 3} +; For UR10, llvm.loop.estimated_trip_count is the same for both loops. +; UR11: ![[#LOOP_RM_TC]] = !{!"llvm.loop.estimated_trip_count", i32 0} +; UR12: ![[#LOOP_RM_TC]] = !{!"llvm.loop.estimated_trip_count", i32 11} diff --git a/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll b/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll index 0c52b5a0edef8..047360178aa06 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll @@ -188,7 +188,7 @@ define void @pr56286(i64 %x, ptr %src, ptr %dst, ptr %ptr.src) !prof !0 { ; CHECK-NEXT: [[L_1_LCSSA_UNR:%.*]] = phi i32 [ poison, [[OUTER_HEADER]] ], [ [[L_1_LCSSA_UNR_PH]], [[INNER_1_HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ] ; CHECK-NEXT: [[INNER_1_IV_UNR:%.*]] = phi i64 [ [[X]], [[OUTER_HEADER]] ], [ [[INNER_1_IV_UNR_PH]], [[INNER_1_HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ] ; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 7 -; CHECK-NEXT: br i1 [[TMP4]], label [[OUTER_MIDDLE:%.*]], label [[OUTER_HEADER_NEW:%.*]], !prof [[PROF3]] +; CHECK-NEXT: br i1 [[TMP4]], label [[OUTER_MIDDLE:%.*]], label [[OUTER_HEADER_NEW:%.*]], !prof [[PROF6:![0-9]+]] ; CHECK: outer.header.new: ; CHECK-NEXT: br label [[INNER_1_HEADER:%.*]] ; CHECK: inner.1.header: @@ -232,7 +232,7 @@ define void @pr56286(i64 %x, ptr %src, ptr %dst, ptr %ptr.src) !prof !0 { ; CHECK-NEXT: store i32 [[L_1_7]], ptr [[DST]], align 8 ; CHECK-NEXT: [[INNER_1_IV_NEXT_7]] = add i64 [[INNER_1_IV]], 8 ; CHECK-NEXT: [[CMP_2_7:%.*]] = icmp sgt i64 [[INNER_1_IV_NEXT_6]], 0 -; CHECK-NEXT: br i1 [[CMP_2_7]], label [[OUTER_MIDDLE_UNR_LCSSA:%.*]], label [[INNER_1_HEADER]], !prof [[PROF6:![0-9]+]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP_2_7]], label [[OUTER_MIDDLE_UNR_LCSSA:%.*]], label [[INNER_1_HEADER]], !prof [[PROF7:![0-9]+]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: outer.middle.unr-lcssa: ; CHECK-NEXT: [[L_1_LCSSA_PH:%.*]] = phi i32 [ [[L_1_7]], [[INNER_1_LATCH_7]] ] ; CHECK-NEXT: br label [[OUTER_MIDDLE]] diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll index db87143286f93..2f8f98d40e86f 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll @@ -2,15 +2,24 @@ ;; Check that the remainder loop is properly assigned a branch weight for its latch branch. ; CHECK-LABEL: @test( -; CHECK-LABEL: for.body: -; CHECK: br i1 [[COND1:%.*]], label %for.end.loopexit.unr-lcssa, label %for.body, !prof ![[#PROF:]], !llvm.loop ![[#LOOP:]] -; CHECK-LABEL: for.body.epil: -; CHECK: br i1 [[COND2:%.*]], label %for.body.epil, label %for.end.loopexit.epilog-lcssa, !prof ![[#PROF2:]], !llvm.loop ![[#LOOP2:]] - -; FIXME: These branch weights are incorrect and should not be merged into main -; until PR #159163, which fixes them. -; CHECK: ![[#PROF]] = !{!"branch_weights", i32 1, i32 9999} -; CHECK: ![[#PROF2]] = !{!"branch_weights", i32 1, i32 1} +; CHECK-LABEL: entry: +; CHECK: [[FOR_BODY_PREHEADER:.*]]: +; CHECK: br i1 %{{.*}}, label %[[FOR_BODY_EPIL_PREHEADER:.*]], label %[[FOR_BODY_PREHEADER_NEW:.*]], !prof ![[#PROF_UR_GUARD:]] +; CHECK: [[FOR_BODY_PREHEADER_NEW]]: +; CHECK: br label %for.body +; CHECK: for.body: +; CHECK: %add = add +; CHECK: %add.1 = add +; CHECK: %add.2 = add +; CHECK: %add.3 = add +; CHECK-NOT: %add.4 = add +; CHECK: br i1 %{{.*}}, label %[[FOR_END_LOOPEXIT_UNR_LCSSA:.*]], label %for.body, !prof ![[#PROF_UR_LATCH:]], !llvm.loop ![[#LOOP_UR_LATCH:]] +; CHECK: [[FOR_END_LOOPEXIT_UNR_LCSSA]]: +; CHECK: br i1 %{{.*}}, label %[[FOR_BODY_EPIL_PREHEADER]], label %[[FOR_END_LOOPEXIT:.*]], !prof ![[#PROF_RM_GUARD:]] +; CHECK: [[FOR_BODY_EPIL_PREHEADER]]: +; CHECK: br label %[[FOR_BODY_EPIL:.*]] +; CHECK: [[FOR_BODY_EPIL]]: +; CHECK: br i1 {{.*}}, label %[[FOR_BODY_EPIL]], label %[[FOR_END_LOOPEXIT_EPILOG_LCSSA:.*]], !prof ![[#PROF_RM_LATCH:]], !llvm.loop ![[#LOOP_RM_LATCH:]] define i3 @test(ptr %a, i3 %n) { entry: @@ -34,3 +43,37 @@ for.end: } !0 = !{!"branch_weights", i32 1, i32 9999} + +; Original loop probability: p = 9999/(1+9999) = 0.9999 +; Original estimated trip count: (1+9999)/1 = 10000 +; Unroll count: 4 + +; Probability of >=3 iterations after first: p^3 = 0.9970003 =~ +; 2146839468 / (644180 + 2146839468). +; CHECK: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 644180, i32 2146839468} + +; Probability of >=4 more iterations: p^4 = 0.99960006 =~ +; 2146624784 / (858864 + 2146624784). +; CHECK: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 858864, i32 2146624784} + +; 10000//4 = 2500 +; CHECK: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]} +; CHECK: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 2500} +; +; CHECK: ![[#DISABLE]] = !{!"llvm.loop.unroll.disable"} + +; Probability of 1 to 3 more of 3 more remainder iterations: +; (p-p^4)/(1-p^4) = 0.749962497 =~ 1610532724 / (1610532724 + 536950924). +; CHECK: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 1610532724, i32 536950924} + +; Frequency of first remainder iter: r1 = 1 +; Frequency of second remainder iter: r2 = r1*(p-p^3)/(1-p^3) = 0.666633331 +; Frequency of third remainder iter: r3 = r2*(p-p^2)/(1-p^2) = 0.333299999 +; Solve for loop probability that produces that frequency: f = 1/(1-p') => +; p' = 1-1/f = 1-1/(r1+r2+r3) = 0.499983332 =~ +; 1073706403 / (1073706403 + 1073777245). +; CHECK: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 1073706403, i32 1073777245} + +; 10000%4 = 0 +; CHECK: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_RM_TC:]], ![[#DISABLE:]]} +; CHECK: ![[#LOOP_RM_TC]] = !{!"llvm.loop.estimated_trip_count", i32 0} diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop.ll index 492de063573be..ec7aba432b484 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-loop.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-loop.ll @@ -295,11 +295,12 @@ exit2.loopexit: ; COMMON-LABEL: {{^}}!0 = ; EPILOG: [[EPILOG_PROF_0]] = !{!"branch_weights", i32 1, i32 11} -; EPILOG: [[EPILOG_PROF_1]] = !{!"branch_weights", i32 1, i32 127} -; EPILOG: [[EPILOG_PROF_2]] = !{!"branch_weights", i32 1, i32 7} -; EPILOG: [[EPILOG_PROF_3]] = !{!"branch_weights", i32 3, i32 1} +; EPILOG: [[EPILOG_PROF_1]] = !{!"branch_weights", i32 326124004, i32 1821359644} +; EPILOG: [[EPILOG_PROF_2]] = !{!"branch_weights", i32 1856428066, i32 291055582} +; EPILOG: [[EPILOG_PROF_3]] = !{!"branch_weights", i32 1597681585, i32 549802063} -; EPILOG: [[EPILOG_LOOP]] = distinct !{[[EPILOG_LOOP]], [[EPILOG_LOOP_1:![0-9]+]]} +; EPILOG: [[EPILOG_LOOP]] = distinct !{[[EPILOG_LOOP]], [[EPILOG_TC:![0-9]+]], [[EPILOG_LOOP_1:![0-9]+]]} +; EPILOG: [[EPILOG_TC]] = !{!"llvm.loop.estimated_trip_count", i32 3} ; EPILOG: [[EPILOG_LOOP_1]] = !{!"llvm.loop.unroll.disable"} ; PROLOG: [[PROLOG_PROF_0]] = !{!"branch_weights", i32 1, i32 11} diff --git a/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll b/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll index 1cd70f1d1dfd3..02f5bf932132e 100644 --- a/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll +++ b/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll @@ -3,14 +3,27 @@ @known_constant = internal unnamed_addr constant [9 x i32] [i32 0, i32 -1, i32 0, i32 -1, i32 5, i32 -1, i32 0, i32 -1, i32 0], align 16 ; CHECK-LABEL: @bar_prof -; CHECK: loop: -; CHECK: %mul = mul -; CHECK: %mul.1 = mul -; CHECK: %mul.2 = mul -; CHECK: %mul.3 = mul -; CHECK: br i1 %niter.ncmp.7, label %loop.end.unr-lcssa, label %loop, !prof [[PROF0:![0-9]+]] -; CHECK: loop.epil: -; CHECK: br i1 %epil.iter.cmp, label %loop.epil, label %loop.end.epilog-lcssa, !prof [[PROF1:![0-9]+]], !llvm.loop {{![0-9]+}} +; CHECK: entry: +; CHECK: br i1 %{{.*}}, label %[[LOOP_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]], !prof ![[#PROF_UR_GUARD:]] +; CHECK: [[ENTRY_NEW]]: +; CHECK: br label %loop +; CHECK: loop: +; CHECK: %mul = mul +; CHECK: %mul.1 = mul +; CHECK: %mul.2 = mul +; CHECK: %mul.3 = mul +; CHECK: %mul.4 = mul +; CHECK: %mul.5 = mul +; CHECK: %mul.6 = mul +; CHECK: %mul.7 = mul +; CHECK-NOT: %mul.8 = mul +; CHECK: br i1 %{{.*}}, label %[[LOOP_END_UNR_LCSSA:.*]], label %loop, !prof ![[#PROF_UR_LATCH:]], !llvm.loop ![[#LOOP_UR_LATCH:]] +; CHECK: [[LOOP_END_UNR_LCSSA]]: +; CHECK: br i1 %{{.*}}, label %[[LOOP_EPIL_PREHEADER]], label %loop.end, !prof ![[#PROF_RM_GUARD:]] +; CHECK: [[LOOP_EPIL_PREHEADER]]: +; CHECK: br label %[[LOOP_EPIL:.*]] +; CHECK: [[LOOP_EPIL]]: +; CHECK: br i1 %{{.*}}, label %[[LOOP_EPIL]], label %[[LOOP_END_EPILOG_LCSSA:.*]], !prof ![[#PROF_RM_LATCH:]], !llvm.loop ![[#LOOP_RM_LATCH:]] define i32 @bar_prof(ptr noalias nocapture readonly %src, i64 %c) !prof !1 { entry: br label %loop @@ -60,7 +73,38 @@ loop.end: !1 = !{!"function_entry_count", i64 1} !2 = !{!"branch_weights", i32 1, i32 1000} -; FIXME: These branch weights are incorrect and should not be merged into main -; until PR #159163, which fixes them. -; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 1000} -; CHECK: [[PROF1]] = !{!"branch_weights", i32 3, i32 1} +; Original loop probability: p = 1000/(1+1000) = 0.999000999 +; Original estimated trip count: (1+1000)/1 = 1001 +; Unroll count: 8 + +; Probability of >=7 iterations after first: p^7 = 0.993027916 =~ +; 2132511214 / (14972434 + 2132511214). +; CHECK: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 14972434, i32 2132511214} + +; Probability of >=8 more iterations: p^8 = 0.99203588 =~ +; 2130380833 / (17102815 + 2130380833). +; CHECK: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 17102815, i32 2130380833} + +; 1001//8 = 125 +; CHECK: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]]} +; CHECK: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 125} + +; Probability of 1 to 7 more of 7 more remainder iterations: +; (p-p^8)/(1-p^8) = 0.874562282 =~ 1878108210 / (1878108210 + 269375438). +; CHECK: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 1878108210, i32 269375438} + +; Frequency of first remainder iter: r1 = 1 +; Frequency of second remainder iter: r2 = r1*(p-p^7)/(1-p^7) = 0.856714143 +; Frequency of third remainder iter: r3 = r2*(p-p^6)/(1-p^6) = 0.713571429 +; Frequency of fourth remainder iter: r4 = r2*(p-p^5)/(1-p^5) = 0.570571715 +; Frequency of fifth remainder iter: r5 = r2*(p-p^4)/(1-p^4) = 0.427714858 +; Frequency of sixth remainder iter: r6 = r2*(p-p^3)/(1-p^3) = 0.285000715 +; Frequency of seventh remainder iter: r7 = r2*(p-p^2)/(1-p^2) = 0.142429143 +; Solve for loop probability that produces that frequency: f = 1/(1-p') => +; p' = 1-1/f = 1-1/(r1+r2+r3+r4+r5+r6+r7) = 0.749749875 =~ +; 1610075606 / (1610075606 + 537408042). +; CHECK: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 1610075606, i32 537408042} + +; Remainder estimated trip count: 1001%8 = 1 +; CHECK: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_RM_TC:]], ![[#DISABLE:]]} +; CHECK: ![[#LOOP_RM_TC]] = !{!"llvm.loop.estimated_trip_count", i32 1}