diff --git a/llvm/include/llvm/Support/BranchProbability.h b/llvm/include/llvm/Support/BranchProbability.h index b15d6e1707afa..7e6f8f204fa8f 100644 --- a/llvm/include/llvm/Support/BranchProbability.h +++ b/llvm/include/llvm/Support/BranchProbability.h @@ -57,6 +57,8 @@ class BranchProbability { // Create a BranchProbability object from 64-bit integers. LLVM_ABI static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator); + // Create a BranchProbability from a double in [0, 1], rounded to nearest N/D. + LLVM_ABI static BranchProbability getBranchProbability(double Prob); // Normalize given probabilties so that the sum of them becomes approximate // one. @@ -71,6 +73,7 @@ class BranchProbability { uint32_t getNumerator() const { return N; } static uint32_t getDenominator() { return D; } + double toDouble() const { return double(N) / D; } // Return (1 - Probability). BranchProbability getCompl() const { return BranchProbability(D - N); } diff --git a/llvm/lib/Support/BranchProbability.cpp b/llvm/lib/Support/BranchProbability.cpp index 143e58a05d3b7..2b71193e0dfd5 100644 --- a/llvm/lib/Support/BranchProbability.cpp +++ b/llvm/lib/Support/BranchProbability.cpp @@ -60,6 +60,11 @@ BranchProbability::getBranchProbability(uint64_t Numerator, return BranchProbability(Numerator >> Scale, Denominator); } +BranchProbability BranchProbability::getBranchProbability(double Prob) { + assert(0 <= Prob && Prob <= 1 && "Probability must be between 0 and 1!"); + return BranchProbability(std::round(Prob * D), D); +} + // If ConstD is not zero, then replace D by ConstD so that division and modulo // operations by D can be optimized, in case this function is not inlined by the // compiler. 
diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp index d2dbe293e028a..8537e925e4ab0 100644 --- a/llvm/lib/Transforms/Utils/LoopPeel.cpp +++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp @@ -1183,8 +1183,33 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI, // If the original loop may only execute a single iteration we need to // insert a trip count check and skip the original loop with the last - // iteration peeled off if necessary. - if (!SE->isKnownNonZero(BTC)) { + // iteration peeled off if necessary. Either way, we must update branch + // weights to maintain the loop body frequency. + if (SE->isKnownNonZero(BTC)) { + // We have just proven that, when reached, the original loop always + // executes at least two iterations. Thus, we unconditionally execute + // both the remaining loop's initial iteration and the peeled iteration. + // But that increases the latter's frequency above its frequency in the + // original loop. To maintain the total frequency, we compensate by + // decreasing the remaining loop body's frequency to indicate one less + // iteration. + // + // We use this formula to convert probability to/from frequency: + // Sum(i=0..inf)(P^i) = 1/(1-P) = Freq. + if (BranchProbability P = getLoopProbability(L); !P.isUnknown()) { + // Trying to subtract one from an infinite loop is pointless, and our + // formulas then produce division by zero, so skip that case. + if (BranchProbability ExitP = P.getCompl(); !ExitP.isZero()) { + double Freq = 1 / ExitP.toDouble(); + // No branch weights can produce a frequency of less than one given + // the initial iteration, and our formulas produce a negative + // probability if we try. 
+ double NewFreq = std::max(Freq - 1, 1.0); + setLoopProbability( + L, BranchProbability::getBranchProbability(1 - 1 / NewFreq)); + } + } + } else { NewPreHeader = SplitEdge(PreHeader, Header, &DT, LI); SCEVExpander Expander(*SE, Latch->getDataLayout(), "loop-peel"); diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 6c9467bf4a005..7de868364c607 100644 --- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -495,16 +495,13 @@ static Loop *CloneLoopBlocks(Loop *L, Value *NewIter, BranchProbability ProbReaching = BranchProbability::getOne(); for (unsigned N = Count - 2; N >= 1; --N) { ProbReaching *= probOfNextInRemainder(OriginalLoopProb, N); - FreqRemIters += double(ProbReaching.getNumerator()) / - ProbReaching.getDenominator(); + FreqRemIters += ProbReaching.toDouble(); } } // Solve for the loop probability that would produce that frequency. // Sum(i=0..inf)(Prob^i) = 1/(1-Prob) = FreqRemIters. - double ProbDouble = 1 - 1 / FreqRemIters; - BranchProbability Prob = BranchProbability::getBranchProbability( - std::round(ProbDouble * BranchProbability::getDenominator()), - BranchProbability::getDenominator()); + BranchProbability Prob = + BranchProbability::getBranchProbability(1 - 1 / FreqRemIters); setBranchProbability(RemainderLoopLatch, Prob, /*ForFirstTarget=*/true); } NewIdx->addIncoming(Zero, InsertTop); diff --git a/llvm/test/Transforms/LoopUnroll/branch-weights-freq/peel-last-iteration-no-guard.ll b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/peel-last-iteration-no-guard.ll new file mode 100644 index 0000000000000..ff974f9dc6102 --- /dev/null +++ b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/peel-last-iteration-no-guard.ll @@ -0,0 +1,87 @@ +; Check that the loop body frequency is maintained when LoopPeel both: +; - Peels one unconditional iteration after the loop. 
+; - Does not add a guard to sometimes skip the remaining loop because it has +; proven the original loop always executes at least two iterations, which +; become the initial iteration and the peeled iteration. + +; DEFINE: %{exitWeight} = +; DEFINE: %{loopWeight} = +; DEFINE: %{loopFreqOld} = +; DEFINE: %{loopFreqNew} = + +; DEFINE: %{run} = \ +; DEFINE: cp %s %t.ll && chmod +w %t.ll && \ +; DEFINE: echo '!0 = !{!"branch_weights", i32 %{exitWeight}, ' \ +; DEFINE: 'i32 %{loopWeight}}' >> %t.ll && \ +; DEFINE: opt -p "print<block-freq>,loop-unroll,print<block-freq>" \ +; DEFINE: -unroll-full-max-count=0 -S %t.ll 2>&1 | \ +; DEFINE: FileCheck -DLOOP_FREQ_OLD='%{loopFreqOld}' \ +; DEFINE: -DLOOP_FREQ_NEW='%{loopFreqNew}' %s + +; Branch weights give the original loop 10 iterations. We expect that +; loopFreqOld = loopFreqNew + 1. +; REDEFINE: %{exitWeight} = 1 +; REDEFINE: %{loopWeight} = 9 +; REDEFINE: %{loopFreqOld} = 10.0 +; REDEFINE: %{loopFreqNew} = 9.0 +; RUN: %{run} + +; Branch weights give the original loop 2 iterations. We expect that +; loopFreqOld = loopFreqNew + 1. +; REDEFINE: %{exitWeight} = 1 +; REDEFINE: %{loopWeight} = 1 +; REDEFINE: %{loopFreqOld} = 2.0 +; REDEFINE: %{loopFreqNew} = 1.0 +; RUN: %{run} + +; Branch weights give the original loop 1 iteration, but LoopPeel proved it has +; at least 2. There is no loop probability that produces a frequency below 1, +; so the original total frequency cannot be maintained. +; REDEFINE: %{exitWeight} = 1 +; REDEFINE: %{loopWeight} = 0 +; REDEFINE: %{loopFreqOld} = 1.0 +; REDEFINE: %{loopFreqNew} = 1.0 +; RUN: %{run} + +; Branch weights say the original loop is infinite, maximizing the frequency, +; so LoopPeel does not try to decrement it. +; REDEFINE: %{exitWeight} = 0 +; REDEFINE: %{loopWeight} = 1 +; REDEFINE: %{loopFreqOld} = 2147483647.8 +; REDEFINE: %{loopFreqNew} = 2147483647.8 +; RUN: %{run} + +; Everything other than loop should be 1.0 because it is reached once. 
+; +; CHECK: block-frequency-info: test +; CHECK-NEXT: - entry: float = 1.0, +; CHECK-NEXT: - loop: float = [[LOOP_FREQ_OLD]], +; CHECK-NEXT: - exit: float = 1.0, +; +; CHECK: block-frequency-info: test +; CHECK-NEXT: - entry: float = 1.0, +; CHECK-NEXT: - loop: float = [[LOOP_FREQ_NEW]], +; CHECK-NEXT: - exit.peel.begin: float = 1.0, +; CHECK-NEXT: - loop.peel: float = 1.0, +; CHECK-NEXT: - exit.peel.next: float = 1.0, +; CHECK-NEXT: - loop.peel.next: float = 1.0, +; CHECK-NEXT: - exit: float = 1.0, + +declare void @f(i32) + +define void @test() { +entry: + br label %loop + +loop: + %i = phi i32 [ 0, %entry ], [ %inc, %loop ] ; counts iterations from 0 + %isLast = icmp eq i32 %i, 20 ; true only in the iteration where %i == 20 + %sel = select i1 %isLast, i32 1, i32 0 ; @f receives 1 only in that iteration + call void @f(i32 %sel) + %inc = add i32 %i, 1 + %isLast1 = icmp eq i32 %i, 20 ; same comparison as %isLast + br i1 %isLast1, label %exit, label %loop, !prof !0 ; leaves once %i == 20 (21 iterations); !0 is appended by the run script + +exit: + ret void +}