Skip to content

Commit e26c371

Browse files
committed
[LoopPeel] Fix BFI when peeling last iteration without guard
LoopPeel sometimes proves that, when reached, the original loop always executes at least two iterations. LoopPeel then unconditionally executes both the remaining loop's initial iteration and the peeled final iteration. But that increases the latter's frequency above its frequency in the original loop. To maintain the total frequency, this patch compensates by decreasing the remaining loop's latch probability. This is another step in issue #135812 and was discussed at <#166858 (comment)>.
1 parent d343913 commit e26c371

File tree

5 files changed

+125
-8
lines changed

5 files changed

+125
-8
lines changed

llvm/include/llvm/Support/BranchProbability.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ class BranchProbability {
5757
// Create a BranchProbability object from 64-bit integers.
5858
LLVM_ABI static BranchProbability getBranchProbability(uint64_t Numerator,
5959
uint64_t Denominator);
60+
// Create a BranchProbability from a double, which must be from 0 to 1.
61+
LLVM_ABI static BranchProbability getBranchProbability(double Prob);
6062

6163
// Normalize given probabilities so that the sum of them becomes approximately
6264
// one.
@@ -71,6 +73,7 @@ class BranchProbability {
7173

7274
uint32_t getNumerator() const { return N; }
7375
static uint32_t getDenominator() { return D; }
76+
double toDouble() const { return double(N) / D; }
7477

7578
// Return (1 - Probability).
7679
BranchProbability getCompl() const { return BranchProbability(D - N); }

llvm/lib/Support/BranchProbability.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,11 @@ BranchProbability::getBranchProbability(uint64_t Numerator,
6060
return BranchProbability(Numerator >> Scale, Denominator);
6161
}
6262

63+
// Build a BranchProbability from a floating-point probability. Prob is scaled
// by the denominator D and rounded to the nearest integer to form the
// numerator. Prob must lie in the closed range [0, 1]; the assert below checks
// this (a NaN also fails the check because both comparisons are false).
BranchProbability BranchProbability::getBranchProbability(double Prob) {
64+
assert(0 <= Prob && Prob <= 1 && "Probability must be between 0 and 1!");
65+
return BranchProbability(std::round(Prob * D), D);
66+
}
67+
6368
// If ConstD is not zero, then replace D by ConstD so that division and modulo
6469
// operations by D can be optimized, in case this function is not inlined by the
6570
// compiler.

llvm/lib/Transforms/Utils/LoopPeel.cpp

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1183,8 +1183,33 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
11831183

11841184
// If the original loop may only execute a single iteration we need to
11851185
// insert a trip count check and skip the original loop with the last
1186-
// iteration peeled off if necessary.
1187-
if (!SE->isKnownNonZero(BTC)) {
1186+
// iteration peeled off if necessary. Either way, we must update branch
1187+
// weights to maintain the loop body frequency.
1188+
if (SE->isKnownNonZero(BTC)) {
1189+
// We have just proven that, when reached, the original loop always
1190+
// executes at least two iterations. Thus, we unconditionally execute
1191+
// both the remaining loop's initial iteration and the peeled iteration.
1192+
// But that increases the latter's frequency above its frequency in the
1193+
// original loop. To maintain the total frequency, we compensate by
1194+
// decreasing the remaining loop body's frequency to indicate one less
1195+
// iteration.
1196+
//
1197+
// We use this formula to convert probability to/from frequency:
1198+
// Sum(i=0..inf)(P^i) = 1/(1-P) = Freq.
1199+
if (BranchProbability P = getLoopProbability(L); !P.isUnknown()) {
1200+
// Trying to subtract one from an infinite loop is pointless, and our
1201+
// formulas then produce division by zero, so skip that case.
1202+
if (BranchProbability ExitP = P.getCompl(); !ExitP.isZero()) {
1203+
double Freq = 1 / ExitP.toDouble();
1204+
// No branch weights can produce a frequency of less than one given
1205+
// the initial iteration, and our formulas produce a negative
1206+
// probability if we try.
1207+
double NewFreq = std::max(Freq - 1, 1.0);
1208+
setLoopProbability(
1209+
L, BranchProbability::getBranchProbability(1 - 1 / NewFreq));
1210+
}
1211+
}
1212+
} else {
11881213
NewPreHeader = SplitEdge(PreHeader, Header, &DT, LI);
11891214
SCEVExpander Expander(*SE, Latch->getDataLayout(), "loop-peel");
11901215

llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -495,16 +495,13 @@ static Loop *CloneLoopBlocks(Loop *L, Value *NewIter,
495495
BranchProbability ProbReaching = BranchProbability::getOne();
496496
for (unsigned N = Count - 2; N >= 1; --N) {
497497
ProbReaching *= probOfNextInRemainder(OriginalLoopProb, N);
498-
FreqRemIters += double(ProbReaching.getNumerator()) /
499-
ProbReaching.getDenominator();
498+
FreqRemIters += ProbReaching.toDouble();
500499
}
501500
}
502501
// Solve for the loop probability that would produce that frequency.
503502
// Sum(i=0..inf)(Prob^i) = 1/(1-Prob) = FreqRemIters.
504-
double ProbDouble = 1 - 1 / FreqRemIters;
505-
BranchProbability Prob = BranchProbability::getBranchProbability(
506-
std::round(ProbDouble * BranchProbability::getDenominator()),
507-
BranchProbability::getDenominator());
503+
BranchProbability Prob =
504+
BranchProbability::getBranchProbability(1 - 1 / FreqRemIters);
508505
setBranchProbability(RemainderLoopLatch, Prob, /*ForFirstTarget=*/true);
509506
}
510507
NewIdx->addIncoming(Zero, InsertTop);
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
; Check that the loop body frequency is maintained when LoopPeel both:
2+
; - Peels one unconditional iteration after the loop.
3+
; - Does not add a guard to sometimes skip the remaining loop because it has
4+
; proven the original loop always executes at least two iterations, which
5+
; become the initial iteration and the peeled iteration.
6+
7+
; DEFINE: %{exitWeight} =
8+
; DEFINE: %{loopWeight} =
9+
; DEFINE: %{loopFreqOld} =
10+
; DEFINE: %{loopFreqNew} =
11+
12+
; DEFINE: %{run} = \
13+
; DEFINE: cp %s %t.ll && chmod +w %t.ll && \
14+
; DEFINE: echo '!0 = !{!"branch_weights", i32 %{exitWeight}, ' \
15+
; DEFINE: 'i32 %{loopWeight}}' >> %t.ll && \
16+
; DEFINE: opt -p "print<block-freq>,loop-unroll,print<block-freq>" \
17+
; DEFINE: -unroll-full-max-count=0 -S %t.ll 2>&1 | \
18+
; DEFINE: FileCheck -DLOOP_FREQ_OLD='%{loopFreqOld}' \
19+
; DEFINE: -DLOOP_FREQ_NEW='%{loopFreqNew}' %s
20+
21+
; Branch weights give the original loop 10 iterations. We expect that
22+
; loopFreqOld = loopFreqNew + 1.
23+
; REDEFINE: %{exitWeight} = 1
24+
; REDEFINE: %{loopWeight} = 9
25+
; REDEFINE: %{loopFreqOld} = 10.0
26+
; REDEFINE: %{loopFreqNew} = 9.0
27+
; RUN: %{run}
28+
29+
; Branch weights give the original loop 2 iterations. We expect that
30+
; loopFreqOld = loopFreqNew + 1.
31+
; REDEFINE: %{exitWeight} = 1
32+
; REDEFINE: %{loopWeight} = 1
33+
; REDEFINE: %{loopFreqOld} = 2.0
34+
; REDEFINE: %{loopFreqNew} = 1.0
35+
; RUN: %{run}
36+
37+
; Branch weights give the original loop 1 iteration, but LoopPeel proved it has
38+
; at least 2. There is no loop probability that produces a frequency below 1,
39+
; so the original total frequency cannot be maintained.
40+
; REDEFINE: %{exitWeight} = 1
41+
; REDEFINE: %{loopWeight} = 0
42+
; REDEFINE: %{loopFreqOld} = 1.0
43+
; REDEFINE: %{loopFreqNew} = 1.0
44+
; RUN: %{run}
45+
46+
; Branch weights say the original loop is infinite, maximizing the frequency,
47+
; so LoopPeel does not try to decrement it.
48+
; REDEFINE: %{exitWeight} = 0
49+
; REDEFINE: %{loopWeight} = 1
50+
; REDEFINE: %{loopFreqOld} = 2147483647.8
51+
; REDEFINE: %{loopFreqNew} = 2147483647.8
52+
; RUN: %{run}
53+
54+
; Everything other than loop should be 1.0 because it is reached once.
55+
;
56+
; CHECK: block-frequency-info: test
57+
; CHECK-NEXT: - entry: float = 1.0,
58+
; CHECK-NEXT: - loop: float = [[LOOP_FREQ_OLD]],
59+
; CHECK-NEXT: - exit: float = 1.0,
60+
;
61+
; CHECK: block-frequency-info: test
62+
; CHECK-NEXT: - entry: float = 1.0,
63+
; CHECK-NEXT: - loop: float = [[LOOP_FREQ_NEW]],
64+
; CHECK-NEXT: - exit.peel.begin: float = 1.0,
65+
; CHECK-NEXT: - loop.peel: float = 1.0,
66+
; CHECK-NEXT: - exit.peel.next: float = 1.0,
67+
; CHECK-NEXT: - loop.peel.next: float = 1.0,
68+
; CHECK-NEXT: - exit: float = 1.0,
69+
70+
declare void @f(i32)
71+
72+
define void @test() {
73+
entry:
74+
br label %loop
75+
76+
loop:
77+
%i = phi i32 [ 0, %entry ], [ %inc, %loop ]
78+
%isLast = icmp eq i32 %i, 20
79+
%sel = select i1 %isLast, i32 1, i32 0
80+
call void @f(i32 %sel)
81+
%inc = add i32 %i, 1
82+
%isLast1 = icmp eq i32 %i, 20
83+
br i1 %isLast1, label %exit, label %loop, !prof !0
84+
85+
exit:
86+
ret void
87+
}

0 commit comments

Comments
 (0)