Skip to content

Commit 81860fc

Browse files
mtrofin authored and luciechoi committed
[SLU][profcheck] Estimate branch weights in partial unswitch cases (llvm#164035)
In the case of a partial unswitch, we take the invariant part of an expression consisting of either conjunctions or disjunctions, hoist it out of the loop, and condition a new branch on it (i.e., on the invariant part). We can't calculate the exact probability of this new branch, but we can use the probability of the existing branch as a bound. That preserves block frequencies better than falling back to the default, static (50-50) probability for that branch. Issue: llvm#147390
1 parent ef896dc commit 81860fc

File tree

4 files changed

+305
-12
lines changed

4 files changed

+305
-12
lines changed

llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp

Lines changed: 51 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -81,6 +81,7 @@ STATISTIC(
8181
STATISTIC(NumInvariantConditionsInjected,
8282
"Number of invariant conditions injected and unswitched");
8383

84+
namespace llvm {
8485
static cl::opt<bool> EnableNonTrivialUnswitch(
8586
"enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
8687
cl::desc("Forcibly enables non-trivial loop unswitching rather than "
@@ -131,11 +132,17 @@ static cl::opt<bool> InjectInvariantConditions(
131132

132133
static cl::opt<unsigned> InjectInvariantConditionHotnesThreshold(
133134
"simple-loop-unswitch-inject-invariant-condition-hotness-threshold",
134-
cl::Hidden, cl::desc("Only try to inject loop invariant conditions and "
135-
"unswitch on them to eliminate branches that are "
136-
"not-taken 1/<this option> times or less."),
135+
cl::Hidden,
136+
cl::desc("Only try to inject loop invariant conditions and "
137+
"unswitch on them to eliminate branches that are "
138+
"not-taken 1/<this option> times or less."),
137139
cl::init(16));
138140

141+
static cl::opt<bool> EstimateProfile("simple-loop-unswitch-estimate-profile",
142+
cl::Hidden, cl::init(true));
143+
extern cl::opt<bool> ProfcheckDisableMetadataFixes;
144+
} // namespace llvm
145+
139146
AnalysisKey ShouldRunExtraSimpleLoopUnswitch::Key;
140147
namespace {
141148
struct CompareDesc {
@@ -268,13 +275,42 @@ static bool areLoopExitPHIsLoopInvariant(const Loop &L,
268275
llvm_unreachable("Basic blocks should never be empty!");
269276
}
270277

271-
/// Copy a set of loop invariant values \p ToDuplicate and insert them at the
278+
/// Copy a set of loop invariant values \p Invariants and insert them at the
272279
/// end of \p BB and conditionally branch on the copied condition. We only
273280
/// branch on a single value.
281+
/// We attempt to estimate the profile of the resulting conditional branch from
282+
/// \p ComputeProfFrom, which is the original conditional branch we're
283+
/// unswitching.
284+
/// When \p Direction is true, the \p Invariants form a disjunction, and the
285+
/// branch conditioned on it exits the loop on the "true" case. When \p
286+
/// Direction is false, the \p Invariants form a conjunction and the branch
287+
/// exits on the "false" case.
274288
static void buildPartialUnswitchConditionalBranch(
275289
BasicBlock &BB, ArrayRef<Value *> Invariants, bool Direction,
276290
BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze,
277-
const Instruction *I, AssumptionCache *AC, const DominatorTree &DT) {
291+
const Instruction *I, AssumptionCache *AC, const DominatorTree &DT,
292+
const BranchInst &ComputeProfFrom) {
293+
294+
SmallVector<uint32_t> BranchWeights;
295+
bool HasBranchWeights = EstimateProfile && !ProfcheckDisableMetadataFixes &&
296+
extractBranchWeights(ComputeProfFrom, BranchWeights);
297+
// If Direction is true, that means we had a disjunction and that the "true"
298+
// case exits. The probability of the disjunction of the subset of terms is at
299+
// most as high as the original one. So, if the probability is higher than the
300+
// one we'd assign in absence of a profile (i.e. 0.5), we will use 0.5,
301+
// but if it's lower, we will use the original probability.
302+
// Conversely, if Direction is false, that means we had a conjunction, and the
303+
// probability of exiting is captured in the second branch weight. That
304+
// probability is a disjunction (of the negation of the original terms). The
305+
// same reasoning applies as above.
306+
// Issue #165649: should we expect BFI to conserve, and use that to calculate
307+
// the branch weights?
308+
if (HasBranchWeights &&
309+
static_cast<double>(BranchWeights[Direction ? 0 : 1]) /
310+
static_cast<double>(sum_of(BranchWeights)) >
311+
0.5)
312+
HasBranchWeights = false;
313+
278314
IRBuilder<> IRB(&BB);
279315
IRB.SetCurrentDebugLocation(DebugLoc::getCompilerGenerated());
280316

@@ -287,8 +323,14 @@ static void buildPartialUnswitchConditionalBranch(
287323

288324
Value *Cond = Direction ? IRB.CreateOr(FrozenInvariants)
289325
: IRB.CreateAnd(FrozenInvariants);
290-
IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
291-
Direction ? &NormalSucc : &UnswitchedSucc);
326+
auto *BR = IRB.CreateCondBr(
327+
Cond, Direction ? &UnswitchedSucc : &NormalSucc,
328+
Direction ? &NormalSucc : &UnswitchedSucc,
329+
HasBranchWeights ? ComputeProfFrom.getMetadata(LLVMContext::MD_prof)
330+
: nullptr);
331+
if (!HasBranchWeights)
332+
setExplicitlyUnknownBranchWeightsIfProfiled(
333+
*BR, *BR->getParent()->getParent(), DEBUG_TYPE);
292334
}
293335

294336
/// Copy a set of loop invariant values, and conditionally branch on them.
@@ -658,7 +700,7 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
658700
" condition!");
659701
buildPartialUnswitchConditionalBranch(
660702
*OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH,
661-
FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT);
703+
FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT, BI);
662704
}
663705

664706
// Update the dominator tree with the added edge.
@@ -2477,7 +2519,7 @@ static void unswitchNontrivialInvariants(
24772519
else {
24782520
buildPartialUnswitchConditionalBranch(
24792521
*SplitBB, Invariants, Direction, *ClonedPH, *LoopPH,
2480-
FreezeLoopUnswitchCond, BI, &AC, DT);
2522+
FreezeLoopUnswitchCond, BI, &AC, DT, *BI);
24812523
}
24822524
DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
24832525

Lines changed: 89 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,89 @@
1+
; RUN: split-file %s %t
2+
; RUN: cat %t/main.ll %t/probable-or.prof > %t/probable-or.ll
3+
; RUN: cat %t/main.ll %t/probable-and.prof > %t/probable-and.ll
4+
; RUN: opt -passes='loop(simple-loop-unswitch<nontrivial>)' -S %t/probable-or.ll -o -| FileCheck %t/probable-or.prof
5+
; RUN: opt -passes='loop(simple-loop-unswitch<nontrivial>)' -S %t/probable-and.ll -o -| FileCheck %t/probable-and.prof
6+
7+
;--- main.ll
8+
declare i32 @a()
9+
declare i32 @b()
10+
11+
define i32 @or(ptr %ptr, i1 %cond) !prof !0 {
12+
entry:
13+
br label %loop_begin
14+
15+
loop_begin:
16+
%v1 = load i1, ptr %ptr
17+
%cond_or = or i1 %v1, %cond
18+
br i1 %cond_or, label %loop_a, label %loop_b, !prof !1
19+
20+
loop_a:
21+
call i32 @a()
22+
br label %latch
23+
24+
loop_b:
25+
call i32 @b()
26+
br label %latch
27+
28+
latch:
29+
%v2 = load i1, ptr %ptr
30+
br i1 %v2, label %loop_begin, label %loop_exit, !prof !2
31+
32+
loop_exit:
33+
ret i32 0
34+
}
35+
36+
define i32 @and(ptr %ptr, i1 %cond) !prof !0 {
37+
entry:
38+
br label %loop_begin
39+
40+
loop_begin:
41+
%v1 = load i1, ptr %ptr
42+
%cond_and = and i1 %v1, %cond
43+
br i1 %cond_and, label %loop_a, label %loop_b, !prof !1
44+
45+
loop_a:
46+
call i32 @a()
47+
br label %latch
48+
49+
loop_b:
50+
call i32 @b()
51+
br label %latch
52+
53+
latch:
54+
%v2 = load i1, ptr %ptr
55+
br i1 %v2, label %loop_begin, label %loop_exit, !prof !2
56+
57+
loop_exit:
58+
ret i32 0
59+
}
60+
61+
;--- probable-or.prof
62+
!0 = !{!"function_entry_count", i32 10}
63+
!1 = !{!"branch_weights", i32 1, i32 1000}
64+
!2 = !{!"branch_weights", i32 5, i32 7}
65+
; CHECK-LABEL: @or
66+
; CHECK-LABEL: entry:
67+
; CHECK-NEXT: %cond.fr = freeze i1 %cond
68+
; CHECK-NEXT: br i1 %cond.fr, label %entry.split.us, label %entry.split, !prof !1
69+
; CHECK-LABEL: @and
70+
; CHECK-LABEL: entry:
71+
; CHECK-NEXT: %cond.fr = freeze i1 %cond
72+
; CHECK-NEXT: br i1 %cond.fr, label %entry.split, label %entry.split.us, !prof !3
73+
; CHECK: !1 = !{!"branch_weights", i32 1, i32 1000}
74+
; CHECK: !3 = !{!"unknown", !"simple-loop-unswitch"}
75+
76+
;--- probable-and.prof
77+
!0 = !{!"function_entry_count", i32 10}
78+
!1 = !{!"branch_weights", i32 1000, i32 1}
79+
!2 = !{!"branch_weights", i32 5, i32 7}
80+
; CHECK-LABEL: @or
81+
; CHECK-LABEL: entry:
82+
; CHECK-NEXT: %cond.fr = freeze i1 %cond
83+
; CHECK-NEXT: br i1 %cond.fr, label %entry.split.us, label %entry.split, !prof !1
84+
; CHECK-LABEL: @and
85+
; CHECK-LABEL: entry:
86+
; CHECK-NEXT: %cond.fr = freeze i1 %cond
87+
; CHECK-NEXT: br i1 %cond.fr, label %entry.split, label %entry.split.us, !prof !3
88+
; CHECK: !1 = !{!"unknown", !"simple-loop-unswitch"}
89+
; CHECK: !3 = !{!"branch_weights", i32 1000, i32 1}

llvm/test/Transforms/SimpleLoopUnswitch/pr60736.ll

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
22
; RUN: opt < %s -simple-loop-unswitch-inject-invariant-conditions=true -passes='loop(simple-loop-unswitch<nontrivial>,loop-instsimplify)' -S | FileCheck %s
33

44
define void @test() {
@@ -7,7 +7,7 @@ define void @test() {
77
; CHECK-NEXT: [[TMP:%.*]] = call i1 @llvm.experimental.widenable.condition()
88
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr addrspace(1) poison unordered, align 8
99
; CHECK-NEXT: [[TMP2:%.*]] = load atomic i32, ptr addrspace(1) poison unordered, align 8
10-
; CHECK-NEXT: br i1 [[TMP]], label [[BB_SPLIT:%.*]], label [[BB3_SPLIT_US:%.*]]
10+
; CHECK-NEXT: br i1 [[TMP]], label [[BB_SPLIT:%.*]], label [[BB3_SPLIT_US:%.*]], !prof [[PROF0:![0-9]+]]
1111
; CHECK: bb.split:
1212
; CHECK-NEXT: br label [[BB3:%.*]]
1313
; CHECK: bb3:
@@ -19,7 +19,7 @@ define void @test() {
1919
; CHECK-NEXT: [[TMP6_US:%.*]] = phi i32 [ poison, [[BB3_SPLIT_US]] ]
2020
; CHECK-NEXT: [[TMP7_US:%.*]] = add nuw nsw i32 [[TMP6_US]], 2
2121
; CHECK-NEXT: [[TMP8_US:%.*]] = icmp ult i32 [[TMP7_US]], [[TMP2]]
22-
; CHECK-NEXT: br i1 [[TMP8_US]], label [[BB9_US:%.*]], label [[BB16_SPLIT_US:%.*]], !prof [[PROF0:![0-9]+]]
22+
; CHECK-NEXT: br i1 [[TMP8_US]], label [[BB9_US:%.*]], label [[BB16_SPLIT_US:%.*]], !prof [[PROF0]]
2323
; CHECK: bb9.us:
2424
; CHECK-NEXT: br label [[BB17_SPLIT_US:%.*]]
2525
; CHECK: bb16.split.us:
@@ -96,3 +96,8 @@ declare i1 @llvm.experimental.widenable.condition()
9696

9797
!0 = !{!"branch_weights", i32 1048576, i32 1}
9898

99+
;.
100+
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(inaccessiblemem: readwrite) }
101+
;.
102+
; CHECK: [[PROF0]] = !{!"branch_weights", i32 1048576, i32 1}
103+
;.
Lines changed: 157 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,157 @@
1+
; RUN: split-file %s %t
2+
; RUN: cat %t/main.ll %t/probable-or.prof > %t/probable-or.ll
3+
; RUN: cat %t/main.ll %t/probable-and.prof > %t/probable-and.ll
4+
; RUN: opt -passes='loop-mssa(simple-loop-unswitch)' -S %t/probable-or.ll -o - | FileCheck %t/probable-or.prof
5+
; RUN: opt -passes='loop-mssa(simple-loop-unswitch)' -S %t/probable-and.ll -o - | FileCheck %t/probable-and.prof
6+
;
7+
; RUN: opt -passes='module(print<block-freq>),function(loop-mssa(simple-loop-unswitch)),module(print<block-freq>)' \
8+
; RUN: %t/probable-or.ll -disable-output -simple-loop-unswitch-estimate-profile=0 2>&1 | FileCheck %t/probable-or.prof --check-prefixes=PROFILE-COM,PROFILE-REF
9+
10+
; RUN: opt -passes='module(print<block-freq>),function(loop-mssa(simple-loop-unswitch)),module(print<block-freq>)' \
11+
; RUN: %t/probable-or.ll -disable-output -simple-loop-unswitch-estimate-profile=1 2>&1 | FileCheck %t/probable-or.prof --check-prefixes=PROFILE-COM,PROFILE-CHK
12+
13+
; RUN: opt -passes='module(print<block-freq>),function(loop-mssa(simple-loop-unswitch)),module(print<block-freq>)' \
14+
; RUN: %t/probable-and.ll -disable-output -simple-loop-unswitch-estimate-profile=0 2>&1 | FileCheck %t/probable-and.prof --check-prefixes=PROFILE-COM,PROFILE-REF
15+
16+
; RUN: opt -passes='module(print<block-freq>),function(loop-mssa(simple-loop-unswitch)),module(print<block-freq>)' \
17+
; RUN: %t/probable-and.ll -disable-output -simple-loop-unswitch-estimate-profile=1 2>&1 | FileCheck %t/probable-and.prof --check-prefixes=PROFILE-COM,PROFILE-CHK
18+
19+
;--- main.ll
20+
declare void @some_func() noreturn
21+
22+
define i32 @or(i1 %cond1, i32 %var1) !prof !0 {
23+
entry:
24+
br label %loop_begin
25+
26+
loop_begin:
27+
%var3 = phi i32 [%var1, %entry], [%var2, %do_something]
28+
%cond2 = icmp eq i32 %var3, 10
29+
%cond.or = or i1 %cond1, %cond2
30+
br i1 %cond.or, label %loop_exit, label %do_something, !prof !1
31+
32+
do_something:
33+
%var2 = add i32 %var3, 1
34+
call void @some_func() noreturn nounwind
35+
br label %loop_begin
36+
37+
loop_exit:
38+
ret i32 0
39+
}
40+
41+
define i32 @and(i1 %cond1, i32 %var1) !prof !0 {
42+
entry:
43+
br label %loop_begin
44+
45+
loop_begin:
46+
%var3 = phi i32 [%var1, %entry], [%var2, %do_something]
47+
%cond2 = icmp eq i32 %var3, 10
48+
%cond.and = and i1 %cond1, %cond2
49+
br i1 %cond.and, label %do_something, label %loop_exit, !prof !1
50+
51+
do_something:
52+
%var2 = add i32 %var3, 1
53+
call void @some_func() noreturn nounwind
54+
br label %loop_begin
55+
56+
loop_exit:
57+
ret i32 0
58+
}
59+
60+
;--- probable-or.prof
61+
!0 = !{!"function_entry_count", i32 10}
62+
!1 = !{!"branch_weights", i32 1, i32 1000}
63+
; CHECK-LABEL: @or
64+
; CHECK-LABEL: entry:
65+
; CHECK-NEXT: %cond1.fr = freeze i1 %cond1
66+
; CHECK-NEXT: br i1 %cond1.fr, label %loop_exit.split, label %entry.split, !prof !1
67+
; CHECK-LABEL: @and
68+
; CHECK-LABEL: entry:
69+
; CHECK-NEXT: %cond1.fr = freeze i1 %cond1
70+
; CHECK-NEXT: br i1 %cond1.fr, label %entry.split, label %loop_exit.split, !prof !2
71+
; CHECK: !1 = !{!"branch_weights", i32 1, i32 1000}
72+
; CHECK: !2 = !{!"unknown", !"simple-loop-unswitch"}
73+
74+
; PROFILE-COM: Printing analysis results of BFI for function 'or':
75+
; PROFILE-COM: block-frequency-info: or
76+
; PROFILE-COM: - entry: {{.*}} count = 10
77+
; PROFILE-COM: - loop_begin: {{.*}} count = 10010
78+
; PROFILE-COM: - do_something: {{.*}} count = 10000
79+
; PROFILE-COM: - loop_exit: {{.*}} count = 10
80+
81+
; PROFILE-COM: Printing analysis results of BFI for function 'and':
82+
; PROFILE-COM: block-frequency-info: and
83+
; PROFILE-COM: - entry: {{.*}} count = 10
84+
; PROFILE-COM: - loop_begin: {{.*}} count = 10
85+
; PROFILE-COM: - do_something: {{.*}} count = 0
86+
; PROFILE-COM: - loop_exit: {{.*}} count = 10
87+
88+
; PROFILE-COM: Printing analysis results of BFI for function 'or':
89+
; PROFILE-COM: block-frequency-info: or
90+
; PROFILE-COM: - entry: {{.*}} count = 10
91+
; PROFILE-REF: - entry.split: {{.*}} count = 5
92+
; PROFILE-CHK: - entry.split: {{.*}} count = 10
93+
; PROFILE-REF: - loop_begin: {{.*}} count = 5005
94+
; PROFILE-CHK: - loop_begin: {{.*}} count = 10000
95+
; PROFILE-REF: - do_something: {{.*}} count = 5000
96+
; PROFILE-CHK: - do_something: {{.*}} count = 9990
97+
; PROFILE-REF: - loop_exit: {{.*}} count = 5
98+
; PROFILE-CHK: - loop_exit: {{.*}} count = 10
99+
; PROFILE-COM: - loop_exit.split: {{.*}} count = 10
100+
101+
; PROFILE-COM: Printing analysis results of BFI for function 'and':
102+
; PROFILE-COM: block-frequency-info: and
103+
; PROFILE-COM: - entry: {{.*}} count = 10
104+
; PROFILE-COM: - entry.split: {{.*}} count = 5
105+
; PROFILE-COM: - loop_begin: {{.*}} count = 5
106+
; PROFILE-COM: - do_something: {{.*}} count = 0
107+
; PROFILE-COM: - loop_exit: {{.*}} count = 5
108+
; PROFILE-COM: - loop_exit.split: {{.*}} count = 10
109+
110+
;--- probable-and.prof
111+
!0 = !{!"function_entry_count", i32 10}
112+
!1 = !{!"branch_weights", i32 1000, i32 1}
113+
; CHECK-LABEL: @or
114+
; CHECK-LABEL: entry:
115+
; CHECK-NEXT: %cond1.fr = freeze i1 %cond1
116+
; CHECK-NEXT: br i1 %cond1.fr, label %loop_exit.split, label %entry.split, !prof !1
117+
; CHECK-LABEL: @and
118+
; CHECK-LABEL: entry:
119+
; CHECK-NEXT: %cond1.fr = freeze i1 %cond1
120+
; CHECK-NEXT: br i1 %cond1.fr, label %entry.split, label %loop_exit.split, !prof !2
121+
; CHECK: !1 = !{!"unknown", !"simple-loop-unswitch"}
122+
; CHECK: !2 = !{!"branch_weights", i32 1000, i32 1}
123+
; PROFILE-COM: Printing analysis results of BFI for function 'or':
124+
; PROFILE-COM: block-frequency-info: or
125+
; PROFILE-COM: - entry: {{.*}}, count = 10
126+
; PROFILE-COM: - loop_begin: {{.*}}, count = 10
127+
; PROFILE-COM: - do_something: {{.*}}, count = 0
128+
; PROFILE-COM: - loop_exit: {{.*}}, count = 10
129+
130+
; PROFILE-COM: Printing analysis results of BFI for function 'and':
131+
; PROFILE-COM: block-frequency-info: and
132+
; PROFILE-COM: - entry: {{.*}} count = 10
133+
; PROFILE-COM: - loop_begin: {{.*}} count = 10010
134+
; PROFILE-COM: - do_something: {{.*}} count = 10000
135+
; PROFILE-COM: - loop_exit: {{.*}} count = 10
136+
137+
; PROFILE-COM: Printing analysis results of BFI for function 'or':
138+
; PROFILE-COM: block-frequency-info: or
139+
; PROFILE-COM: - entry: {{.*}} count = 10
140+
; PROFILE-COM: - entry.split: {{.*}} count = 5
141+
; PROFILE-COM: - loop_begin: {{.*}} count = 5
142+
; PROFILE-COM: - do_something: {{.*}} count = 0
143+
; PROFILE-COM: - loop_exit: {{.*}} count = 5
144+
; PROFILE-COM: - loop_exit.split: {{.*}} count = 10
145+
146+
; PROFILE-COM: Printing analysis results of BFI for function 'and':
147+
; PROFILE-COM: block-frequency-info: and
148+
; PROFILE-COM: - entry: {{.*}} count = 10
149+
; PROFILE-REF: - entry.split: {{.*}} count = 5
150+
; PROFILE-CHK: - entry.split: {{.*}} count = 10
151+
; PROFILE-REF: - loop_begin: {{.*}} count = 5005
152+
; PROFILE-CHK: - loop_begin: {{.*}} count = 10000
153+
; PROFILE-REF: - do_something: {{.*}} count = 5000
154+
; PROFILE-CHK: - do_something: {{.*}} count = 9990
155+
; PROFILE-REF: - loop_exit: {{.*}} count = 5
156+
; PROFILE-CHK: - loop_exit: {{.*}} count = 10
157+
; PROFILE-COM: - loop_exit.split: {{.*}} count = 10

0 commit comments

Comments
 (0)