Skip to content

Commit e8a1162

Browse files
authored
[SimplifyCFG] Use range check in simplifyBranchOnICmpChain if possible (#165105)
In `simplifyBranchOnICmpChain`, if we can merge the comparisons into a range check, use a conditional branch instead. This change also breaks the cycle found in #165088. Closes #165088. Detailed description of the cycle: ``` define void @pr165088_cycle_1(i8 %x) { entry: %switch = icmp uge i8 %x, 2 %cond1 = icmp ugt i8 %x, 1 %or.cond = and i1 %switch, %cond1 br i1 %or.cond, label %block3, label %block2 block1: %cond2 = icmp ugt i8 %x, 1 br i1 %cond2, label %block3, label %block2 block2: br label %block3 block3: %cond3 = icmp eq i8 %x, 0 br i1 %cond3, label %exit, label %block1 exit: ret void } ``` `simplifyBranchOnICmpChain` folds the branch in `entry` to a switch. Then we get: ``` entry: switch i8 %x, label %block3 [ i8 1, label %block2 i8 0, label %block2 ] ... ``` `performValueComparisonIntoPredecessorFolding` redirects the default target `block3` into `block1` because `%x` is never zero. ``` entry: switch i8 %x, label %block1 [ i8 1, label %block2 i8 0, label %block2 ] ... ``` Then `turnSwitchRangeIntoICmp` will convert the switch back into a branch on icmp. ``` entry: %switch = icmp ult i8 %x, 2 br i1 %switch, label %block2, label %block1 ... ``` Since `block1` and `block2` share the same successor `block3`, `performBranchToCommonDestFolding` merges the conditions of `entry` and `block1`, resulting in the original pattern again.
1 parent d604ab6 commit e8a1162

File tree

2 files changed

+232
-26
lines changed

2 files changed

+232
-26
lines changed

llvm/lib/Transforms/Utils/SimplifyCFG.cpp

Lines changed: 46 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -5228,32 +5228,52 @@ bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI,
52285228
CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
52295229
}
52305230

5231-
// Create the new switch instruction now.
5232-
SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
5233-
if (HasProfile) {
5234-
// We know the weight of the default case. We don't know the weight of the
5235-
// other cases, but rather than completely lose profiling info, we split
5236-
// the remaining probability equally over them.
5237-
SmallVector<uint32_t> NewWeights(Values.size() + 1);
5238-
NewWeights[0] = BranchWeights[1]; // this is the default, and we swapped if
5239-
// TrueWhenEqual.
5240-
for (auto &V : drop_begin(NewWeights))
5241-
V = BranchWeights[0] / Values.size();
5242-
setBranchWeights(*New, NewWeights, /*IsExpected=*/false);
5243-
}
5244-
5245-
// Add all of the 'cases' to the switch instruction.
5246-
for (ConstantInt *Val : Values)
5247-
New->addCase(Val, EdgeBB);
5248-
5249-
// We added edges from PI to the EdgeBB. As such, if there were any
5250-
// PHI nodes in EdgeBB, they need entries to be added corresponding to
5251-
// the number of edges added.
5252-
for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
5253-
PHINode *PN = cast<PHINode>(BBI);
5254-
Value *InVal = PN->getIncomingValueForBlock(BB);
5255-
for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
5256-
PN->addIncoming(InVal, BB);
5231+
// Check if we can represent the values as a contiguous range. If so, we use a
5232+
// range check + conditional branch instead of a switch.
5233+
if (Values.front()->getValue() - Values.back()->getValue() ==
5234+
Values.size() - 1) {
5235+
ConstantRange RangeToCheck = ConstantRange::getNonEmpty(
5236+
Values.back()->getValue(), Values.front()->getValue() + 1);
5237+
APInt Offset, RHS;
5238+
ICmpInst::Predicate Pred;
5239+
RangeToCheck.getEquivalentICmp(Pred, RHS, Offset);
5240+
Value *X = CompVal;
5241+
if (!Offset.isZero())
5242+
X = Builder.CreateAdd(X, ConstantInt::get(CompVal->getType(), Offset));
5243+
Value *Cond =
5244+
Builder.CreateICmp(Pred, X, ConstantInt::get(CompVal->getType(), RHS));
5245+
BranchInst *NewBI = Builder.CreateCondBr(Cond, EdgeBB, DefaultBB);
5246+
if (HasProfile)
5247+
setBranchWeights(*NewBI, BranchWeights, /*IsExpected=*/false);
5248+
// We don't need to update PHI nodes since we don't add any new edges.
5249+
} else {
5250+
// Create the new switch instruction now.
5251+
SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
5252+
if (HasProfile) {
5253+
// We know the weight of the default case. We don't know the weight of the
5254+
// other cases, but rather than completely lose profiling info, we split
5255+
// the remaining probability equally over them.
5256+
SmallVector<uint32_t> NewWeights(Values.size() + 1);
5257+
NewWeights[0] = BranchWeights[1]; // this is the default, and we swapped
5258+
// if TrueWhenEqual.
5259+
for (auto &V : drop_begin(NewWeights))
5260+
V = BranchWeights[0] / Values.size();
5261+
setBranchWeights(*New, NewWeights, /*IsExpected=*/false);
5262+
}
5263+
5264+
// Add all of the 'cases' to the switch instruction.
5265+
for (ConstantInt *Val : Values)
5266+
New->addCase(Val, EdgeBB);
5267+
5268+
// We added edges from PI to the EdgeBB. As such, if there were any
5269+
// PHI nodes in EdgeBB, they need entries to be added corresponding to
5270+
// the number of edges added.
5271+
for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
5272+
PHINode *PN = cast<PHINode>(BBI);
5273+
Value *InVal = PN->getIncomingValueForBlock(BB);
5274+
for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
5275+
PN->addIncoming(InVal, BB);
5276+
}
52575277
}
52585278

52595279
// Erase the old branch instruction.
Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt -S -passes="simplifycfg<switch-range-to-icmp>" < %s | FileCheck %s
3+
4+
; Avoid getting stuck in the cycle pr165088_cycle_[1-4].
5+
6+
define void @pr165088_cycle_1(i8 %x) {
7+
; CHECK-LABEL: define void @pr165088_cycle_1(
8+
; CHECK-SAME: i8 [[X:%.*]]) {
9+
; CHECK-NEXT: [[ENTRY:.*:]]
10+
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i8 [[X]], 2
11+
; CHECK-NEXT: br i1 [[TMP0]], label %[[BLOCK2:.*]], label %[[BLOCK3:.*]]
12+
; CHECK: [[BLOCK1:.*]]:
13+
; CHECK-NEXT: [[COND2:%.*]] = icmp ugt i8 [[X]], 1
14+
; CHECK-NEXT: br i1 [[COND2]], label %[[BLOCK3]], label %[[BLOCK2]]
15+
; CHECK: [[BLOCK2]]:
16+
; CHECK-NEXT: br label %[[BLOCK3]]
17+
; CHECK: [[BLOCK3]]:
18+
; CHECK-NEXT: [[COND3:%.*]] = icmp eq i8 [[X]], 0
19+
; CHECK-NEXT: br i1 [[COND3]], label %[[EXIT:.*]], label %[[BLOCK1]]
20+
; CHECK: [[EXIT]]:
21+
; CHECK-NEXT: ret void
22+
;
23+
entry:
24+
%switch = icmp uge i8 %x, 2
25+
%cond1 = icmp ugt i8 %x, 1
26+
%or.cond = and i1 %switch, %cond1
27+
br i1 %or.cond, label %block3, label %block2
28+
29+
block1:
30+
%cond2 = icmp ugt i8 %x, 1
31+
br i1 %cond2, label %block3, label %block2
32+
33+
block2:
34+
br label %block3
35+
36+
block3:
37+
%cond3 = icmp eq i8 %x, 0
38+
br i1 %cond3, label %exit, label %block1
39+
40+
exit:
41+
ret void
42+
}
43+
44+
define void @pr165088_cycle_2(i8 %x) {
45+
; CHECK-LABEL: define void @pr165088_cycle_2(
46+
; CHECK-SAME: i8 [[X:%.*]]) {
47+
; CHECK-NEXT: [[ENTRY:.*:]]
48+
; CHECK-NEXT: [[SWITCH:%.*]] = icmp ult i8 [[X]], 2
49+
; CHECK-NEXT: br i1 [[SWITCH]], label %[[BLOCK2:.*]], label %[[BLOCK3:.*]]
50+
; CHECK: [[BLOCK1:.*]]:
51+
; CHECK-NEXT: [[COND2:%.*]] = icmp ugt i8 [[X]], 1
52+
; CHECK-NEXT: br i1 [[COND2]], label %[[BLOCK3]], label %[[BLOCK2]]
53+
; CHECK: [[BLOCK2]]:
54+
; CHECK-NEXT: br label %[[BLOCK3]]
55+
; CHECK: [[BLOCK3]]:
56+
; CHECK-NEXT: [[COND3:%.*]] = icmp eq i8 [[X]], 0
57+
; CHECK-NEXT: br i1 [[COND3]], label %[[EXIT:.*]], label %[[BLOCK1]]
58+
; CHECK: [[EXIT]]:
59+
; CHECK-NEXT: ret void
60+
;
61+
entry:
62+
switch i8 %x, label %block3 [
63+
i8 1, label %block2
64+
i8 0, label %block2
65+
]
66+
67+
block1: ; preds = %block3
68+
%cond2 = icmp ugt i8 %x, 1
69+
br i1 %cond2, label %block3, label %block2
70+
71+
block2: ; preds = %entry, %entry, %block1
72+
br label %block3
73+
74+
block3: ; preds = %entry, %block2, %block1
75+
%cond3 = icmp eq i8 %x, 0
76+
br i1 %cond3, label %exit, label %block1
77+
78+
exit: ; preds = %block3
79+
ret void
80+
}
81+
82+
define void @pr165088_cycle_3(i8 %x) {
83+
; CHECK-LABEL: define void @pr165088_cycle_3(
84+
; CHECK-SAME: i8 [[X:%.*]]) {
85+
; CHECK-NEXT: [[ENTRY:.*:]]
86+
; CHECK-NEXT: br label %[[BLOCK3:.*]]
87+
; CHECK: [[BLOCK3]]:
88+
; CHECK-NEXT: [[COND3:%.*]] = icmp eq i8 [[X]], 0
89+
; CHECK-NEXT: br i1 [[COND3]], label %[[EXIT:.*]], label %[[BLOCK3]]
90+
; CHECK: [[EXIT]]:
91+
; CHECK-NEXT: ret void
92+
;
93+
entry:
94+
switch i8 %x, label %block1 [
95+
i8 1, label %block2
96+
i8 0, label %block2
97+
]
98+
99+
block1: ; preds = %entry, %block3
100+
%cond2 = icmp ugt i8 %x, 1
101+
br i1 %cond2, label %block3, label %block2
102+
103+
block2: ; preds = %entry, %entry, %block1
104+
br label %block3
105+
106+
block3: ; preds = %block2, %block1
107+
%cond3 = icmp eq i8 %x, 0
108+
br i1 %cond3, label %exit, label %block1
109+
110+
exit: ; preds = %block3
111+
ret void
112+
}
113+
114+
define void @pr165088_cycle_4(i8 %x) {
115+
; CHECK-LABEL: define void @pr165088_cycle_4(
116+
; CHECK-SAME: i8 [[X:%.*]]) {
117+
; CHECK-NEXT: [[ENTRY:.*:]]
118+
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i8 [[X]], 2
119+
; CHECK-NEXT: br i1 [[TMP0]], label %[[BLOCK2:.*]], label %[[BLOCK3:.*]]
120+
; CHECK: [[BLOCK1:.*]]:
121+
; CHECK-NEXT: [[COND2_OLD:%.*]] = icmp ugt i8 [[X]], 1
122+
; CHECK-NEXT: br i1 [[COND2_OLD]], label %[[BLOCK3]], label %[[BLOCK2]]
123+
; CHECK: [[BLOCK2]]:
124+
; CHECK-NEXT: br label %[[BLOCK3]]
125+
; CHECK: [[BLOCK3]]:
126+
; CHECK-NEXT: [[COND3:%.*]] = icmp eq i8 [[X]], 0
127+
; CHECK-NEXT: br i1 [[COND3]], label %[[EXIT:.*]], label %[[BLOCK1]]
128+
; CHECK: [[EXIT]]:
129+
; CHECK-NEXT: ret void
130+
;
131+
entry:
132+
%switch = icmp ult i8 %x, 2
133+
br i1 %switch, label %block2, label %block1
134+
135+
block1: ; preds = %entry, %block3
136+
%cond2 = icmp ugt i8 %x, 1
137+
br i1 %cond2, label %block3, label %block2
138+
139+
block2: ; preds = %entry, %block1
140+
br label %block3
141+
142+
block3: ; preds = %block2, %block1
143+
%cond3 = icmp eq i8 %x, 0
144+
br i1 %cond3, label %exit, label %block1
145+
146+
exit: ; preds = %block3
147+
ret void
148+
}
149+
150+
define void @pr165088_original(i8 %x) {
151+
; CHECK-LABEL: define void @pr165088_original(
152+
; CHECK-SAME: i8 [[X:%.*]]) {
153+
; CHECK-NEXT: [[ENTRY:.*:]]
154+
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i8 [[X]], 2
155+
; CHECK-NEXT: br i1 [[TMP0]], label %[[BLOCK2:.*]], label %[[BLOCK3:.*]]
156+
; CHECK: [[BLOCK1:.*]]:
157+
; CHECK-NEXT: [[COND3_OLD_OLD:%.*]] = icmp ugt i8 [[X]], 1
158+
; CHECK-NEXT: br i1 [[COND3_OLD_OLD]], label %[[BLOCK3]], label %[[BLOCK2]]
159+
; CHECK: [[BLOCK2]]:
160+
; CHECK-NEXT: br label %[[BLOCK3]]
161+
; CHECK: [[BLOCK3]]:
162+
; CHECK-NEXT: [[COND4:%.*]] = icmp eq i8 [[X]], 0
163+
; CHECK-NEXT: br i1 [[COND4]], label %[[EXIT:.*]], label %[[BLOCK1]]
164+
; CHECK: [[EXIT]]:
165+
; CHECK-NEXT: ret void
166+
;
167+
entry:
168+
%cond = icmp ne i8 %x, 0
169+
%cond3 = icmp ne i8 %x, 0
170+
%or.cond = and i1 %cond, %cond3
171+
br i1 %or.cond, label %block3, label %block2
172+
173+
block1: ; preds = %block3
174+
%cond3.old = icmp ugt i8 %x, 1
175+
br i1 %cond3.old, label %block3, label %block2
176+
177+
block2: ; preds = %block1, %entry
178+
br label %block3
179+
180+
block3: ; preds = %block2, %block1, %entry
181+
%cond4 = icmp eq i8 %x, 0
182+
br i1 %cond4, label %exit, label %block1
183+
184+
exit: ; preds = %block3
185+
ret void
186+
}

0 commit comments

Comments
 (0)