Skip to content

Commit affed57

Browse files
authored
[DFAJumpThreading] Add MaxOuterUseBlocks threshold (#163428)
For every threadable path `B1 -> B2 -> ... -> Bn`, we need to insert phi nodes into every unduplicated successor of `Bi` if there are outer uses of duplicated definitions in `Bi`. To prevent an explosion in the number of phi nodes, this patch adds a threshold on the maximum number of unduplicated successors that may contain outer uses. This threshold is especially useful when multi-target branches such as switch/indirectbr/callbr are duplicated. Note that the O3 statistics in llvm-test-suite are not affected.
1 parent 287ca7b commit affed57

File tree

2 files changed

+359
-5
lines changed

2 files changed

+359
-5
lines changed

llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -122,16 +122,22 @@ static cl::opt<unsigned>
122122
cl::desc("Maximum cost accepted for the transformation"),
123123
cl::Hidden, cl::init(50));
124124

125-
extern cl::opt<bool> ProfcheckDisableMetadataFixes;
126-
127-
} // namespace llvm
128-
129125
static cl::opt<double> MaxClonedRate(
130126
"dfa-max-cloned-rate",
131127
cl::desc(
132128
"Maximum cloned instructions rate accepted for the transformation"),
133129
cl::Hidden, cl::init(7.5));
134130

131+
static cl::opt<unsigned>
132+
MaxOuterUseBlocks("dfa-max-out-use-blocks",
133+
cl::desc("Maximum unduplicated blocks with outer uses "
134+
"accepted for the transformation"),
135+
cl::Hidden, cl::init(40));
136+
137+
extern cl::opt<bool> ProfcheckDisableMetadataFixes;
138+
139+
} // namespace llvm
140+
135141
namespace {
136142
class SelectInstToUnfold {
137143
SelectInst *SI;
@@ -965,8 +971,16 @@ struct TransformDFA {
965971
// SLPVectorizer.
966972
// TODO: Thread the switch partially before reaching the threshold.
967973
uint64_t NumOrigInst = 0;
968-
for (auto *BB : DuplicateMap.keys())
974+
uint64_t NumOuterUseBlock = 0;
975+
for (auto *BB : DuplicateMap.keys()) {
969976
NumOrigInst += BB->sizeWithoutDebug();
977+
// Only unduplicated blocks with a single predecessor require new phi
978+
// nodes.
979+
for (auto *Succ : successors(BB))
980+
if (!DuplicateMap.count(Succ) && Succ->getSinglePredecessor())
981+
NumOuterUseBlock++;
982+
}
983+
970984
if (double(NumClonedInst) / double(NumOrigInst) > MaxClonedRate) {
971985
LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, too much "
972986
"instructions wll be cloned\n");
@@ -977,6 +991,20 @@ struct TransformDFA {
977991
return false;
978992
}
979993

994+
// Too many unduplicated blocks with outer uses may cause too many
995+
// insertions of phi nodes for duplicated definitions. TODO: Drop this
996+
// threshold if we come up with another way to reduce the number of inserted
997+
// phi nodes.
998+
if (NumOuterUseBlock > MaxOuterUseBlocks) {
999+
LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, too much "
1000+
"blocks with outer uses\n");
1001+
ORE->emit([&]() {
1002+
return OptimizationRemarkMissed(DEBUG_TYPE, "NotProfitable", Switch)
1003+
<< "Too much blocks with outer uses.";
1004+
});
1005+
return false;
1006+
}
1007+
9801008
InstructionCost DuplicationCost = 0;
9811009

9821010
unsigned JumpTableSize = 0;
Lines changed: 326 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,326 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt -S -passes=dfa-jump-threading -dfa-max-out-use-blocks=5 %s | FileCheck %s
3+
4+
declare void @use(i32)
5+
6+
define void @max_outer_uses_by_switch(i32 %cond, ptr %p) {
7+
; CHECK-LABEL: define void @max_outer_uses_by_switch(
8+
; CHECK-SAME: i32 [[COND:%.*]], ptr [[P:%.*]]) {
9+
; CHECK-NEXT: [[ENTRY:.*]]:
10+
; CHECK-NEXT: br label %[[SWITCH_BB:.*]]
11+
; CHECK: [[SWITCH_BB]]:
12+
; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[DETERMINE:%.*]], %[[SUB_SWITCH_BB:.*]] ], [ 2, %[[CASE2:.*]] ]
13+
; CHECK-NEXT: switch i32 [[PHI]], label %[[DEFAULT_DEST:.*]] [
14+
; CHECK-NEXT: i32 0, label %[[CASE1:.*]]
15+
; CHECK-NEXT: i32 1, label %[[CASE2]]
16+
; CHECK-NEXT: i32 2, label %[[CASE3:.*]]
17+
; CHECK-NEXT: ]
18+
; CHECK: [[CASE1]]:
19+
; CHECK-NEXT: br label %[[SUB_SWITCH_BB]]
20+
; CHECK: [[CASE3]]:
21+
; CHECK-NEXT: br label %[[SUB_SWITCH_BB]]
22+
; CHECK: [[SUB_SWITCH_BB]]:
23+
; CHECK-NEXT: [[DETERMINE]] = phi i32 [ 1, %[[CASE1]] ], [ 3, %[[CASE3]] ]
24+
; CHECK-NEXT: [[DEF:%.*]] = load i32, ptr [[P]], align 4
25+
; CHECK-NEXT: switch i32 [[COND]], label %[[SWITCH_BB]] [
26+
; CHECK-NEXT: i32 0, label %[[OUTER1:.*]]
27+
; CHECK-NEXT: i32 1, label %[[OUTER2:.*]]
28+
; CHECK-NEXT: i32 2, label %[[OUTER3:.*]]
29+
; CHECK-NEXT: i32 3, label %[[OUTER4:.*]]
30+
; CHECK-NEXT: ]
31+
; CHECK: [[CASE2]]:
32+
; CHECK-NEXT: br label %[[SWITCH_BB]]
33+
; CHECK: [[OUTER1]]:
34+
; CHECK-NEXT: call void @use(i32 [[DEF]])
35+
; CHECK-NEXT: ret void
36+
; CHECK: [[OUTER2]]:
37+
; CHECK-NEXT: call void @use(i32 [[DEF]])
38+
; CHECK-NEXT: ret void
39+
; CHECK: [[OUTER3]]:
40+
; CHECK-NEXT: call void @use(i32 [[DEF]])
41+
; CHECK-NEXT: ret void
42+
; CHECK: [[OUTER4]]:
43+
; CHECK-NEXT: call void @use(i32 [[DEF]])
44+
; CHECK-NEXT: ret void
45+
; CHECK: [[DEFAULT_DEST]]:
46+
; CHECK-NEXT: ret void
47+
;
48+
entry:
49+
br label %switch_bb
50+
51+
switch_bb:
52+
%phi = phi i32 [ 0, %entry ], [ %determine, %sub_switch_bb ], [ 2, %case2 ]
53+
switch i32 %phi, label %default_dest [
54+
i32 0, label %case1
55+
i32 1, label %case2
56+
i32 2, label %case3
57+
]
58+
59+
case1:
60+
br label %sub_switch_bb
61+
62+
case3:
63+
br label %sub_switch_bb
64+
65+
sub_switch_bb:
66+
%determine = phi i32 [ 1, %case1 ], [ 3, %case3 ]
67+
%def = load i32, ptr %p
68+
switch i32 %cond, label %switch_bb [
69+
i32 0, label %outer1
70+
i32 1, label %outer2
71+
i32 2, label %outer3
72+
i32 3, label %outer4
73+
]
74+
75+
case2:
76+
br label %switch_bb
77+
78+
outer1:
79+
call void @use(i32 %def)
80+
ret void
81+
82+
outer2:
83+
call void @use(i32 %def)
84+
ret void
85+
86+
outer3:
87+
call void @use(i32 %def)
88+
ret void
89+
90+
outer4:
91+
call void @use(i32 %def)
92+
ret void
93+
94+
default_dest:
95+
ret void
96+
}
97+
98+
define void @less_outer_uses_by_switch(i32 %cond, ptr %p) {
99+
; CHECK-LABEL: define void @less_outer_uses_by_switch(
100+
; CHECK-SAME: i32 [[COND:%.*]], ptr [[P:%.*]]) {
101+
; CHECK-NEXT: [[ENTRY:.*]]:
102+
; CHECK-NEXT: br label %[[SWITCH_BB:.*]]
103+
; CHECK: [[SWITCH_BB]]:
104+
; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ poison, %[[SUB_SWITCH_BB:.*]] ]
105+
; CHECK-NEXT: switch i32 [[PHI]], label %[[DEFAULT_DEST:.*]] [
106+
; CHECK-NEXT: i32 0, label %[[CASE1:.*]]
107+
; CHECK-NEXT: i32 1, label %[[CASE2:.*]]
108+
; CHECK-NEXT: i32 2, label %[[CASE3:.*]]
109+
; CHECK-NEXT: ]
110+
; CHECK: [[SWITCH_BB_JT2:.*]]:
111+
; CHECK-NEXT: [[PHI_JT2:%.*]] = phi i32 [ 2, %[[CASE2]] ]
112+
; CHECK-NEXT: br label %[[CASE3]]
113+
; CHECK: [[SWITCH_BB_JT3:.*]]:
114+
; CHECK-NEXT: [[PHI_JT3:%.*]] = phi i32 [ [[DETERMINE_JT3:%.*]], %[[SUB_SWITCH_BB_JT3:.*]] ]
115+
; CHECK-NEXT: br label %[[DEFAULT_DEST]]
116+
; CHECK: [[SWITCH_BB_JT1:.*]]:
117+
; CHECK-NEXT: [[PHI_JT1:%.*]] = phi i32 [ [[DETERMINE_JT1:%.*]], %[[SUB_SWITCH_BB_JT1:.*]] ]
118+
; CHECK-NEXT: br label %[[CASE2]]
119+
; CHECK: [[CASE1]]:
120+
; CHECK-NEXT: br label %[[SUB_SWITCH_BB_JT1]]
121+
; CHECK: [[CASE3]]:
122+
; CHECK-NEXT: br label %[[SUB_SWITCH_BB_JT3]]
123+
; CHECK: [[SUB_SWITCH_BB]]:
124+
; CHECK-NEXT: [[DEF:%.*]] = load i32, ptr [[P]], align 4
125+
; CHECK-NEXT: switch i32 [[COND]], label %[[SWITCH_BB]] [
126+
; CHECK-NEXT: i32 0, label %[[OUTER1:.*]]
127+
; CHECK-NEXT: ]
128+
; CHECK: [[SUB_SWITCH_BB_JT3]]:
129+
; CHECK-NEXT: [[DETERMINE_JT3]] = phi i32 [ 3, %[[CASE3]] ]
130+
; CHECK-NEXT: [[DEF_JT3:%.*]] = load i32, ptr [[P]], align 4
131+
; CHECK-NEXT: switch i32 [[COND]], label %[[SWITCH_BB_JT3]] [
132+
; CHECK-NEXT: i32 0, label %[[OUTER1]]
133+
; CHECK-NEXT: ]
134+
; CHECK: [[SUB_SWITCH_BB_JT1]]:
135+
; CHECK-NEXT: [[DETERMINE_JT1]] = phi i32 [ 1, %[[CASE1]] ]
136+
; CHECK-NEXT: [[DEF_JT1:%.*]] = load i32, ptr [[P]], align 4
137+
; CHECK-NEXT: switch i32 [[COND]], label %[[SWITCH_BB_JT1]] [
138+
; CHECK-NEXT: i32 0, label %[[OUTER1]]
139+
; CHECK-NEXT: ]
140+
; CHECK: [[CASE2]]:
141+
; CHECK-NEXT: br label %[[SWITCH_BB_JT2]]
142+
; CHECK: [[OUTER1]]:
143+
; CHECK-NEXT: [[DEF1:%.*]] = phi i32 [ [[DEF_JT3]], %[[SUB_SWITCH_BB_JT3]] ], [ [[DEF_JT1]], %[[SUB_SWITCH_BB_JT1]] ], [ [[DEF]], %[[SUB_SWITCH_BB]] ]
144+
; CHECK-NEXT: call void @use(i32 [[DEF1]])
145+
; CHECK-NEXT: ret void
146+
; CHECK: [[DEFAULT_DEST]]:
147+
; CHECK-NEXT: ret void
148+
;
149+
entry:
150+
br label %switch_bb
151+
152+
switch_bb:
153+
%phi = phi i32 [ 0, %entry ], [ %determine, %sub_switch_bb ], [ 2, %case2 ]
154+
switch i32 %phi, label %default_dest [
155+
i32 0, label %case1
156+
i32 1, label %case2
157+
i32 2, label %case3
158+
]
159+
160+
case1:
161+
br label %sub_switch_bb
162+
163+
case3:
164+
br label %sub_switch_bb
165+
166+
sub_switch_bb:
167+
%determine = phi i32 [ 1, %case1 ], [ 3, %case3 ]
168+
%def = load i32, ptr %p
169+
switch i32 %cond, label %switch_bb [
170+
i32 0, label %outer1
171+
]
172+
173+
case2:
174+
br label %switch_bb
175+
176+
outer1:
177+
call void @use(i32 %def)
178+
ret void
179+
180+
default_dest:
181+
ret void
182+
}
183+
184+
185+
define void @max_outer_uses_multi_preds(i32 %cond, ptr %p) {
186+
; CHECK-LABEL: define void @max_outer_uses_multi_preds(
187+
; CHECK-SAME: i32 [[COND:%.*]], ptr [[P:%.*]]) {
188+
; CHECK-NEXT: [[ENTRY:.*]]:
189+
; CHECK-NEXT: br label %[[SWITCH_BB:.*]]
190+
; CHECK: [[SWITCH_BB]]:
191+
; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ poison, %[[SUB_SWITCH_BB:.*]] ]
192+
; CHECK-NEXT: switch i32 [[PHI]], label %[[DEFAULT_DEST:.*]] [
193+
; CHECK-NEXT: i32 0, label %[[CASE1:.*]]
194+
; CHECK-NEXT: i32 1, label %[[CASE2:.*]]
195+
; CHECK-NEXT: i32 2, label %[[CASE3:.*]]
196+
; CHECK-NEXT: i32 3, label %[[CASE4:.*]]
197+
; CHECK-NEXT: ]
198+
; CHECK: [[SWITCH_BB_JT2:.*]]:
199+
; CHECK-NEXT: [[PHI_JT2:%.*]] = phi i32 [ 2, %[[CASE2]] ]
200+
; CHECK-NEXT: br label %[[CASE3]]
201+
; CHECK: [[SWITCH_BB_JT3:.*]]:
202+
; CHECK-NEXT: [[PHI_JT3:%.*]] = phi i32 [ [[DETERMINE_JT3:%.*]], %[[SUB_SWITCH_BB_JT3:.*]] ]
203+
; CHECK-NEXT: br label %[[CASE4]]
204+
; CHECK: [[SWITCH_BB_JT1:.*]]:
205+
; CHECK-NEXT: [[PHI_JT1:%.*]] = phi i32 [ [[DETERMINE_JT1:%.*]], %[[SUB_SWITCH_BB_JT1:.*]] ]
206+
; CHECK-NEXT: br label %[[CASE2]]
207+
; CHECK: [[CASE1]]:
208+
; CHECK-NEXT: br label %[[SUB_SWITCH_BB_JT1]]
209+
; CHECK: [[CASE3]]:
210+
; CHECK-NEXT: br label %[[SUB_SWITCH_BB_JT3]]
211+
; CHECK: [[SUB_SWITCH_BB]]:
212+
; CHECK-NEXT: [[DEF:%.*]] = load i32, ptr [[P]], align 4
213+
; CHECK-NEXT: switch i32 [[COND]], label %[[SWITCH_BB]] [
214+
; CHECK-NEXT: i32 0, label %[[OUTER1:.*]]
215+
; CHECK-NEXT: i32 1, label %[[OUTER2:.*]]
216+
; CHECK-NEXT: i32 2, label %[[OUTER3:.*]]
217+
; CHECK-NEXT: i32 3, label %[[OUTER4:.*]]
218+
; CHECK-NEXT: ]
219+
; CHECK: [[SUB_SWITCH_BB_JT3]]:
220+
; CHECK-NEXT: [[DETERMINE_JT3]] = phi i32 [ 3, %[[CASE3]] ]
221+
; CHECK-NEXT: [[DEF_JT3:%.*]] = load i32, ptr [[P]], align 4
222+
; CHECK-NEXT: switch i32 [[COND]], label %[[SWITCH_BB_JT3]] [
223+
; CHECK-NEXT: i32 0, label %[[OUTER1]]
224+
; CHECK-NEXT: i32 1, label %[[OUTER2]]
225+
; CHECK-NEXT: i32 2, label %[[OUTER3]]
226+
; CHECK-NEXT: i32 3, label %[[OUTER4]]
227+
; CHECK-NEXT: ]
228+
; CHECK: [[SUB_SWITCH_BB_JT1]]:
229+
; CHECK-NEXT: [[DETERMINE_JT1]] = phi i32 [ 1, %[[CASE1]] ]
230+
; CHECK-NEXT: [[DEF_JT1:%.*]] = load i32, ptr [[P]], align 4
231+
; CHECK-NEXT: switch i32 [[COND]], label %[[SWITCH_BB_JT1]] [
232+
; CHECK-NEXT: i32 0, label %[[OUTER1]]
233+
; CHECK-NEXT: i32 1, label %[[OUTER2]]
234+
; CHECK-NEXT: i32 2, label %[[OUTER3]]
235+
; CHECK-NEXT: i32 3, label %[[OUTER4]]
236+
; CHECK-NEXT: ]
237+
; CHECK: [[CASE4]]:
238+
; CHECK-NEXT: [[DEF1:%.*]] = load i32, ptr [[P]], align 4
239+
; CHECK-NEXT: switch i32 [[COND]], label %[[OUTER4]] [
240+
; CHECK-NEXT: i32 0, label %[[OUTER1]]
241+
; CHECK-NEXT: i32 1, label %[[OUTER2]]
242+
; CHECK-NEXT: i32 2, label %[[OUTER3]]
243+
; CHECK-NEXT: ]
244+
; CHECK: [[CASE2]]:
245+
; CHECK-NEXT: br label %[[SWITCH_BB_JT2]]
246+
; CHECK: [[OUTER1]]:
247+
; CHECK-NEXT: [[PHI1:%.*]] = phi i32 [ [[DEF]], %[[SUB_SWITCH_BB]] ], [ [[DEF1]], %[[CASE4]] ], [ [[DEF_JT1]], %[[SUB_SWITCH_BB_JT1]] ], [ [[DEF_JT3]], %[[SUB_SWITCH_BB_JT3]] ]
248+
; CHECK-NEXT: call void @use(i32 [[PHI1]])
249+
; CHECK-NEXT: ret void
250+
; CHECK: [[OUTER2]]:
251+
; CHECK-NEXT: [[PHI2:%.*]] = phi i32 [ [[DEF]], %[[SUB_SWITCH_BB]] ], [ [[DEF1]], %[[CASE4]] ], [ [[DEF_JT1]], %[[SUB_SWITCH_BB_JT1]] ], [ [[DEF_JT3]], %[[SUB_SWITCH_BB_JT3]] ]
252+
; CHECK-NEXT: call void @use(i32 [[PHI2]])
253+
; CHECK-NEXT: ret void
254+
; CHECK: [[OUTER3]]:
255+
; CHECK-NEXT: [[PHI3:%.*]] = phi i32 [ [[DEF]], %[[SUB_SWITCH_BB]] ], [ [[DEF1]], %[[CASE4]] ], [ [[DEF_JT1]], %[[SUB_SWITCH_BB_JT1]] ], [ [[DEF_JT3]], %[[SUB_SWITCH_BB_JT3]] ]
256+
; CHECK-NEXT: call void @use(i32 [[PHI3]])
257+
; CHECK-NEXT: ret void
258+
; CHECK: [[OUTER4]]:
259+
; CHECK-NEXT: [[PHI4:%.*]] = phi i32 [ [[DEF]], %[[SUB_SWITCH_BB]] ], [ [[DEF1]], %[[CASE4]] ], [ [[DEF_JT1]], %[[SUB_SWITCH_BB_JT1]] ], [ [[DEF_JT3]], %[[SUB_SWITCH_BB_JT3]] ]
260+
; CHECK-NEXT: call void @use(i32 [[PHI4]])
261+
; CHECK-NEXT: ret void
262+
; CHECK: [[DEFAULT_DEST]]:
263+
; CHECK-NEXT: ret void
264+
;
265+
entry:
266+
br label %switch_bb
267+
268+
switch_bb:
269+
%phi = phi i32 [ 0, %entry ], [ %determine, %sub_switch_bb ], [ 2, %case2 ]
270+
switch i32 %phi, label %default_dest [
271+
i32 0, label %case1
272+
i32 1, label %case2
273+
i32 2, label %case3
274+
i32 3, label %case4
275+
]
276+
277+
case1:
278+
br label %sub_switch_bb
279+
280+
case3:
281+
br label %sub_switch_bb
282+
283+
sub_switch_bb:
284+
%determine = phi i32 [ 1, %case1 ], [ 3, %case3 ]
285+
%def = load i32, ptr %p
286+
switch i32 %cond, label %switch_bb [
287+
i32 0, label %outer1
288+
i32 1, label %outer2
289+
i32 2, label %outer3
290+
i32 3, label %outer4
291+
]
292+
293+
case4:
294+
%def1 = load i32, ptr %p
295+
switch i32 %cond, label %outer4 [
296+
i32 0, label %outer1
297+
i32 1, label %outer2
298+
i32 2, label %outer3
299+
]
300+
301+
case2:
302+
br label %switch_bb
303+
304+
outer1:
305+
%phi1 = phi i32 [ %def, %sub_switch_bb ], [ %def1, %case4 ]
306+
call void @use(i32 %phi1)
307+
ret void
308+
309+
outer2:
310+
%phi2 = phi i32 [ %def, %sub_switch_bb ], [ %def1, %case4 ]
311+
call void @use(i32 %phi2)
312+
ret void
313+
314+
outer3:
315+
%phi3 = phi i32 [ %def, %sub_switch_bb ], [ %def1, %case4 ]
316+
call void @use(i32 %phi3)
317+
ret void
318+
319+
outer4:
320+
%phi4 = phi i32 [ %def, %sub_switch_bb ], [ %def1, %case4 ]
321+
call void @use(i32 %phi4)
322+
ret void
323+
324+
default_dest:
325+
ret void
326+
}

0 commit comments

Comments
 (0)