Skip to content

Commit 2db9f00

Browse files
[SimplifyCFG] Simplify switch instruction that has duplicate arms
I noticed that the following two C functions emit different IR: ``` int switch_duplicate_arms(int switch_val, int v, int w) { switch (switch_val) { default: break; case 0: w = v; break; case 1: w = v; break; } return w; } int if_duplicate_arms(int switch_val, int v, int w) { if (switch_val == 0) w = v; else if (switch_val == 1) w = v; return w; } ``` For `switch_duplicate_arms`, we generate IR that looks like this: ``` define i32 @switch_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3) { switch i32 %1, label %7 [ i32 0, label %5 i32 1, label %6 ] 5: br label %7 6: br label %7 7: %8 = phi i32 [ %3, %4 ], [ %2, %6 ], [ %2, %5 ] ret i32 %8 } ``` For the equivalent `if_duplicate_arms`, we generate: ``` define i32 @if_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3) { %5 = icmp ult i32 %1, 2 %6 = select i1 %5, i32 %2, i32 %3 ret i32 %6 } ``` For `switch_duplicate_arms`, taking case 0 and taking case 1 are equivalent, since %5 and %6 branch to the same location and the incoming values for %8 are the same from those blocks. We could remove one of the duplicate switch targets and update the switch with the single target. On RISC-V, prior to this patch, we generate the following code: ``` switch_duplicate_arms: li a4, 1 beq a1, a4, .LBB0_2 mv a0, a3 bnez a1, .LBB0_3 .LBB0_2: mv a0, a2 .LBB0_3: ret if_duplicate_arms: li a4, 2 mv a0, a2 bltu a1, a4, .LBB1_2 mv a0, a3 .LBB1_2: ret ``` After this patch, the O3 code is optimized to the icmp + select pair, which gives us the same code gen as `if_duplicate_arms` as desired. This may help with both code size and further switch simplification. I found that this patch causes no significant impact on spec2006/int/ref and spec2017/intrate/ref.
1 parent fbfa46a commit 2db9f00

File tree

5 files changed

+116
-44
lines changed

5 files changed

+116
-44
lines changed

llvm/lib/Transforms/Utils/SimplifyCFG.cpp

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,7 @@ class SimplifyCFGOpt {
276276
bool simplifyCleanupReturn(CleanupReturnInst *RI);
277277
bool simplifyUnreachable(UnreachableInst *UI);
278278
bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
279+
bool simplifyDuplicateSwitchArms(SwitchInst *SI);
279280
bool simplifyIndirectBr(IndirectBrInst *IBI);
280281
bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder);
281282
bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
@@ -7436,6 +7437,94 @@ static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder,
74367437
return true;
74377438
}
74387439

7440+
/// Merge switch arms that are indistinguishable from one another.
///
/// An arm is a candidate when its successor block has \p SI as its only
/// predecessor edge and consists solely of a BranchInst. Two candidates are
/// duplicates when their branches have the same shape (conditional or not),
/// the same condition, the same successors in the same order, and every PHI in
/// those successors receives the same incoming value from both blocks.
///
/// Every case that targets a duplicate is redirected to one representative
/// block. The now-unused duplicates are not erased here.
///
/// \param SI the switch whose case successors are inspected and rewritten.
/// \returns true if at least one case successor was changed.
bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) {
  // Two branch-only blocks are interchangeable when the branches match and the
  // PHIs in their successors cannot tell the two blocks apart.
  auto IsBranchEq = [](BranchInst *A, BranchInst *B) {
    if (A->isConditional() != B->isConditional())
      return false;

    if (A->isConditional() && A->getCondition() != B->getCondition())
      return false;

    if (A->getNumSuccessors() != B->getNumSuccessors())
      return false;

    for (unsigned I = 0; I < A->getNumSuccessors(); ++I)
      if (A->getSuccessor(I) != B->getSuccessor(I))
        return false;

    // The successors are identical at this point, so it is enough to walk A's
    // and require matching incoming PHI values for both blocks.
    for (auto *Succ : A->successors()) {
      for (PHINode &Phi : Succ->phis())
        if (Phi.getIncomingValueForBlock(A->getParent()) !=
            Phi.getIncomingValueForBlock(B->getParent()))
          return false;
    }

    return true;
  };

  // Map every candidate block to the canonical block that should replace it.
  // A canonical block maps to itself.
  //
  // Candidates are discovered by walking the cases in switch order (rather
  // than by iterating a pointer-keyed set) so that the representative picked
  // for a group of duplicates does not depend on block addresses.
  DenseMap<BasicBlock *, BasicBlock *> ReplaceWith;
  for (auto &Case : SI->cases()) {
    BasicBlock *BB = Case.getCaseSuccessor();

    // FIXME: This case needs some extra care because the terminators other
    // than SI need to be updated.
    if (!BB->hasNPredecessors(1))
      continue;

    // FIXME: Relax that the terminator is a BranchInst by checking for
    // equality on other kinds of terminators.
    Instruction *T = BB->getTerminator();
    if (!T || BB->size() != 1 || !isa<BranchInst>(T))
      continue;

    // Compare only against canonical entries (key == value). Matching a
    // non-canonical entry would chain replacements (C -> B, B -> A) instead of
    // sending every duplicate to the same block. IsBranchEq is an equivalence
    // relation, so at most one canonical entry can match and the visiting
    // order of the map does not influence the result.
    //
    // NOTE: this is a linear scan per candidate, i.e. quadratic in the number
    // of distinct arms in the worst case.
    BasicBlock *Canonical = BB;
    for (const auto &KV : ReplaceWith) {
      if (KV.first != KV.second)
        continue;
      if (IsBranchEq(cast<BranchInst>(T),
                     cast<BranchInst>(KV.first->getTerminator()))) {
        Canonical = KV.first;
        break;
      }
    }

    // Insert only after the scan so the map is never modified while it is
    // being iterated.
    ReplaceWith[BB] = Canonical;
  }

  // Do the replacement in SI. There is no fast lookup of BasicBlock -> Cases,
  // so iterate over the cases and consult the map for each successor.
  bool MadeChange = false;
  for (auto &Case : SI->cases()) {
    BasicBlock *OldSucc = Case.getCaseSuccessor();
    auto It = ReplaceWith.find(OldSucc);
    // Skip non-candidates and canonical blocks (replacing BB with BB is a
    // no-op).
    if (It == ReplaceWith.end() || It->second == OldSucc)
      continue;
    Case.setSuccessor(It->second);
    MadeChange = true;
  }

  return MadeChange;
}
7527+
74397528
bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
74407529
BasicBlock *BB = SI->getParent();
74417530

@@ -7496,6 +7585,9 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
74967585
hoistCommonCodeFromSuccessors(SI, !Options.HoistCommonInsts))
74977586
return requestResimplify();
74987587

7588+
if (simplifyDuplicateSwitchArms(SI))
7589+
return requestResimplify();
7590+
74997591
return false;
75007592
}
75017593

llvm/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -139,16 +139,14 @@ define i32 @PR34471(i32 %x) {
139139
; NO_FWD-NEXT: switch i32 [[X:%.*]], label [[ELSE3:%.*]] [
140140
; NO_FWD-NEXT: i32 17, label [[RETURN:%.*]]
141141
; NO_FWD-NEXT: i32 19, label [[IF19:%.*]]
142-
; NO_FWD-NEXT: i32 42, label [[IF42:%.*]]
142+
; NO_FWD-NEXT: i32 42, label [[IF19]]
143143
; NO_FWD-NEXT: ]
144144
; NO_FWD: if19:
145145
; NO_FWD-NEXT: br label [[RETURN]]
146-
; NO_FWD: if42:
147-
; NO_FWD-NEXT: br label [[RETURN]]
148146
; NO_FWD: else3:
149147
; NO_FWD-NEXT: br label [[RETURN]]
150148
; NO_FWD: return:
151-
; NO_FWD-NEXT: [[R:%.*]] = phi i32 [ [[X]], [[IF19]] ], [ [[X]], [[IF42]] ], [ 0, [[ELSE3]] ], [ 17, [[ENTRY:%.*]] ]
149+
; NO_FWD-NEXT: [[R:%.*]] = phi i32 [ [[X]], [[IF19]] ], [ 0, [[ELSE3]] ], [ 17, [[ENTRY:%.*]] ]
152150
; NO_FWD-NEXT: ret i32 [[R]]
153151
;
154152
; FWD-LABEL: @PR34471(

llvm/test/Transforms/SimplifyCFG/HoistCode.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,14 +65,12 @@ define float @PR39535min_switch(i64 %i, float %x) {
6565
; CHECK-NEXT: entry:
6666
; CHECK-NEXT: switch i64 [[I:%.*]], label [[END:%.*]] [
6767
; CHECK-NEXT: i64 1, label [[BB1:%.*]]
68-
; CHECK-NEXT: i64 2, label [[BB2:%.*]]
68+
; CHECK-NEXT: i64 2, label [[BB1]]
6969
; CHECK-NEXT: ]
7070
; CHECK: bb1:
7171
; CHECK-NEXT: br label [[END]]
72-
; CHECK: bb2:
73-
; CHECK-NEXT: br label [[END]]
7472
; CHECK: end:
75-
; CHECK-NEXT: [[COND:%.*]] = phi fast float [ [[X:%.*]], [[BB1]] ], [ [[X]], [[BB2]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
73+
; CHECK-NEXT: [[COND:%.*]] = phi fast float [ [[X:%.*]], [[BB1]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
7674
; CHECK-NEXT: ret float [[COND]]
7775
;
7876
entry:

llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll

Lines changed: 18 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -5,30 +5,20 @@
55
define i32 @switch_all_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3) {
66
; SIMPLIFY-CFG-LABEL: define i32 @switch_all_duplicate_arms(
77
; SIMPLIFY-CFG-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]]) {
8-
; SIMPLIFY-CFG-NEXT: switch i32 [[TMP1]], label %[[BB7:.*]] [
8+
; SIMPLIFY-CFG-NEXT: switch i32 [[TMP1]], label %[[BB6:.*]] [
99
; SIMPLIFY-CFG-NEXT: i32 0, label %[[BB5:.*]]
10-
; SIMPLIFY-CFG-NEXT: i32 1, label %[[BB6:.*]]
10+
; SIMPLIFY-CFG-NEXT: i32 1, label %[[BB5]]
1111
; SIMPLIFY-CFG-NEXT: ]
1212
; SIMPLIFY-CFG: [[BB5]]:
13-
; SIMPLIFY-CFG-NEXT: br label %[[BB7]]
13+
; SIMPLIFY-CFG-NEXT: br label %[[BB6]]
1414
; SIMPLIFY-CFG: [[BB6]]:
15-
; SIMPLIFY-CFG-NEXT: br label %[[BB7]]
16-
; SIMPLIFY-CFG: [[BB7]]:
17-
; SIMPLIFY-CFG-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP3]], [[TMP4:%.*]] ], [ [[TMP2]], %[[BB6]] ], [ [[TMP2]], %[[BB5]] ]
15+
; SIMPLIFY-CFG-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP3]], [[TMP4:%.*]] ], [ [[TMP2]], %[[BB5]] ]
1816
; SIMPLIFY-CFG-NEXT: ret i32 [[TMP8]]
1917
;
2018
; O3-LABEL: define i32 @switch_all_duplicate_arms(
2119
; O3-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
22-
; O3-NEXT: switch i32 [[TMP1]], label %[[BB7:.*]] [
23-
; O3-NEXT: i32 0, label %[[BB5:.*]]
24-
; O3-NEXT: i32 1, label %[[BB6:.*]]
25-
; O3-NEXT: ]
26-
; O3: [[BB5]]:
27-
; O3-NEXT: br label %[[BB7]]
28-
; O3: [[BB6]]:
29-
; O3-NEXT: br label %[[BB7]]
30-
; O3: [[BB7]]:
31-
; O3-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP3]], [[TMP4:%.*]] ], [ [[TMP2]], %[[BB6]] ], [ [[TMP2]], %[[BB5]] ]
20+
; O3-NEXT: [[SWITCH:%.*]] = icmp ult i32 [[TMP1]], 2
21+
; O3-NEXT: [[TMP8:%.*]] = select i1 [[SWITCH]], i32 [[TMP2]], i32 [[TMP3]]
3222
; O3-NEXT: ret i32 [[TMP8]]
3323
;
3424
switch i32 %1, label %7 [
@@ -50,36 +40,32 @@ define i32 @switch_all_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3) {
5040
define i32 @switch_some_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) {
5141
; SIMPLIFY-CFG-LABEL: define i32 @switch_some_duplicate_arms(
5242
; SIMPLIFY-CFG-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]]) {
53-
; SIMPLIFY-CFG-NEXT: switch i32 [[TMP1]], label %[[BB9:.*]] [
43+
; SIMPLIFY-CFG-NEXT: switch i32 [[TMP1]], label %[[BB8:.*]] [
5444
; SIMPLIFY-CFG-NEXT: i32 0, label %[[BB6:.*]]
55-
; SIMPLIFY-CFG-NEXT: i32 1, label %[[BB7:.*]]
56-
; SIMPLIFY-CFG-NEXT: i32 2, label %[[BB8:.*]]
45+
; SIMPLIFY-CFG-NEXT: i32 1, label %[[BB6]]
46+
; SIMPLIFY-CFG-NEXT: i32 2, label %[[BB7:.*]]
5747
; SIMPLIFY-CFG-NEXT: ]
5848
; SIMPLIFY-CFG: [[BB6]]:
59-
; SIMPLIFY-CFG-NEXT: br label %[[BB9]]
49+
; SIMPLIFY-CFG-NEXT: br label %[[BB8]]
6050
; SIMPLIFY-CFG: [[BB7]]:
61-
; SIMPLIFY-CFG-NEXT: br label %[[BB9]]
51+
; SIMPLIFY-CFG-NEXT: br label %[[BB8]]
6252
; SIMPLIFY-CFG: [[BB8]]:
63-
; SIMPLIFY-CFG-NEXT: br label %[[BB9]]
64-
; SIMPLIFY-CFG: [[BB9]]:
65-
; SIMPLIFY-CFG-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP3]], [[TMP5:%.*]] ], [ [[TMP4]], %[[BB8]] ], [ [[TMP2]], %[[BB7]] ], [ [[TMP2]], %[[BB6]] ]
53+
; SIMPLIFY-CFG-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP3]], [[TMP5:%.*]] ], [ [[TMP4]], %[[BB7]] ], [ [[TMP2]], %[[BB6]] ]
6654
; SIMPLIFY-CFG-NEXT: ret i32 [[TMP10]]
6755
;
6856
; O3-LABEL: define i32 @switch_some_duplicate_arms(
6957
; O3-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]]) local_unnamed_addr #[[ATTR0]] {
70-
; O3-NEXT: switch i32 [[TMP1]], label %[[BB9:.*]] [
58+
; O3-NEXT: switch i32 [[TMP1]], label %[[BB8:.*]] [
7159
; O3-NEXT: i32 0, label %[[BB6:.*]]
72-
; O3-NEXT: i32 1, label %[[BB7:.*]]
73-
; O3-NEXT: i32 2, label %[[BB8:.*]]
60+
; O3-NEXT: i32 1, label %[[BB6]]
61+
; O3-NEXT: i32 2, label %[[BB7:.*]]
7462
; O3-NEXT: ]
7563
; O3: [[BB6]]:
76-
; O3-NEXT: br label %[[BB9]]
64+
; O3-NEXT: br label %[[BB8]]
7765
; O3: [[BB7]]:
78-
; O3-NEXT: br label %[[BB9]]
66+
; O3-NEXT: br label %[[BB8]]
7967
; O3: [[BB8]]:
80-
; O3-NEXT: br label %[[BB9]]
81-
; O3: [[BB9]]:
82-
; O3-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP3]], [[TMP5:%.*]] ], [ [[TMP4]], %[[BB8]] ], [ [[TMP2]], %[[BB7]] ], [ [[TMP2]], %[[BB6]] ]
68+
; O3-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP3]], [[TMP5:%.*]] ], [ [[TMP4]], %[[BB7]] ], [ [[TMP2]], %[[BB6]] ]
8369
; O3-NEXT: ret i32 [[TMP10]]
8470
;
8571
switch i32 %1, label %9 [

llvm/test/Transforms/SimplifyCFG/switch-to-select-two-case.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -272,16 +272,14 @@ define i8 @switch_to_select_two_case_results_no_default(i32 %i) {
272272
; CHECK-NEXT: i32 0, label [[END:%.*]]
273273
; CHECK-NEXT: i32 2, label [[END]]
274274
; CHECK-NEXT: i32 4, label [[CASE3:%.*]]
275-
; CHECK-NEXT: i32 6, label [[CASE4:%.*]]
275+
; CHECK-NEXT: i32 6, label [[CASE3]]
276276
; CHECK-NEXT: ]
277277
; CHECK: case3:
278278
; CHECK-NEXT: br label [[END]]
279-
; CHECK: case4:
280-
; CHECK-NEXT: br label [[END]]
281279
; CHECK: default:
282280
; CHECK-NEXT: unreachable
283281
; CHECK: end:
284-
; CHECK-NEXT: [[T0:%.*]] = phi i8 [ 44, [[CASE3]] ], [ 44, [[CASE4]] ], [ 42, [[ENTRY:%.*]] ], [ 42, [[ENTRY]] ]
282+
; CHECK-NEXT: [[T0:%.*]] = phi i8 [ 44, [[CASE3]] ], [ 42, [[ENTRY:%.*]] ], [ 42, [[ENTRY]] ]
285283
; CHECK-NEXT: ret i8 [[T0]]
286284
;
287285
entry:

0 commit comments

Comments
 (0)