Skip to content

Commit c2cf2dd

Browse files
[SimplifyCFG] Extend simplifySwitchOfPowersOfTwo to reachable defaults
Favour a `cttz`-indexed table lookup over an indirect jump table when the default switch case is reachable, by branching non-power-of-two inputs to the default case. Proofs: https://alive2.llvm.org/ce/z/HeRAtf.
1 parent 8555764 commit c2cf2dd

File tree

2 files changed

+48
-41
lines changed

2 files changed

+48
-41
lines changed

llvm/lib/Transforms/Utils/SimplifyCFG.cpp

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7422,6 +7422,7 @@ static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
74227422
/// log2(C)-indexed value table (instead of traditionally emitting a load of the
74237423
/// address of the jump target, and indirectly jumping to it).
74247424
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
7425+
DomTreeUpdater *DTU,
74257426
const DataLayout &DL,
74267427
const TargetTransformInfo &TTI) {
74277428
Value *Condition = SI->getCondition();
@@ -7444,12 +7445,6 @@ static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
74447445
if (SI->getNumCases() < 4)
74457446
return false;
74467447

7447-
// We perform this optimization only for switches with
7448-
// unreachable default case.
7449-
// This assumption will save us from checking if `Condition` is a power of two.
7450-
if (!SI->defaultDestUnreachable())
7451-
return false;
7452-
74537448
// Check that switch cases are powers of two.
74547449
SmallVector<uint64_t, 4> Values;
74557450
for (const auto &Case : SI->cases()) {
@@ -7469,6 +7464,24 @@ static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
74697464

74707465
Builder.SetInsertPoint(SI);
74717466

7467+
if (!SI->defaultDestUnreachable()) {
7468+
// Let non-power-of-two inputs jump to the default case, when the latter is
7469+
// reachable.
7470+
auto *PopC = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, Condition);
7471+
auto *IsPow2 = Builder.CreateICmpEQ(PopC, ConstantInt::get(CondTy, 1));
7472+
7473+
auto *OrigBB = SI->getParent();
7474+
auto *DefaultCaseBB = SI->getDefaultDest();
7475+
BasicBlock *SplitBB = SplitBlock(OrigBB, SI, DTU);
7476+
auto It = OrigBB->getTerminator()->getIterator();
7477+
BranchInst::Create(SplitBB, DefaultCaseBB, IsPow2, It);
7478+
It->eraseFromParent();
7479+
7480+
addPredecessorToBlock(DefaultCaseBB, OrigBB, SplitBB);
7481+
if (DTU)
7482+
DTU->applyUpdates({{DominatorTree::Insert, OrigBB, DefaultCaseBB}});
7483+
}
7484+
74727485
// Replace each case with its trailing zeros number.
74737486
for (auto &Case : SI->cases()) {
74747487
auto *OrigValue = Case.getCaseValue();
@@ -7827,7 +7840,7 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
78277840
simplifySwitchLookup(SI, Builder, DTU, DL, TTI))
78287841
return requestResimplify();
78297842

7830-
if (simplifySwitchOfPowersOfTwo(SI, Builder, DL, TTI))
7843+
if (simplifySwitchOfPowersOfTwo(SI, Builder, DTU, DL, TTI))
78317844
return requestResimplify();
78327845

78337846
if (reduceSwitchRange(SI, Builder, DL, TTI))

llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll

Lines changed: 28 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -39,25 +39,20 @@ define i32 @switch_of_powers_two_default_reachable(i32 %arg) {
3939
; CHECK-LABEL: define i32 @switch_of_powers_two_default_reachable(
4040
; CHECK-SAME: i32 [[ARG:%.*]]) {
4141
; CHECK-NEXT: [[ENTRY:.*]]:
42-
; CHECK-NEXT: switch i32 [[ARG]], label %[[RETURN:.*]] [
43-
; CHECK-NEXT: i32 1, label %[[BB1:.*]]
44-
; CHECK-NEXT: i32 8, label %[[BB2:.*]]
45-
; CHECK-NEXT: i32 16, label %[[BB3:.*]]
46-
; CHECK-NEXT: i32 32, label %[[BB4:.*]]
47-
; CHECK-NEXT: i32 64, label %[[BB5:.*]]
48-
; CHECK-NEXT: ]
49-
; CHECK: [[BB1]]:
50-
; CHECK-NEXT: br label %[[RETURN]]
51-
; CHECK: [[BB2]]:
52-
; CHECK-NEXT: br label %[[RETURN]]
53-
; CHECK: [[BB3]]:
54-
; CHECK-NEXT: br label %[[RETURN]]
55-
; CHECK: [[BB4]]:
56-
; CHECK-NEXT: br label %[[RETURN]]
57-
; CHECK: [[BB5]]:
42+
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.ctpop.i32(i32 [[ARG]])
43+
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 1
44+
; CHECK-NEXT: br i1 [[TMP1]], label %[[ENTRY_SPLIT:.*]], label %[[RETURN:.*]]
45+
; CHECK: [[ENTRY_SPLIT]]:
46+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.cttz.i32(i32 [[ARG]], i1 true)
47+
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 7
48+
; CHECK-NEXT: br i1 [[TMP3]], label %[[SWITCH_LOOKUP:.*]], label %[[RETURN]]
49+
; CHECK: [[SWITCH_LOOKUP]]:
50+
; CHECK-NEXT: [[TMP4:%.*]] = zext nneg i32 [[TMP2]] to i64
51+
; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], ptr @switch.table.switch_of_powers_two_default_reachable, i64 0, i64 [[TMP4]]
52+
; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
5853
; CHECK-NEXT: br label %[[RETURN]]
5954
; CHECK: [[RETURN]]:
60-
; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 3, %[[BB1]] ], [ 2, %[[BB2]] ], [ 1, %[[BB3]] ], [ 0, %[[BB4]] ], [ 42, %[[BB5]] ], [ 5, %[[ENTRY]] ]
55+
; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 5, %[[ENTRY]] ], [ 5, %[[ENTRY_SPLIT]] ], [ [[SWITCH_LOAD]], %[[SWITCH_LOOKUP]] ]
6156
; CHECK-NEXT: ret i32 [[PHI]]
6257
;
6358
entry:
@@ -87,25 +82,24 @@ define i32 @switch_of_powers_two_default_reachable_multipreds(i32 %arg, i1 %cond
8782
; CHECK-NEXT: [[ENTRY:.*]]:
8883
; CHECK-NEXT: br i1 [[COND]], label %[[SWITCH:.*]], label %[[RETURN:.*]]
8984
; CHECK: [[SWITCH]]:
90-
; CHECK-NEXT: switch i32 [[ARG]], label %[[RETURN]] [
91-
; CHECK-NEXT: i32 1, label %[[BB1:.*]]
92-
; CHECK-NEXT: i32 8, label %[[BB2:.*]]
93-
; CHECK-NEXT: i32 16, label %[[BB3:.*]]
94-
; CHECK-NEXT: i32 32, label %[[BB4:.*]]
95-
; CHECK-NEXT: i32 64, label %[[BB5:.*]]
96-
; CHECK-NEXT: ]
97-
; CHECK: [[BB1]]:
98-
; CHECK-NEXT: br label %[[RETURN]]
99-
; CHECK: [[BB2]]:
100-
; CHECK-NEXT: br label %[[RETURN]]
101-
; CHECK: [[BB3]]:
102-
; CHECK-NEXT: br label %[[RETURN]]
103-
; CHECK: [[BB4]]:
104-
; CHECK-NEXT: br label %[[RETURN]]
105-
; CHECK: [[BB5]]:
85+
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.ctpop.i32(i32 [[ARG]])
86+
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 1
87+
; CHECK-NEXT: br i1 [[TMP1]], label %[[SWITCH_SPLIT:.*]], label %[[RETURN]]
88+
; CHECK: [[SWITCH_SPLIT]]:
89+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.cttz.i32(i32 [[ARG]], i1 true)
90+
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 7
91+
; CHECK-NEXT: [[SWITCH_MASKINDEX:%.*]] = trunc i32 [[TMP2]] to i8
92+
; CHECK-NEXT: [[SWITCH_SHIFTED:%.*]] = lshr i8 121, [[SWITCH_MASKINDEX]]
93+
; CHECK-NEXT: [[SWITCH_LOBIT:%.*]] = trunc i8 [[SWITCH_SHIFTED]] to i1
94+
; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[TMP3]], i1 [[SWITCH_LOBIT]], i1 false
95+
; CHECK-NEXT: br i1 [[OR_COND]], label %[[SWITCH_LOOKUP:.*]], label %[[RETURN]]
96+
; CHECK: [[SWITCH_LOOKUP]]:
97+
; CHECK-NEXT: [[TMP4:%.*]] = zext nneg i32 [[TMP2]] to i64
98+
; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], ptr @switch.table.switch_of_powers_two_default_reachable_multipreds, i64 0, i64 [[TMP4]]
99+
; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
106100
; CHECK-NEXT: br label %[[RETURN]]
107101
; CHECK: [[RETURN]]:
108-
; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 3, %[[BB1]] ], [ 2, %[[BB2]] ], [ 1, %[[BB3]] ], [ 0, %[[BB4]] ], [ 42, %[[BB5]] ], [ 0, %[[ENTRY]] ], [ [[ARG]], %[[SWITCH]] ]
102+
; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[ARG]], %[[SWITCH_SPLIT]] ], [ [[ARG]], %[[SWITCH]] ], [ [[SWITCH_LOAD]], %[[SWITCH_LOOKUP]] ]
109103
; CHECK-NEXT: ret i32 [[PHI]]
110104
;
111105
entry:

0 commit comments

Comments
 (0)