Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 75 additions & 3 deletions llvm/lib/CodeGen/CodeGenPrepare.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -441,7 +441,8 @@ class CodeGenPrepare {
bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
bool optimizeSwitchType(SwitchInst *SI);
bool optimizeSwitchPhiConstants(SwitchInst *SI);
bool optimizeSwitchInst(SwitchInst *SI);
// Fold a switch's case pair {0, power-of-2} with a common destination into a
// single check and branch in front of the switch.
bool optimizeSwitchPow2Constant(SwitchInst *SI, ModifyDT &ModifiedDT);
// Run the switch optimizations on SI; ModifiedDT is passed through to
// optimizeSwitchPow2Constant.
bool optimizeSwitchInst(SwitchInst *SI, ModifyDT &ModifiedDT);
bool optimizeExtractElementInst(Instruction *Inst);
bool dupRetToEnableTailCallOpts(BasicBlock *BB, ModifyDT &ModifiedDT);
bool fixupDbgValue(Instruction *I);
Expand Down Expand Up @@ -7888,9 +7889,80 @@ bool CodeGenPrepare::optimizeSwitchPhiConstants(SwitchInst *SI) {
return Changed;
}

bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
/// Try to split off a switch's case pair {0, Pow2} that shares one successor
/// and replace it with a single mask test and conditional branch placed in
/// front of the switch.
///
/// The transform only fires when all of the following hold:
///  * the switch has at least 2 and fewer than 8 cases,
///  * exactly one case value is a power of 2, and it shares its successor
///    with the case value 0,
///  * LoopInfo reports different loops for that successor and for the block
///    containing the switch,
///  * neither Pow2 - 1 nor Pow2 + 1 is a case value with the same successor
///    (such neighbours can be generated as range checks instead).
///
/// \param SI         Switch to transform. On success it ends up in a new block
///                   that is reached when the mask test fails, with the two
///                   folded cases removed.
/// \param ModifiedDT Set to ModifyDT::ModifyBBDT when the CFG was changed.
/// \returns true if the IR was changed.
bool CodeGenPrepare::optimizeSwitchPow2Constant(SwitchInst *SI,
                                                ModifyDT &ModifiedDT) {
  // Bail out if there either aren't enough cases to fold or too many.
  if (SI->getNumCases() < 2 || SI->getNumCases() >= 8)
    return false;

  // Collect cases and sort them so that power-of-2s come first in ascending
  // order. Zero is not a power of 2, so if present it is the first entry after
  // the power-of-2 values.
  SmallVector<std::pair<APInt, BasicBlock *>> Cases;
  for (auto &C : SI->cases())
    Cases.emplace_back(C.getCaseValue()->getValue(), C.getCaseSuccessor());
  sort(Cases, [](const auto &A, const auto &B) {
    const APInt &AV = A.first;
    const APInt &BV = B.first;
    if (AV.isPowerOf2() != BV.isPowerOf2())
      return AV.isPowerOf2();
    return AV.ult(BV);
  });

  // Bail out if we don't have a single power-of-2 constant, followed by zero
  // with a common destination.
  // TODO: could support multiple power-of-2s by just picking one.
  BasicBlock *Dst = Cases[0].second;
  const APInt Pow2 = Cases[0].first;
  if (!Pow2.isPowerOf2() || !Cases[1].first.isZero() ||
      Dst != Cases[1].second)
    return false;

  // Limit the transform to switches leaving loops for now.
  if (LI->getLoopFor(Dst) == LI->getLoopFor(SI->getParent()))
    return false;

  // Check if there are case values before/after the power-of-2 that are
  // consecutive. In that case, they can be generated as range-checks.
  sort(Cases,
       [](const auto &A, const auto &B) { return A.first.ult(B.first); });
  auto Idx =
      find_if(Cases, [&Pow2](const auto &C) { return C.first == Pow2; });
  bool Increasing = Idx + 1 != Cases.end() && (Idx + 1)->second == Dst &&
                    Idx->first + 1 == (Idx + 1)->first;
  bool Decreasing = Idx != Cases.begin() && (Idx - 1)->second == Dst &&
                    Idx->first - 1 == (Idx - 1)->first;
  if (Increasing || Decreasing)
    return false;

  BasicBlock *OldBB = SI->getParent();
  LLVMContext &Ctx = OldBB->getContext();
  const unsigned BitWidth = Pow2.getBitWidth();

  // Move the switch into its own block. The split leaves OldBB ending in an
  // unconditional branch to NewBB; drop it so the mask test can terminate
  // OldBB instead.
  // NOTE(review): NewBB is not registered with LI here - confirm that the
  // caller refreshes LoopInfo after ModifiedDT is set to ModifyBBDT.
  BasicBlock *NewBB = OldBB->splitBasicBlock(SI->getIterator());
  OldBB->getTerminator()->eraseFromParent();

  IRBuilder<> B(OldBB);
  // The new check stands in for part of the switch, so give it the switch's
  // debug location rather than leaving it without one.
  B.SetCurrentDebugLocation(SI->getDebugLoc());

  // (Cond & ~Pow2) == 0 holds exactly for Cond == 0 and Cond == Pow2.
  auto *Pow2CI = ConstantInt::get(Ctx, Pow2);
  auto *ZeroCI = ConstantInt::get(Ctx, APInt::getZero(BitWidth));
  Value *Masked = B.CreateAnd(SI->getCondition(), ConstantInt::get(Ctx, ~Pow2));
  Value *IsZeroOrPow2 = B.CreateICmpEQ(Masked, ZeroCI);
  B.CreateCondBr(IsZeroOrPow2, Dst, NewBB);

  // Remove the two folded cases by value (not by position in Cases). Go
  // through the profile wrapper so that any branch_weights metadata on the
  // switch keeps one weight per remaining successor.
  {
    SwitchInstProfUpdateWrapper SIW(*SI);
    SIW.removeCase(SI->findCaseValue(ZeroCI));
    SIW.removeCase(SI->findCaseValue(Pow2CI));
  }

  // Dst lost two edges from the switch block (NewBB after the split) and
  // gained one edge from OldBB. Add the new entry first so a PHI never becomes
  // empty while the old entries are removed.
  for (auto &P : Dst->phis()) {
    P.addIncoming(P.getIncomingValueForBlock(NewBB), OldBB);
    P.removeIncomingValue(NewBB);
    P.removeIncomingValue(NewBB);
  }

  ModifiedDT = ModifyDT::ModifyBBDT;
  return true;
}

/// Run every switch optimization on \p SI, in a fixed order and
/// unconditionally (a later step still runs when an earlier one already
/// changed the IR).
///
/// \param SI         Switch instruction to optimize.
/// \param ModifiedDT Passed through to optimizeSwitchPow2Constant.
/// \returns true if any of the steps reported a change.
bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI, ModifyDT &ModifiedDT) {
  const bool TypeChanged = optimizeSwitchType(SI);
  const bool PhiConstantsChanged = optimizeSwitchPhiConstants(SI);
  const bool Pow2Changed = optimizeSwitchPow2Constant(SI, ModifiedDT);
  return TypeChanged || PhiConstantsChanged || Pow2Changed;
}

Expand Down Expand Up @@ -8815,7 +8887,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
case Instruction::ShuffleVector:
return optimizeShuffleVectorInst(cast<ShuffleVectorInst>(I));
case Instruction::Switch:
return optimizeSwitchInst(cast<SwitchInst>(I));
return optimizeSwitchInst(cast<SwitchInst>(I), ModifiedDT);
case Instruction::ExtractElement:
return optimizeExtractElementInst(cast<ExtractElementInst>(I));
case Instruction::Br:
Expand Down
204 changes: 204 additions & 0 deletions llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -O3 -mtriple=arm64-apple-macosx -o - %s | FileCheck %s

; Cases 0 and 32 (a power of 2) share the successor %e1, which lies outside the
; loop; case 124 goes to %e2. The assertions below expect the 0/32 pair to be
; tested with a single tst against 0xdf (all i8 bits except bit 5).
define i32 @switch_with_matching_dests_0_and_pow2_3_cases(i8 %v) {
; CHECK-LABEL: switch_with_matching_dests_0_and_pow2_3_cases:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: mov w8, #100 ; =0x64
; CHECK-NEXT: mov w9, #223 ; =0xdf
; CHECK-NEXT: LBB0_1: ; %loop.header
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: tst w0, w9
; CHECK-NEXT: b.eq LBB0_4
; CHECK-NEXT: ; %bb.2: ; in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: and w10, w0, #0xff
; CHECK-NEXT: cmp w10, #124
; CHECK-NEXT: b.eq LBB0_5
; CHECK-NEXT: ; %bb.3: ; %loop.latch
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: subs w8, w8, #1
; CHECK-NEXT: b.ne LBB0_1
; CHECK-NEXT: LBB0_4:
; CHECK-NEXT: mov w0, #20 ; =0x14
; CHECK-NEXT: ret
; CHECK-NEXT: LBB0_5: ; %e2
; CHECK-NEXT: mov w0, #30 ; =0x1e
; CHECK-NEXT: ret
entry:
br label %loop.header

loop.header:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
switch i8 %v, label %loop.latch [
i8 32, label %e1
i8 0, label %e1
i8 124, label %e2
]

loop.latch:
%iv.next = add i32 %iv, 1
%c = icmp eq i32 %iv.next, 100
br i1 %c, label %e1, label %loop.header

e1:
ret i32 20

e2:
ret i32 30
}

; The only power-of-2 case value (4) has neighbours on both sides (3 and 5)
; that go to the same successor %e0 as 0 and 4. The assertions below expect a
; bound check against 7 followed by a bit test of (1 << %param) against the
; mask 249, i.e. bits 0 and 3-7.
define i64 @consecutive_match_both(ptr %p, i32 %param) {
; CHECK-LABEL: consecutive_match_both:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: mov w8, #1 ; =0x1
; CHECK-NEXT: mov w9, #100 ; =0x64
; CHECK-NEXT: mov w10, #249 ; =0xf9
; CHECK-NEXT: lsl w8, w8, w1
; CHECK-NEXT: b LBB1_2
; CHECK-NEXT: LBB1_1: ; %loop.latch
; CHECK-NEXT: ; in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: subs w9, w9, #1
; CHECK-NEXT: b.eq LBB1_5
; CHECK-NEXT: LBB1_2: ; %loop.header
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: cmp w1, #7
; CHECK-NEXT: b.hi LBB1_1
; CHECK-NEXT: ; %bb.3: ; %loop.header
; CHECK-NEXT: ; in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: tst w8, w10
; CHECK-NEXT: b.eq LBB1_1
; CHECK-NEXT: ; %bb.4: ; %e0
; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: ret
; CHECK-NEXT: LBB1_5:
; CHECK-NEXT: mov x0, #-42 ; =0xffffffffffffffd6
; CHECK-NEXT: ret
entry:
br label %loop.header

loop.header:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
switch i32 %param, label %loop.latch [
i32 7, label %e0
i32 6, label %e0
i32 5, label %e0
i32 4, label %e0
i32 3, label %e0
i32 0, label %e0
]

loop.latch:
%iv.next = add i32 %iv, 1
%ec = icmp eq i32 %iv.next, 100
br i1 %ec, label %e1, label %loop.header

e0:
%m = getelementptr i8, ptr %p, i64 20
br label %e1

e1:
%res = phi i64 [ 0, %e0 ], [ -42, %loop.latch ]
ret i64 %res
}

; The power-of-2 case value (4) is directly preceded by case value 3 with the
; same successor %e0. The assertions below expect a bound check against 4
; followed by a bit test of (1 << %param) against the mask 25, i.e. bits 0, 3
; and 4.
define i64 @consecutive_match_before(ptr %p, i32 %param) {
; CHECK-LABEL: consecutive_match_before:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: mov w8, #1 ; =0x1
; CHECK-NEXT: mov w9, #100 ; =0x64
; CHECK-NEXT: mov w10, #25 ; =0x19
; CHECK-NEXT: lsl w8, w8, w1
; CHECK-NEXT: b LBB2_2
; CHECK-NEXT: LBB2_1: ; %loop.latch
; CHECK-NEXT: ; in Loop: Header=BB2_2 Depth=1
; CHECK-NEXT: subs w9, w9, #1
; CHECK-NEXT: b.eq LBB2_5
; CHECK-NEXT: LBB2_2: ; %loop.header
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: cmp w1, #4
; CHECK-NEXT: b.hi LBB2_1
; CHECK-NEXT: ; %bb.3: ; %loop.header
; CHECK-NEXT: ; in Loop: Header=BB2_2 Depth=1
; CHECK-NEXT: tst w8, w10
; CHECK-NEXT: b.eq LBB2_1
; CHECK-NEXT: ; %bb.4: ; %e0
; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: ret
; CHECK-NEXT: LBB2_5:
; CHECK-NEXT: mov x0, #-42 ; =0xffffffffffffffd6
; CHECK-NEXT: ret
entry:
br label %loop.header

loop.header:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
switch i32 %param, label %loop.latch [
i32 4, label %e0
i32 3, label %e0
i32 0, label %e0
]

loop.latch:
%iv.next = add i32 %iv, 1
%ec = icmp eq i32 %iv.next, 100
br i1 %ec, label %e1, label %loop.header

e0:
%m = getelementptr i8, ptr %p, i64 20
br label %e1

e1:
%res = phi i64 [ 0, %e0 ], [ -42, %loop.latch ]
ret i64 %res
}

; The power-of-2 case value (4) is directly followed by case value 5 with the
; same successor %e0. The assertions below expect a bound check against 5
; followed by a bit test of (1 << %param) against the mask 49, i.e. bits 0, 4
; and 5.
define i64 @consecutive_match_after(ptr %p, i32 %param) {
; CHECK-LABEL: consecutive_match_after:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: mov w8, #1 ; =0x1
; CHECK-NEXT: mov w9, #100 ; =0x64
; CHECK-NEXT: mov w10, #49 ; =0x31
; CHECK-NEXT: lsl w8, w8, w1
; CHECK-NEXT: b LBB3_2
; CHECK-NEXT: LBB3_1: ; %loop.latch
; CHECK-NEXT: ; in Loop: Header=BB3_2 Depth=1
; CHECK-NEXT: subs w9, w9, #1
; CHECK-NEXT: b.eq LBB3_5
; CHECK-NEXT: LBB3_2: ; %loop.header
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: cmp w1, #5
; CHECK-NEXT: b.hi LBB3_1
; CHECK-NEXT: ; %bb.3: ; %loop.header
; CHECK-NEXT: ; in Loop: Header=BB3_2 Depth=1
; CHECK-NEXT: tst w8, w10
; CHECK-NEXT: b.eq LBB3_1
; CHECK-NEXT: ; %bb.4: ; %e0
; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: ret
; CHECK-NEXT: LBB3_5:
; CHECK-NEXT: mov x0, #-42 ; =0xffffffffffffffd6
; CHECK-NEXT: ret
entry:
br label %loop.header

loop.header:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
switch i32 %param, label %loop.latch [
i32 5, label %e0
i32 4, label %e0
i32 0, label %e0
]

loop.latch:
%iv.next = add i32 %iv, 1
%ec = icmp eq i32 %iv.next, 100
br i1 %ec, label %e1, label %loop.header

e0:
%m = getelementptr i8, ptr %p, i64 20
br label %e1

e1:
%res = phi i64 [ 0, %e0 ], [ -42, %loop.latch ]
ret i64 %res
}
Loading
Loading