Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
fbfa46a
[SimplifyCFG] precommit tests for simplify switch with duplicate arms
michaelmaitland Oct 30, 2024
2db9f00
[SimplifyCFG] Simplify switch instruction that has duplicate arms
michaelmaitland Oct 30, 2024
4e56067
fixup! respond to review
michaelmaitland Oct 30, 2024
9e10655
fixup! refactor for general approach
michaelmaitland Oct 30, 2024
64ddee6
fixup! move PHI checks
michaelmaitland Oct 30, 2024
c8eecb6
fixup! make it O(n)
michaelmaitland Oct 31, 2024
9e9afc2
fixup! use successor BBs in hash
michaelmaitland Oct 31, 2024
ef1bc74
fixup! use hasNPredsOrMore for early exit capability
michaelmaitland Oct 31, 2024
794d4e8
fixup! don't add to Cases if size() != 1 to improve performance
michaelmaitland Oct 31, 2024
32b627f
fixup! precompute getIncomingValueForBlock
michaelmaitland Nov 1, 2024
4190352
fixup! only support unconditional branches
michaelmaitland Nov 1, 2024
f04d67f
fixup! try improving getHashValue
michaelmaitland Nov 1, 2024
4ca2777
fixup! nitty cleanup
michaelmaitland Nov 1, 2024
28620df
fixup! don't reproccess map for same BB
michaelmaitland Nov 1, 2024
9dcf124
fixup! avoid calls to getIncomingValueForBlock
michaelmaitland Nov 1, 2024
41edd15
fixup! drop Seen vector since we no longer build phi map in loop
michaelmaitland Nov 1, 2024
5c247db
fixup! respond to review
michaelmaitland Nov 2, 2024
c3a1361
fixup! some cleanup and add Seen checks
michaelmaitland Nov 2, 2024
b563080
fixup! try and avoid some resizes
michaelmaitland Nov 5, 2024
67b3c80
fixup! presize PhiPredIVs based on first pass data
michaelmaitland Nov 5, 2024
94ff86e
fixup! use reserve instead of resize
michaelmaitland Nov 5, 2024
c6a5e52
fixup! precompute incoming values from BB
michaelmaitland Nov 5, 2024
241fef5
fixup! don't hash an empty set of values
michaelmaitland Nov 5, 2024
aa4cf43
fixup! add some comments
michaelmaitland Nov 5, 2024
8153c4b
fixup! simplify getHashValue
michaelmaitland Nov 5, 2024
d1cd111
fixup! another attempt at fixing hashing
michaelmaitland Nov 5, 2024
4949890
fixup! fix crashes
michaelmaitland Nov 5, 2024
e91253c
fixup~ Revert changes that precompute for getHashValue.
michaelmaitland Nov 5, 2024
d87ea1d
fixup! use SmallVector instead of vector
michaelmaitland Nov 7, 2024
be55920
fixup! update dom tree
michaelmaitland Nov 11, 2024
085b048
fixup! use get/set Sucessor
michaelmaitland Nov 11, 2024
3f0b020
fixup! avoid extra insert
michaelmaitland Nov 12, 2024
3430a66
fixup! respond to review
michaelmaitland Nov 14, 2024
852f3f0
fixup! respond to review
michaelmaitland Nov 14, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
177 changes: 177 additions & 0 deletions llvm/lib/Transforms/Utils/SimplifyCFG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,7 @@ class SimplifyCFGOpt {
bool simplifyCleanupReturn(CleanupReturnInst *RI);
bool simplifyUnreachable(UnreachableInst *UI);
bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
bool simplifyDuplicateSwitchArms(SwitchInst *SI, DomTreeUpdater *DTU);
bool simplifyIndirectBr(IndirectBrInst *IBI);
bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder);
bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
Expand Down Expand Up @@ -7436,6 +7437,179 @@ static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder,
return true;
}

/// Checking whether two cases of SI are equal depends on the contents of the
/// BasicBlock and the incoming values of their successor PHINodes.
/// PHINode::getIncomingValueForBlock is O(|Preds|), so we'd like to avoid
/// calling this function on each BasicBlock every time isEqual is called,
/// especially since the same BasicBlock may be passed as an argument multiple
/// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock ->
/// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking
/// of the incoming values.
struct SwitchSuccWrapper {
// Keep so we can use SwitchInst::setSuccessor to do the replacement. It won't
// be important to equality though.
unsigned SuccNum;
BasicBlock *Dest;
DenseMap<PHINode *, SmallDenseMap<BasicBlock *, Value *, 8>> *PhiPredIVs;
};

namespace llvm {
template <> struct DenseMapInfo<const SwitchSuccWrapper *> {
static const SwitchSuccWrapper *getEmptyKey() {
return static_cast<SwitchSuccWrapper *>(
DenseMapInfo<void *>::getEmptyKey());
}
static const SwitchSuccWrapper *getTombstoneKey() {
return static_cast<SwitchSuccWrapper *>(
DenseMapInfo<void *>::getTombstoneKey());
}
static unsigned getHashValue(const SwitchSuccWrapper *SSW) {
BasicBlock *Succ = SSW->Dest;
BranchInst *BI = cast<BranchInst>(Succ->getTerminator());
assert(BI->isUnconditional() &&
"Only supporting unconditional branches for now");
assert(BI->getNumSuccessors() == 1 &&
"Expected unconditional branches to have one successor");
assert(Succ->size() == 1 && "Expected just a single branch in the BB");

// Since we assume the BB is just a single BranchInst with a single
// successor, we hash as the BB and the incoming Values of its successor
// PHIs. Initially, we tried to just use the successor BB as the hash, but
// including the incoming PHI values leads to better performance.
// We also tried to build a map from BB -> Succs.IncomingValues ahead of
// time and passing it in SwitchSuccWrapper, but this slowed down the
// average compile time without having any impact on the worst case compile
// time.
BasicBlock *BB = BI->getSuccessor(0);
SmallVector<Value *> PhiValsForBB;
for (PHINode &Phi : BB->phis())
PhiValsForBB.emplace_back((*SSW->PhiPredIVs)[&Phi][BB]);

return hash_combine(
BB, hash_combine_range(PhiValsForBB.begin(), PhiValsForBB.end()));
}
static bool isEqual(const SwitchSuccWrapper *LHS,
const SwitchSuccWrapper *RHS) {
auto EKey = DenseMapInfo<SwitchSuccWrapper *>::getEmptyKey();
auto TKey = DenseMapInfo<SwitchSuccWrapper *>::getTombstoneKey();
if (LHS == EKey || RHS == EKey || LHS == TKey || RHS == TKey)
return LHS == RHS;

BasicBlock *A = LHS->Dest;
BasicBlock *B = RHS->Dest;

// FIXME: we checked that the size of A and B are both 1 in
// simplifyDuplicateSwitchArms to make the Case list smaller to
// improve performance. If we decide to support BasicBlocks with more
// than just a single instruction, we need to check that A.size() ==
// B.size() here, and we need to check more than just the BranchInsts
// for equality.

BranchInst *ABI = cast<BranchInst>(A->getTerminator());
BranchInst *BBI = cast<BranchInst>(B->getTerminator());
assert(ABI->isUnconditional() && BBI->isUnconditional() &&
"Only supporting unconditional branches for now");
if (ABI->getSuccessor(0) != BBI->getSuccessor(0))
return false;

// Need to check that PHIs in successor have matching values
BasicBlock *Succ = ABI->getSuccessor(0);
for (PHINode &Phi : Succ->phis()) {
auto &PredIVs = (*LHS->PhiPredIVs)[&Phi];
if (PredIVs[A] != PredIVs[B])
return false;
}

return true;
}
};
} // namespace llvm

bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI,
DomTreeUpdater *DTU) {
// Build Cases. Skip BBs that are not candidates for simplification. Mark
// PHINodes which need to be processed into PhiPredIVs. We decide to process
// an entire PHI at once after the loop, opposed to calling
// getIncomingValueForBlock inside this loop, since each call to
// getIncomingValueForBlock is O(|Preds|).
SmallPtrSet<PHINode *, 8> Phis;
SmallPtrSet<BasicBlock *, 8> Seen;
DenseMap<PHINode *, SmallDenseMap<BasicBlock *, Value *, 8>> PhiPredIVs;
SmallVector<SwitchSuccWrapper> Cases;
Cases.reserve(SI->getNumSuccessors());

for (unsigned I = 0; I < SI->getNumSuccessors(); ++I) {
BasicBlock *BB = SI->getSuccessor(I);

// FIXME: Support more than just a single BranchInst. One way we could do
// this is by taking a hashing approach of all insts in BB.
if (BB->size() != 1)
continue;

// FIXME: This case needs some extra care because the terminators other than
// SI need to be updated.
if (BB->hasNPredecessorsOrMore(2))
continue;

// FIXME: Relax that the terminator is a BranchInst by checking for equality
// on other kinds of terminators. We decide to only support unconditional
// branches for now for compile time reasons.
auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
if (!BI || BI->isConditional())
continue;

if (Seen.insert(BB).second) {
// Keep track of which PHIs we need as keys in PhiPredIVs below.
for (BasicBlock *Succ : BI->successors())
for (PHINode &Phi : Succ->phis())
Phis.insert(&Phi);
}
Cases.emplace_back(SwitchSuccWrapper{I, BB, &PhiPredIVs});
}

// Precompute a data structure to improve performance of isEqual for
// SwitchSuccWrapper.
PhiPredIVs.reserve(Phis.size());
for (PHINode *Phi : Phis) {
PhiPredIVs[Phi] =
SmallDenseMap<BasicBlock *, Value *, 8>(Phi->getNumIncomingValues());
for (auto &IV : Phi->incoming_values())
PhiPredIVs[Phi].insert({Phi->getIncomingBlock(IV), IV.get()});
}

// Build a set such that if the SwitchSuccWrapper exists in the set and
// another SwitchSuccWrapper isEqual, then the equivalent SwitchSuccWrapper
// which is not in the set should be replaced with the one in the set. If the
// SwitchSuccWrapper is not in the set, then it should be added to the set so
// other SwitchSuccWrappers can check against it in the same manner. We use
// SwitchSuccWrapper instead of just BasicBlock because we'd like to pass
// around information to isEquality, getHashValue, and when doing the
// replacement with better performance.
DenseSet<const SwitchSuccWrapper *> ReplaceWith;
ReplaceWith.reserve(Cases.size());

SmallVector<DominatorTree::UpdateType> Updates;
Updates.reserve(ReplaceWith.size());
bool MadeChange = false;
for (auto &SSW : Cases) {
// SSW is a candidate for simplification. If we find a duplicate BB,
// replace it.
const auto [It, Inserted] = ReplaceWith.insert(&SSW);
if (!Inserted) {
// We know that SI's parent BB no longer dominates the old case successor
// since we are making it dead.
Updates.push_back({DominatorTree::Delete, SI->getParent(), SSW.Dest});
SI->setSuccessor(SSW.SuccNum, (*It)->Dest);
MadeChange = true;
}
}

if (DTU)
DTU->applyUpdates(Updates);

return MadeChange;
}

bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
BasicBlock *BB = SI->getParent();

Expand Down Expand Up @@ -7496,6 +7670,9 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
hoistCommonCodeFromSuccessors(SI, !Options.HoistCommonInsts))
return requestResimplify();

if (simplifyDuplicateSwitchArms(SI, DTU))
return requestResimplify();

return false;
}

Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Transforms/PhaseOrdering/switch-sext.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ define i8 @test_switch_with_sext_phi(i8 %code) {
; CHECK-SAME: i8 [[CODE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: switch i8 [[CODE]], label [[SW_EPILOG:%.*]] [
; CHECK-NEXT: i8 76, label [[SW_BB3:%.*]]
; CHECK-NEXT: i8 108, label [[SW_BB2:%.*]]
; CHECK-NEXT: i8 76, label [[SW_BB3:%.*]]
; CHECK-NEXT: ]
; CHECK: sw.bb2:
; CHECK-NEXT: br label [[SW_EPILOG]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,16 +139,14 @@ define i32 @PR34471(i32 %x) {
; NO_FWD-NEXT: switch i32 [[X:%.*]], label [[ELSE3:%.*]] [
; NO_FWD-NEXT: i32 17, label [[RETURN:%.*]]
; NO_FWD-NEXT: i32 19, label [[IF19:%.*]]
; NO_FWD-NEXT: i32 42, label [[IF42:%.*]]
; NO_FWD-NEXT: i32 42, label [[IF19]]
; NO_FWD-NEXT: ]
; NO_FWD: if19:
; NO_FWD-NEXT: br label [[RETURN]]
; NO_FWD: if42:
; NO_FWD-NEXT: br label [[RETURN]]
; NO_FWD: else3:
; NO_FWD-NEXT: br label [[RETURN]]
; NO_FWD: return:
; NO_FWD-NEXT: [[R:%.*]] = phi i32 [ [[X]], [[IF19]] ], [ [[X]], [[IF42]] ], [ 0, [[ELSE3]] ], [ 17, [[ENTRY:%.*]] ]
; NO_FWD-NEXT: [[R:%.*]] = phi i32 [ [[X]], [[IF19]] ], [ 0, [[ELSE3]] ], [ 17, [[ENTRY:%.*]] ]
; NO_FWD-NEXT: ret i32 [[R]]
;
; FWD-LABEL: @PR34471(
Expand Down
6 changes: 2 additions & 4 deletions llvm/test/Transforms/SimplifyCFG/HoistCode.ll
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,12 @@ define float @PR39535min_switch(i64 %i, float %x) {
; CHECK-NEXT: entry:
; CHECK-NEXT: switch i64 [[I:%.*]], label [[END:%.*]] [
; CHECK-NEXT: i64 1, label [[BB1:%.*]]
; CHECK-NEXT: i64 2, label [[BB2:%.*]]
; CHECK-NEXT: i64 2, label [[BB1]]
; CHECK-NEXT: ]
; CHECK: bb1:
; CHECK-NEXT: br label [[END]]
; CHECK: bb2:
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[COND:%.*]] = phi fast float [ [[X:%.*]], [[BB1]] ], [ [[X]], [[BB2]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[COND:%.*]] = phi fast float [ [[X:%.*]], [[BB1]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
; CHECK-NEXT: ret float [[COND]]
;
entry:
Expand Down
129 changes: 129 additions & 0 deletions llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S \
; RUN: | FileCheck %s -check-prefix=SIMPLIFY-CFG

define i32 @switch_all_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3) {
; SIMPLIFY-CFG-LABEL: define i32 @switch_all_duplicate_arms(
; SIMPLIFY-CFG-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]]) {
; SIMPLIFY-CFG-NEXT: switch i32 [[TMP1]], label %[[BB6:.*]] [
; SIMPLIFY-CFG-NEXT: i32 0, label %[[BB5:.*]]
; SIMPLIFY-CFG-NEXT: i32 1, label %[[BB5]]
; SIMPLIFY-CFG-NEXT: ]
; SIMPLIFY-CFG: [[BB5]]:
; SIMPLIFY-CFG-NEXT: br label %[[BB6]]
; SIMPLIFY-CFG: [[BB6]]:
; SIMPLIFY-CFG-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP3]], [[TMP4:%.*]] ], [ [[TMP2]], %[[BB5]] ]
; SIMPLIFY-CFG-NEXT: ret i32 [[TMP8]]
;
switch i32 %1, label %7 [
i32 0, label %5
i32 1, label %6
]

5:
br label %7

6:
br label %7

7:
%8 = phi i32 [ %3, %4 ], [ %2, %6 ], [ %2, %5 ]
ret i32 %8
}

define i32 @switch_some_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) {
; SIMPLIFY-CFG-LABEL: define i32 @switch_some_duplicate_arms(
; SIMPLIFY-CFG-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]]) {
; SIMPLIFY-CFG-NEXT: switch i32 [[TMP1]], label %[[BB8:.*]] [
; SIMPLIFY-CFG-NEXT: i32 0, label %[[BB6:.*]]
; SIMPLIFY-CFG-NEXT: i32 1, label %[[BB6]]
; SIMPLIFY-CFG-NEXT: i32 2, label %[[BB7:.*]]
; SIMPLIFY-CFG-NEXT: ]
; SIMPLIFY-CFG: [[BB6]]:
; SIMPLIFY-CFG-NEXT: br label %[[BB8]]
; SIMPLIFY-CFG: [[BB7]]:
; SIMPLIFY-CFG-NEXT: br label %[[BB8]]
; SIMPLIFY-CFG: [[BB8]]:
; SIMPLIFY-CFG-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP3]], [[TMP5:%.*]] ], [ [[TMP4]], %[[BB7]] ], [ [[TMP2]], %[[BB6]] ]
; SIMPLIFY-CFG-NEXT: ret i32 [[TMP10]]
;
switch i32 %1, label %9 [
i32 0, label %6
i32 1, label %7
i32 2, label %8
]

6:
br label %9

7:
br label %9

8:
br label %9

9:
%10 = phi i32 [ %3, %5 ], [ %4, %8 ], [ %2, %7 ], [ %2, %6 ]
ret i32 %10
}

define i32 @switch_duplicate_arms_multipred(i1 %0, i32 %1, i32 %2, i32 %3, i32 %4) {
; SIMPLIFY-CFG-LABEL: define i32 @switch_duplicate_arms_multipred(
; SIMPLIFY-CFG-SAME: i1 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]]) {
; SIMPLIFY-CFG-NEXT: br i1 [[TMP0]], label %[[BB6:.*]], label %[[BB7:.*]]
; SIMPLIFY-CFG: [[BB6]]:
; SIMPLIFY-CFG-NEXT: switch i32 [[TMP2]], label %[[BB9:.*]] [
; SIMPLIFY-CFG-NEXT: i32 0, label %[[BB7]]
; SIMPLIFY-CFG-NEXT: i32 1, label %[[BB8:.*]]
; SIMPLIFY-CFG-NEXT: ]
; SIMPLIFY-CFG: [[BB7]]:
; SIMPLIFY-CFG-NEXT: br label %[[BB9]]
; SIMPLIFY-CFG: [[BB8]]:
; SIMPLIFY-CFG-NEXT: br label %[[BB9]]
; SIMPLIFY-CFG: [[BB9]]:
; SIMPLIFY-CFG-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP4]], %[[BB6]] ], [ [[TMP3]], %[[BB8]] ], [ [[TMP3]], %[[BB7]] ]
; SIMPLIFY-CFG-NEXT: ret i32 [[TMP10]]
;
br i1 %0, label %6, label %7
6:
switch i32 %2, label %9 [
i32 0, label %7
i32 1, label %8
]

7:
br label %9

8:
br label %9

9:
%10 = phi i32 [ %4, %6 ], [ %3, %8 ], [ %3, %7 ]
ret i32 %10
}

define i32 @switch_dup_default(i32 %0, i32 %1, i32 %2, i32 %3) {
; SIMPLIFY-CFG-LABEL: define i32 @switch_dup_default(
; SIMPLIFY-CFG-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]]) {
; SIMPLIFY-CFG-NEXT: [[COND:%.*]] = icmp eq i32 [[TMP1]], 0
; SIMPLIFY-CFG-NEXT: [[TMP8:%.*]] = select i1 [[COND]], i32 [[TMP3]], i32 [[TMP2]]
; SIMPLIFY-CFG-NEXT: ret i32 [[TMP8]]
;
switch i32 %1, label %7 [
i32 0, label %5
i32 1, label %6
]

5:
br label %8

6:
br label %8

7:
br label %8

8:
%9 = phi i32 [ %3, %5 ], [ %2, %6 ], [ %2, %7 ]
ret i32 %9
}
6 changes: 2 additions & 4 deletions llvm/test/Transforms/SimplifyCFG/switch-to-select-two-case.ll
Original file line number Diff line number Diff line change
Expand Up @@ -272,16 +272,14 @@ define i8 @switch_to_select_two_case_results_no_default(i32 %i) {
; CHECK-NEXT: i32 0, label [[END:%.*]]
; CHECK-NEXT: i32 2, label [[END]]
; CHECK-NEXT: i32 4, label [[CASE3:%.*]]
; CHECK-NEXT: i32 6, label [[CASE4:%.*]]
; CHECK-NEXT: i32 6, label [[CASE3]]
; CHECK-NEXT: ]
; CHECK: case3:
; CHECK-NEXT: br label [[END]]
; CHECK: case4:
; CHECK-NEXT: br label [[END]]
; CHECK: default:
; CHECK-NEXT: unreachable
; CHECK: end:
; CHECK-NEXT: [[T0:%.*]] = phi i8 [ 44, [[CASE3]] ], [ 44, [[CASE4]] ], [ 42, [[ENTRY:%.*]] ], [ 42, [[ENTRY]] ]
; CHECK-NEXT: [[T0:%.*]] = phi i8 [ 44, [[CASE3]] ], [ 42, [[ENTRY:%.*]] ], [ 42, [[ENTRY]] ]
; CHECK-NEXT: ret i8 [[T0]]
;
entry:
Expand Down