diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 72228b445a8b6..fc1de6746461e 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -276,6 +276,7 @@ class SimplifyCFGOpt { bool simplifyCleanupReturn(CleanupReturnInst *RI); bool simplifyUnreachable(UnreachableInst *UI); bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder); + bool simplifyDuplicateSwitchArms(SwitchInst *SI, DomTreeUpdater *DTU); bool simplifyIndirectBr(IndirectBrInst *IBI); bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder); bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder); @@ -7436,6 +7437,179 @@ static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder, return true; } +/// Checking whether two cases of SI are equal depends on the contents of the +/// BasicBlock and the incoming values of their successor PHINodes. +/// PHINode::getIncomingValueForBlock is O(|Preds|), so we'd like to avoid +/// calling this function on each BasicBlock every time isEqual is called, +/// especially since the same BasicBlock may be passed as an argument multiple +/// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock -> +/// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking +/// of the incoming values. +struct SwitchSuccWrapper { + // Keep so we can use SwitchInst::setSuccessor to do the replacement. It won't + // be important to equality though. + unsigned SuccNum; + BasicBlock *Dest; + DenseMap> *PhiPredIVs; +}; + +namespace llvm { +template <> struct DenseMapInfo { + static const SwitchSuccWrapper *getEmptyKey() { + return static_cast( + DenseMapInfo::getEmptyKey()); + } + static const SwitchSuccWrapper *getTombstoneKey() { + return static_cast( + DenseMapInfo::getTombstoneKey()); + } + static unsigned getHashValue(const SwitchSuccWrapper *SSW) { + BasicBlock *Succ = SSW->Dest; + BranchInst *BI = cast(Succ->getTerminator()); + assert(BI->isUnconditional() && + "Only supporting unconditional branches for now"); + assert(BI->getNumSuccessors() == 1 && + "Expected unconditional branches to have one successor"); + assert(Succ->size() == 1 && "Expected just a single branch in the BB"); + + // Since we assume the BB is just a single BranchInst with a single + // successor, we hash as the BB and the incoming Values of its successor + // PHIs. Initially, we tried to just use the successor BB as the hash, but + // including the incoming PHI values leads to better performance. + // We also tried to build a map from BB -> Succs.IncomingValues ahead of + // time and passing it in SwitchSuccWrapper, but this slowed down the + // average compile time without having any impact on the worst case compile + // time. + BasicBlock *BB = BI->getSuccessor(0); + SmallVector PhiValsForBB; + for (PHINode &Phi : BB->phis()) + PhiValsForBB.emplace_back((*SSW->PhiPredIVs)[&Phi][BB]); + + return hash_combine( + BB, hash_combine_range(PhiValsForBB.begin(), PhiValsForBB.end())); + } + static bool isEqual(const SwitchSuccWrapper *LHS, + const SwitchSuccWrapper *RHS) { + auto EKey = DenseMapInfo::getEmptyKey(); + auto TKey = DenseMapInfo::getTombstoneKey(); + if (LHS == EKey || RHS == EKey || LHS == TKey || RHS == TKey) + return LHS == RHS; + + BasicBlock *A = LHS->Dest; + BasicBlock *B = RHS->Dest; + + // FIXME: we checked that the size of A and B are both 1 in + // simplifyDuplicateSwitchArms to make the Case list smaller to + // improve performance. If we decide to support BasicBlocks with more + // than just a single instruction, we need to check that A.size() == + // B.size() here, and we need to check more than just the BranchInsts + // for equality. + + BranchInst *ABI = cast(A->getTerminator()); + BranchInst *BBI = cast(B->getTerminator()); + assert(ABI->isUnconditional() && BBI->isUnconditional() && + "Only supporting unconditional branches for now"); + if (ABI->getSuccessor(0) != BBI->getSuccessor(0)) + return false; + + // Need to check that PHIs in successor have matching values + BasicBlock *Succ = ABI->getSuccessor(0); + for (PHINode &Phi : Succ->phis()) { + auto &PredIVs = (*LHS->PhiPredIVs)[&Phi]; + if (PredIVs[A] != PredIVs[B]) + return false; + } + + return true; + } +}; +} // namespace llvm + +bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI, + DomTreeUpdater *DTU) { + // Build Cases. Skip BBs that are not candidates for simplification. Mark + // PHINodes which need to be processed into PhiPredIVs. We decide to process + // an entire PHI at once after the loop, opposed to calling + // getIncomingValueForBlock inside this loop, since each call to + // getIncomingValueForBlock is O(|Preds|). + SmallPtrSet Phis; + SmallPtrSet Seen; + DenseMap> PhiPredIVs; + SmallVector Cases; + Cases.reserve(SI->getNumSuccessors()); + + for (unsigned I = 0; I < SI->getNumSuccessors(); ++I) { + BasicBlock *BB = SI->getSuccessor(I); + + // FIXME: Support more than just a single BranchInst. One way we could do + // this is by taking a hashing approach of all insts in BB. + if (BB->size() != 1) + continue; + + // FIXME: This case needs some extra care because the terminators other than + // SI need to be updated. + if (BB->hasNPredecessorsOrMore(2)) + continue; + + // FIXME: Relax that the terminator is a BranchInst by checking for equality + // on other kinds of terminators. We decide to only support unconditional + // branches for now for compile time reasons. + auto *BI = dyn_cast(BB->getTerminator()); + if (!BI || BI->isConditional()) + continue; + + if (Seen.insert(BB).second) { + // Keep track of which PHIs we need as keys in PhiPredIVs below. + for (BasicBlock *Succ : BI->successors()) + for (PHINode &Phi : Succ->phis()) + Phis.insert(&Phi); + } + Cases.emplace_back(SwitchSuccWrapper{I, BB, &PhiPredIVs}); + } + + // Precompute a data structure to improve performance of isEqual for + // SwitchSuccWrapper. + PhiPredIVs.reserve(Phis.size()); + for (PHINode *Phi : Phis) { + PhiPredIVs[Phi] = + SmallDenseMap(Phi->getNumIncomingValues()); + for (auto &IV : Phi->incoming_values()) + PhiPredIVs[Phi].insert({Phi->getIncomingBlock(IV), IV.get()}); + } + + // Build a set such that if the SwitchSuccWrapper exists in the set and + // another SwitchSuccWrapper isEqual, then the equivalent SwitchSuccWrapper + // which is not in the set should be replaced with the one in the set. If the + // SwitchSuccWrapper is not in the set, then it should be added to the set so + // other SwitchSuccWrappers can check against it in the same manner. We use + // SwitchSuccWrapper instead of just BasicBlock because we'd like to pass + // around information to isEquality, getHashValue, and when doing the + // replacement with better performance. + DenseSet ReplaceWith; + ReplaceWith.reserve(Cases.size()); + + SmallVector Updates; + Updates.reserve(ReplaceWith.size()); + bool MadeChange = false; + for (auto &SSW : Cases) { + // SSW is a candidate for simplification. If we find a duplicate BB, + // replace it. + const auto [It, Inserted] = ReplaceWith.insert(&SSW); + if (!Inserted) { + // We know that SI's parent BB no longer dominates the old case successor + // since we are making it dead. + Updates.push_back({DominatorTree::Delete, SI->getParent(), SSW.Dest}); + SI->setSuccessor(SSW.SuccNum, (*It)->Dest); + MadeChange = true; + } + } + + if (DTU) + DTU->applyUpdates(Updates); + + return MadeChange; +} + bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { BasicBlock *BB = SI->getParent(); @@ -7496,6 +7670,9 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { hoistCommonCodeFromSuccessors(SI, !Options.HoistCommonInsts)) return requestResimplify(); + if (simplifyDuplicateSwitchArms(SI, DTU)) + return requestResimplify(); + return false; } diff --git a/llvm/test/Transforms/PhaseOrdering/switch-sext.ll b/llvm/test/Transforms/PhaseOrdering/switch-sext.ll index e39771b629e01..0e352ba52f6a2 100644 --- a/llvm/test/Transforms/PhaseOrdering/switch-sext.ll +++ b/llvm/test/Transforms/PhaseOrdering/switch-sext.ll @@ -6,8 +6,8 @@ define i8 @test_switch_with_sext_phi(i8 %code) { ; CHECK-SAME: i8 [[CODE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: switch i8 [[CODE]], label [[SW_EPILOG:%.*]] [ -; CHECK-NEXT: i8 76, label [[SW_BB3:%.*]] ; CHECK-NEXT: i8 108, label [[SW_BB2:%.*]] +; CHECK-NEXT: i8 76, label [[SW_BB3:%.*]] ; CHECK-NEXT: ] ; CHECK: sw.bb2: ; CHECK-NEXT: br label [[SW_EPILOG]] diff --git a/llvm/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll b/llvm/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll index 8ad455eb9e7f2..4623eb2c5dd3c 100644 --- a/llvm/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll +++ b/llvm/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll @@ -139,16 +139,14 @@ define i32 @PR34471(i32 %x) { ; NO_FWD-NEXT: switch i32 [[X:%.*]], label [[ELSE3:%.*]] [ ; NO_FWD-NEXT: i32 17, label [[RETURN:%.*]] ; NO_FWD-NEXT: i32 19, label [[IF19:%.*]] -; NO_FWD-NEXT: i32 42, label [[IF42:%.*]] +; NO_FWD-NEXT: i32 42, label [[IF19]] ; NO_FWD-NEXT: ] ; NO_FWD: if19: ; NO_FWD-NEXT: br label [[RETURN]] -; NO_FWD: if42: -; NO_FWD-NEXT: br label [[RETURN]] ; NO_FWD: else3: ; NO_FWD-NEXT: br label [[RETURN]] ; NO_FWD: return: -; NO_FWD-NEXT: [[R:%.*]] = phi i32 [ [[X]], [[IF19]] ], [ [[X]], [[IF42]] ], [ 0, [[ELSE3]] ], [ 17, [[ENTRY:%.*]] ] +; NO_FWD-NEXT: [[R:%.*]] = phi i32 [ [[X]], [[IF19]] ], [ 0, [[ELSE3]] ], [ 17, [[ENTRY:%.*]] ] ; NO_FWD-NEXT: ret i32 [[R]] ; ; FWD-LABEL: @PR34471( diff --git a/llvm/test/Transforms/SimplifyCFG/HoistCode.ll b/llvm/test/Transforms/SimplifyCFG/HoistCode.ll index fe0b48028a3b6..fbe41d891c1ec 100644 --- a/llvm/test/Transforms/SimplifyCFG/HoistCode.ll +++ b/llvm/test/Transforms/SimplifyCFG/HoistCode.ll @@ -65,14 +65,12 @@ define float @PR39535min_switch(i64 %i, float %x) { ; CHECK-NEXT: entry: ; CHECK-NEXT: switch i64 [[I:%.*]], label [[END:%.*]] [ ; CHECK-NEXT: i64 1, label [[BB1:%.*]] -; CHECK-NEXT: i64 2, label [[BB2:%.*]] +; CHECK-NEXT: i64 2, label [[BB1]] ; CHECK-NEXT: ] ; CHECK: bb1: ; CHECK-NEXT: br label [[END]] -; CHECK: bb2: -; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[COND:%.*]] = phi fast float [ [[X:%.*]], [[BB1]] ], [ [[X]], [[BB2]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[COND:%.*]] = phi fast float [ [[X:%.*]], [[BB1]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] ; CHECK-NEXT: ret float [[COND]] ; entry: diff --git a/llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll b/llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll new file mode 100644 index 0000000000000..0df4deba1a156 --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll @@ -0,0 +1,129 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S \ +; RUN: | FileCheck %s -check-prefix=SIMPLIFY-CFG + +define i32 @switch_all_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3) { +; SIMPLIFY-CFG-LABEL: define i32 @switch_all_duplicate_arms( +; SIMPLIFY-CFG-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]]) { +; SIMPLIFY-CFG-NEXT: switch i32 [[TMP1]], label %[[BB6:.*]] [ +; SIMPLIFY-CFG-NEXT: i32 0, label %[[BB5:.*]] +; SIMPLIFY-CFG-NEXT: i32 1, label %[[BB5]] +; SIMPLIFY-CFG-NEXT: ] +; SIMPLIFY-CFG: [[BB5]]: +; SIMPLIFY-CFG-NEXT: br label %[[BB6]] +; SIMPLIFY-CFG: [[BB6]]: +; SIMPLIFY-CFG-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP3]], [[TMP4:%.*]] ], [ [[TMP2]], %[[BB5]] ] +; SIMPLIFY-CFG-NEXT: ret i32 [[TMP8]] +; + switch i32 %1, label %7 [ + i32 0, label %5 + i32 1, label %6 + ] + +5: + br label %7 + +6: + br label %7 + +7: + %8 = phi i32 [ %3, %4 ], [ %2, %6 ], [ %2, %5 ] + ret i32 %8 +} + +define i32 @switch_some_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) { +; SIMPLIFY-CFG-LABEL: define i32 @switch_some_duplicate_arms( +; SIMPLIFY-CFG-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]]) { +; SIMPLIFY-CFG-NEXT: switch i32 [[TMP1]], label %[[BB8:.*]] [ +; SIMPLIFY-CFG-NEXT: i32 0, label %[[BB6:.*]] +; SIMPLIFY-CFG-NEXT: i32 1, label %[[BB6]] +; SIMPLIFY-CFG-NEXT: i32 2, label %[[BB7:.*]] +; SIMPLIFY-CFG-NEXT: ] +; SIMPLIFY-CFG: [[BB6]]: +; SIMPLIFY-CFG-NEXT: br label %[[BB8]] +; SIMPLIFY-CFG: [[BB7]]: +; SIMPLIFY-CFG-NEXT: br label %[[BB8]] +; SIMPLIFY-CFG: [[BB8]]: +; SIMPLIFY-CFG-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP3]], [[TMP5:%.*]] ], [ [[TMP4]], %[[BB7]] ], [ [[TMP2]], %[[BB6]] ] +; SIMPLIFY-CFG-NEXT: ret i32 [[TMP10]] +; + switch i32 %1, label %9 [ + i32 0, label %6 + i32 1, label %7 + i32 2, label %8 + ] + +6: + br label %9 + +7: + br label %9 + +8: + br label %9 + +9: + %10 = phi i32 [ %3, %5 ], [ %4, %8 ], [ %2, %7 ], [ %2, %6 ] + ret i32 %10 +} + +define i32 @switch_duplicate_arms_multipred(i1 %0, i32 %1, i32 %2, i32 %3, i32 %4) { +; SIMPLIFY-CFG-LABEL: define i32 @switch_duplicate_arms_multipred( +; SIMPLIFY-CFG-SAME: i1 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]]) { +; SIMPLIFY-CFG-NEXT: br i1 [[TMP0]], label %[[BB6:.*]], label %[[BB7:.*]] +; SIMPLIFY-CFG: [[BB6]]: +; SIMPLIFY-CFG-NEXT: switch i32 [[TMP2]], label %[[BB9:.*]] [ +; SIMPLIFY-CFG-NEXT: i32 0, label %[[BB7]] +; SIMPLIFY-CFG-NEXT: i32 1, label %[[BB8:.*]] +; SIMPLIFY-CFG-NEXT: ] +; SIMPLIFY-CFG: [[BB7]]: +; SIMPLIFY-CFG-NEXT: br label %[[BB9]] +; SIMPLIFY-CFG: [[BB8]]: +; SIMPLIFY-CFG-NEXT: br label %[[BB9]] +; SIMPLIFY-CFG: [[BB9]]: +; SIMPLIFY-CFG-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP4]], %[[BB6]] ], [ [[TMP3]], %[[BB8]] ], [ [[TMP3]], %[[BB7]] ] +; SIMPLIFY-CFG-NEXT: ret i32 [[TMP10]] +; + br i1 %0, label %6, label %7 +6: + switch i32 %2, label %9 [ + i32 0, label %7 + i32 1, label %8 + ] + +7: + br label %9 + +8: + br label %9 + +9: + %10 = phi i32 [ %4, %6 ], [ %3, %8 ], [ %3, %7 ] + ret i32 %10 +} + +define i32 @switch_dup_default(i32 %0, i32 %1, i32 %2, i32 %3) { +; SIMPLIFY-CFG-LABEL: define i32 @switch_dup_default( +; SIMPLIFY-CFG-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]]) { +; SIMPLIFY-CFG-NEXT: [[COND:%.*]] = icmp eq i32 [[TMP1]], 0 +; SIMPLIFY-CFG-NEXT: [[TMP8:%.*]] = select i1 [[COND]], i32 [[TMP3]], i32 [[TMP2]] +; SIMPLIFY-CFG-NEXT: ret i32 [[TMP8]] +; + switch i32 %1, label %7 [ + i32 0, label %5 + i32 1, label %6 + ] + +5: + br label %8 + +6: + br label %8 + +7: + br label %8 + +8: + %9 = phi i32 [ %3, %5 ], [ %2, %6 ], [ %2, %7 ] + ret i32 %9 +} diff --git a/llvm/test/Transforms/SimplifyCFG/switch-to-select-two-case.ll b/llvm/test/Transforms/SimplifyCFG/switch-to-select-two-case.ll index 1e2f18b3f339d..50998e447b71d 100644 --- a/llvm/test/Transforms/SimplifyCFG/switch-to-select-two-case.ll +++ b/llvm/test/Transforms/SimplifyCFG/switch-to-select-two-case.ll @@ -272,16 +272,14 @@ define i8 @switch_to_select_two_case_results_no_default(i32 %i) { ; CHECK-NEXT: i32 0, label [[END:%.*]] ; CHECK-NEXT: i32 2, label [[END]] ; CHECK-NEXT: i32 4, label [[CASE3:%.*]] -; CHECK-NEXT: i32 6, label [[CASE4:%.*]] +; CHECK-NEXT: i32 6, label [[CASE3]] ; CHECK-NEXT: ] ; CHECK: case3: ; CHECK-NEXT: br label [[END]] -; CHECK: case4: -; CHECK-NEXT: br label [[END]] ; CHECK: default: ; CHECK-NEXT: unreachable ; CHECK: end: -; CHECK-NEXT: [[T0:%.*]] = phi i8 [ 44, [[CASE3]] ], [ 44, [[CASE4]] ], [ 42, [[ENTRY:%.*]] ], [ 42, [[ENTRY]] ] +; CHECK-NEXT: [[T0:%.*]] = phi i8 [ 44, [[CASE3]] ], [ 42, [[ENTRY:%.*]] ], [ 42, [[ENTRY]] ] ; CHECK-NEXT: ret i8 [[T0]] ; entry: