From fbfa46a57becdedf4a93406f7e6b3cec84bd61c9 Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Wed, 30 Oct 2024 06:11:43 -0700 Subject: [PATCH 01/34] [SimplifyCFG] precommit tests for simplify switch with duplicate arms --- .../Transforms/SimplifyCFG/switch-dup-bbs.ll | 154 ++++++++++++++++++ 1 file changed, 154 insertions(+) create mode 100644 llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll diff --git a/llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll b/llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll new file mode 100644 index 0000000000000..3575597e40856 --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll @@ -0,0 +1,154 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -passes=simplifycfg -S | FileCheck %s -check-prefix=SIMPLIFY-CFG +; RUN: opt < %s -O3 -S | FileCheck %s -check-prefix=O3 + +define i32 @switch_all_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3) { +; SIMPLIFY-CFG-LABEL: define i32 @switch_all_duplicate_arms( +; SIMPLIFY-CFG-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]]) { +; SIMPLIFY-CFG-NEXT: switch i32 [[TMP1]], label %[[BB7:.*]] [ +; SIMPLIFY-CFG-NEXT: i32 0, label %[[BB5:.*]] +; SIMPLIFY-CFG-NEXT: i32 1, label %[[BB6:.*]] +; SIMPLIFY-CFG-NEXT: ] +; SIMPLIFY-CFG: [[BB5]]: +; SIMPLIFY-CFG-NEXT: br label %[[BB7]] +; SIMPLIFY-CFG: [[BB6]]: +; SIMPLIFY-CFG-NEXT: br label %[[BB7]] +; SIMPLIFY-CFG: [[BB7]]: +; SIMPLIFY-CFG-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP3]], [[TMP4:%.*]] ], [ [[TMP2]], %[[BB6]] ], [ [[TMP2]], %[[BB5]] ] +; SIMPLIFY-CFG-NEXT: ret i32 [[TMP8]] +; +; O3-LABEL: define i32 @switch_all_duplicate_arms( +; O3-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; O3-NEXT: switch i32 [[TMP1]], label %[[BB7:.*]] [ +; O3-NEXT: i32 0, label %[[BB5:.*]] +; O3-NEXT: i32 1, label %[[BB6:.*]] +; O3-NEXT: ] +; O3: [[BB5]]: +; O3-NEXT: br label %[[BB7]] +; O3: [[BB6]]: +; O3-NEXT: br label %[[BB7]] +; O3: [[BB7]]: +; O3-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP3]], [[TMP4:%.*]] ], [ [[TMP2]], %[[BB6]] ], [ [[TMP2]], %[[BB5]] ] +; O3-NEXT: ret i32 [[TMP8]] +; + switch i32 %1, label %7 [ + i32 0, label %5 + i32 1, label %6 + ] + +5: + br label %7 + +6: + br label %7 + +7: + %8 = phi i32 [ %3, %4 ], [ %2, %6 ], [ %2, %5 ] + ret i32 %8 +} + +define i32 @switch_some_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) { +; SIMPLIFY-CFG-LABEL: define i32 @switch_some_duplicate_arms( +; SIMPLIFY-CFG-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]]) { +; SIMPLIFY-CFG-NEXT: switch i32 [[TMP1]], label %[[BB9:.*]] [ +; SIMPLIFY-CFG-NEXT: i32 0, label %[[BB6:.*]] +; SIMPLIFY-CFG-NEXT: i32 1, label %[[BB7:.*]] +; SIMPLIFY-CFG-NEXT: i32 2, label %[[BB8:.*]] +; SIMPLIFY-CFG-NEXT: ] +; SIMPLIFY-CFG: [[BB6]]: +; SIMPLIFY-CFG-NEXT: br label %[[BB9]] +; SIMPLIFY-CFG: [[BB7]]: +; SIMPLIFY-CFG-NEXT: br label %[[BB9]] +; SIMPLIFY-CFG: [[BB8]]: +; SIMPLIFY-CFG-NEXT: br label %[[BB9]] +; SIMPLIFY-CFG: [[BB9]]: +; SIMPLIFY-CFG-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP3]], [[TMP5:%.*]] ], [ [[TMP4]], %[[BB8]] ], [ [[TMP2]], %[[BB7]] ], [ [[TMP2]], %[[BB6]] ] +; SIMPLIFY-CFG-NEXT: ret i32 [[TMP10]] +; +; O3-LABEL: define i32 @switch_some_duplicate_arms( +; O3-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]]) local_unnamed_addr #[[ATTR0]] { +; O3-NEXT: switch i32 [[TMP1]], label %[[BB9:.*]] [ +; O3-NEXT: i32 0, label %[[BB6:.*]] +; O3-NEXT: i32 1, label %[[BB7:.*]] +; O3-NEXT: i32 2, label %[[BB8:.*]] +; O3-NEXT: ] +; O3: [[BB6]]: +; O3-NEXT: br label %[[BB9]] +; O3: [[BB7]]: +; O3-NEXT: br label %[[BB9]] +; O3: [[BB8]]: +; O3-NEXT: br label %[[BB9]] +; O3: [[BB9]]: +; O3-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP3]], [[TMP5:%.*]] ], [ [[TMP4]], %[[BB8]] ], [ [[TMP2]], %[[BB7]] ], [ [[TMP2]], %[[BB6]] ] +; O3-NEXT: ret i32 [[TMP10]] +; + switch i32 %1, label %9 [ + i32 0, label %6 + i32 1, label %7 + i32 2, label %8 + ] + +6: + br label %9 + +7: + br label %9 + +8: + br label %9 + +9: + %10 = phi i32 [ %3, %5 ], [ %4, %8 ], [ %2, %7 ], [ %2, %6 ] + ret i32 %10 +} + +define i32 @switch_duplicate_arms_multipred(i1 %0, i32 %1, i32 %2, i32 %3, i32 %4) { +; SIMPLIFY-CFG-LABEL: define i32 @switch_duplicate_arms_multipred( +; SIMPLIFY-CFG-SAME: i1 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]]) { +; SIMPLIFY-CFG-NEXT: br i1 [[TMP0]], label %[[BB6:.*]], label %[[BB7:.*]] +; SIMPLIFY-CFG: [[BB6]]: +; SIMPLIFY-CFG-NEXT: switch i32 [[TMP2]], label %[[BB9:.*]] [ +; SIMPLIFY-CFG-NEXT: i32 0, label %[[BB7]] +; SIMPLIFY-CFG-NEXT: i32 1, label %[[BB8:.*]] +; SIMPLIFY-CFG-NEXT: ] +; SIMPLIFY-CFG: [[BB7]]: +; SIMPLIFY-CFG-NEXT: br label %[[BB9]] +; SIMPLIFY-CFG: [[BB8]]: +; SIMPLIFY-CFG-NEXT: br label %[[BB9]] +; SIMPLIFY-CFG: [[BB9]]: +; SIMPLIFY-CFG-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP4]], %[[BB6]] ], [ [[TMP3]], %[[BB8]] ], [ [[TMP3]], %[[BB7]] ] +; SIMPLIFY-CFG-NEXT: ret i32 [[TMP10]] +; +; O3-LABEL: define i32 @switch_duplicate_arms_multipred( +; O3-SAME: i1 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]]) local_unnamed_addr #[[ATTR0]] { +; O3-NEXT: br i1 [[TMP0]], label %[[BB6:.*]], label %[[BB7:.*]] +; O3: [[BB6]]: +; O3-NEXT: switch i32 [[TMP2]], label %[[BB9:.*]] [ +; O3-NEXT: i32 0, label %[[BB7]] +; O3-NEXT: i32 1, label %[[BB8:.*]] +; O3-NEXT: ] +; O3: [[BB7]]: +; O3-NEXT: br label %[[BB9]] +; O3: [[BB8]]: +; O3-NEXT: br label %[[BB9]] +; O3: [[BB9]]: +; O3-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP4]], %[[BB6]] ], [ [[TMP3]], %[[BB8]] ], [ [[TMP3]], %[[BB7]] ] +; O3-NEXT: ret i32 [[TMP10]] +; + br i1 %0, label %6, label %7 +6: + switch i32 %2, label %9 [ + i32 0, label %7 + i32 1, label %8 + ] + +7: + br label %9 + +8: + br label %9 + +9: + %10 = phi i32 [ %4, %6 ], [ %3, %8 ], [ %3, %7 ] + ret i32 %10 +} From 2db9f0055072a2172cdd4d3066b30d98d095bed3 Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Wed, 30 Oct 2024 09:01:56 -0700 Subject: [PATCH 02/34] [SimplifyCFG] Simplify switch instruction that has duplicate arms I noticed that the two C functions emitted different IR: ``` int switch_duplicate_arms(int switch_val, int v, int w) { switch (switch_val) { default: break; case 0: w = v; break; case 1: w = v; break; } return w; } int if_duplicate_arms(int switch_val, int v, int w) { if (switch_val == 0) w = v; else if (switch_val == 1) w = v; return v0; } ``` For `switch_duplicate_arms`, we generate IR that looks like this: ``` define i32 @switch_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3) { switch i32 %1, label %7 [ i32 0, label %5 i32 1, label %6 ] 5: br label %7 6: br label %7 7: %8 = phi i32 [ %3, %4 ], [ %2, %6 ], [ %2, %5 ] ret i32 %8 } ``` For the equivalent `if_duplicate_arms`, we generate: ``` define i32 @if_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3) { %5 = icmp ult i32 %1, 2 %6 = select i1 %5, i32 %2, i32 %3 ret i32 %6 } ``` For `switch_duplicate_arms`, taking case 0 and 1 are the same since %5 and %6 branch to the same location and the incoming values for %8 are the same from those blocks. We could remove one on the duplicate switch targets and update the switch with the single target. On RISC-V, prior to this patch, we generate the following code: ``` switch_duplicate_arms: li a4, 1 beq a1, a4, .LBB0_2 mv a0, a3 bnez a1, .LBB0_3 .LBB0_2: mv a0, a2 .LBB0_3: ret if_duplicate_arms: li a4, 2 mv a0, a2 bltu a1, a4, .LBB1_2 mv a0, a3 .LBB1_2: ret ``` After this patch, the O3 code is optimized to the icmp + select pair, which gives us the same code gen as `if_duplicate_arms` as desired. This may help with both code size and further switch simplification. I found that this patch causes no significant impact to spec2006/int/ref and spec2017/intrate/ref. --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 92 +++++++++++++++++++ .../ForwardSwitchConditionToPHI.ll | 6 +- llvm/test/Transforms/SimplifyCFG/HoistCode.ll | 6 +- .../Transforms/SimplifyCFG/switch-dup-bbs.ll | 50 ++++------ .../SimplifyCFG/switch-to-select-two-case.ll | 6 +- 5 files changed, 116 insertions(+), 44 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 72228b445a8b6..eb0c2346e08f2 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -276,6 +276,7 @@ class SimplifyCFGOpt { bool simplifyCleanupReturn(CleanupReturnInst *RI); bool simplifyUnreachable(UnreachableInst *UI); bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder); + bool simplifyDuplicateSwitchArms(SwitchInst *SI); bool simplifyIndirectBr(IndirectBrInst *IBI); bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder); bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder); @@ -7436,6 +7437,94 @@ static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder, return true; } +bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { + // Simplify the case where multiple arms contain only a terminator, the + // terminators are the same, and their sucessor PHIS incoming values are the + // same. + + // Find BBs that are candidates for simplification. + SmallPtrSet BBs; + for (auto &Case : SI->cases()) { + BasicBlock *BB = Case.getCaseSuccessor(); + + // FIXME: This case needs some extra care because the terminators other than + // SI need to be updated. + if (!BB->hasNPredecessors(1)) + continue; + + // FIXME: Relax that the terminator is a BranchInst by checking for equality + // on other kinds of terminators. + Instruction *T = BB->getTerminator(); + if (T && BB->size() == 1 && isa(T)) + BBs.insert(BB); + } + + auto IsBranchEq = [](BranchInst *A, BranchInst *B) { + if (A->isConditional() != B->isConditional()) + return false; + + if (A->isConditional() && A->getCondition() != B->getCondition()) + return false; + + if (A->getNumSuccessors() != B->getNumSuccessors()) + return false; + + for (unsigned I = 0; I < A->getNumSuccessors(); ++I) + if (A->getSuccessor(I) != B->getSuccessor(I)) + return false; + + // Need to check that PHIs in sucessors have matching values + for (auto *Succ : A->successors()) { + for (PHINode &Phi : Succ->phis()) + if (Phi.getIncomingValueForBlock(A->getParent()) != + Phi.getIncomingValueForBlock(B->getParent())) + return false; + } + + return true; + }; + + // Construct a map from candidate basic block to an equivalent basic block + // to replace it with. All equivalent basic blocks should be replaced with + // the same basic block. To do this, if there is no equivalent BB in the map, + // then insert into the map BB -> BB. Otherwise, we should check only elements + // in the map for equivalence to ensure that all equivalent BB get replaced + // by the BB in the map. Replacing BB with BB has no impact, so we skip + // a call to setSuccessor when we do the actual replacement. + DenseMap ReplaceWith; + for (BasicBlock *BB : BBs) { + bool Inserted = false; + for (auto KV : ReplaceWith) { + if (IsBranchEq(cast(BB->getTerminator()), + cast(KV.first->getTerminator()))) { + ReplaceWith[BB] = KV.first; + Inserted = true; + break; + } + } + if (!Inserted) + ReplaceWith[BB] = BB; + } + + // Do the replacement in SI. + bool MadeChange = false; + // There is no fast lookup of BasicBlock -> Cases, so we iterate over cases + // and check that the case was a candidate. BBs is already filtered, so + // hopefully calling contains on it is not too expensive. + for (auto &Case : SI->cases()) { + BasicBlock *OldSucc = Case.getCaseSuccessor(); + if (!BBs.contains(OldSucc)) + continue; + BasicBlock *NewSucc = ReplaceWith[OldSucc]; + if (OldSucc != NewSucc) { + Case.setSuccessor(NewSucc); + MadeChange = true; + } + } + + return MadeChange; +} + bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { BasicBlock *BB = SI->getParent(); @@ -7496,6 +7585,9 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { hoistCommonCodeFromSuccessors(SI, !Options.HoistCommonInsts)) return requestResimplify(); + if (simplifyDuplicateSwitchArms(SI)) + return requestResimplify(); + return false; } diff --git a/llvm/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll b/llvm/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll index 8ad455eb9e7f2..4623eb2c5dd3c 100644 --- a/llvm/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll +++ b/llvm/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll @@ -139,16 +139,14 @@ define i32 @PR34471(i32 %x) { ; NO_FWD-NEXT: switch i32 [[X:%.*]], label [[ELSE3:%.*]] [ ; NO_FWD-NEXT: i32 17, label [[RETURN:%.*]] ; NO_FWD-NEXT: i32 19, label [[IF19:%.*]] -; NO_FWD-NEXT: i32 42, label [[IF42:%.*]] +; NO_FWD-NEXT: i32 42, label [[IF19]] ; NO_FWD-NEXT: ] ; NO_FWD: if19: ; NO_FWD-NEXT: br label [[RETURN]] -; NO_FWD: if42: -; NO_FWD-NEXT: br label [[RETURN]] ; NO_FWD: else3: ; NO_FWD-NEXT: br label [[RETURN]] ; NO_FWD: return: -; NO_FWD-NEXT: [[R:%.*]] = phi i32 [ [[X]], [[IF19]] ], [ [[X]], [[IF42]] ], [ 0, [[ELSE3]] ], [ 17, [[ENTRY:%.*]] ] +; NO_FWD-NEXT: [[R:%.*]] = phi i32 [ [[X]], [[IF19]] ], [ 0, [[ELSE3]] ], [ 17, [[ENTRY:%.*]] ] ; NO_FWD-NEXT: ret i32 [[R]] ; ; FWD-LABEL: @PR34471( diff --git a/llvm/test/Transforms/SimplifyCFG/HoistCode.ll b/llvm/test/Transforms/SimplifyCFG/HoistCode.ll index fe0b48028a3b6..fbe41d891c1ec 100644 --- a/llvm/test/Transforms/SimplifyCFG/HoistCode.ll +++ b/llvm/test/Transforms/SimplifyCFG/HoistCode.ll @@ -65,14 +65,12 @@ define float @PR39535min_switch(i64 %i, float %x) { ; CHECK-NEXT: entry: ; CHECK-NEXT: switch i64 [[I:%.*]], label [[END:%.*]] [ ; CHECK-NEXT: i64 1, label [[BB1:%.*]] -; CHECK-NEXT: i64 2, label [[BB2:%.*]] +; CHECK-NEXT: i64 2, label [[BB1]] ; CHECK-NEXT: ] ; CHECK: bb1: ; CHECK-NEXT: br label [[END]] -; CHECK: bb2: -; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[COND:%.*]] = phi fast float [ [[X:%.*]], [[BB1]] ], [ [[X]], [[BB2]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[COND:%.*]] = phi fast float [ [[X:%.*]], [[BB1]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] ; CHECK-NEXT: ret float [[COND]] ; entry: diff --git a/llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll b/llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll index 3575597e40856..b12db656fdf68 100644 --- a/llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll +++ b/llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll @@ -5,30 +5,20 @@ define i32 @switch_all_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3) { ; SIMPLIFY-CFG-LABEL: define i32 @switch_all_duplicate_arms( ; SIMPLIFY-CFG-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]]) { -; SIMPLIFY-CFG-NEXT: switch i32 [[TMP1]], label %[[BB7:.*]] [ +; SIMPLIFY-CFG-NEXT: switch i32 [[TMP1]], label %[[BB6:.*]] [ ; SIMPLIFY-CFG-NEXT: i32 0, label %[[BB5:.*]] -; SIMPLIFY-CFG-NEXT: i32 1, label %[[BB6:.*]] +; SIMPLIFY-CFG-NEXT: i32 1, label %[[BB5]] ; SIMPLIFY-CFG-NEXT: ] ; SIMPLIFY-CFG: [[BB5]]: -; SIMPLIFY-CFG-NEXT: br label %[[BB7]] +; SIMPLIFY-CFG-NEXT: br label %[[BB6]] ; SIMPLIFY-CFG: [[BB6]]: -; SIMPLIFY-CFG-NEXT: br label %[[BB7]] -; SIMPLIFY-CFG: [[BB7]]: -; SIMPLIFY-CFG-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP3]], [[TMP4:%.*]] ], [ [[TMP2]], %[[BB6]] ], [ [[TMP2]], %[[BB5]] ] +; SIMPLIFY-CFG-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP3]], [[TMP4:%.*]] ], [ [[TMP2]], %[[BB5]] ] ; SIMPLIFY-CFG-NEXT: ret i32 [[TMP8]] ; ; O3-LABEL: define i32 @switch_all_duplicate_arms( ; O3-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -; O3-NEXT: switch i32 [[TMP1]], label %[[BB7:.*]] [ -; O3-NEXT: i32 0, label %[[BB5:.*]] -; O3-NEXT: i32 1, label %[[BB6:.*]] -; O3-NEXT: ] -; O3: [[BB5]]: -; O3-NEXT: br label %[[BB7]] -; O3: [[BB6]]: -; O3-NEXT: br label %[[BB7]] -; O3: [[BB7]]: -; O3-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP3]], [[TMP4:%.*]] ], [ [[TMP2]], %[[BB6]] ], [ [[TMP2]], %[[BB5]] ] +; O3-NEXT: [[SWITCH:%.*]] = icmp ult i32 [[TMP1]], 2 +; O3-NEXT: [[TMP8:%.*]] = select i1 [[SWITCH]], i32 [[TMP2]], i32 [[TMP3]] ; O3-NEXT: ret i32 [[TMP8]] ; switch i32 %1, label %7 [ @@ -50,36 +40,32 @@ define i32 @switch_all_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3) { define i32 @switch_some_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) { ; SIMPLIFY-CFG-LABEL: define i32 @switch_some_duplicate_arms( ; SIMPLIFY-CFG-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]]) { -; SIMPLIFY-CFG-NEXT: switch i32 [[TMP1]], label %[[BB9:.*]] [ +; SIMPLIFY-CFG-NEXT: switch i32 [[TMP1]], label %[[BB8:.*]] [ ; SIMPLIFY-CFG-NEXT: i32 0, label %[[BB6:.*]] -; SIMPLIFY-CFG-NEXT: i32 1, label %[[BB7:.*]] -; SIMPLIFY-CFG-NEXT: i32 2, label %[[BB8:.*]] +; SIMPLIFY-CFG-NEXT: i32 1, label %[[BB6]] +; SIMPLIFY-CFG-NEXT: i32 2, label %[[BB7:.*]] ; SIMPLIFY-CFG-NEXT: ] ; SIMPLIFY-CFG: [[BB6]]: -; SIMPLIFY-CFG-NEXT: br label %[[BB9]] +; SIMPLIFY-CFG-NEXT: br label %[[BB8]] ; SIMPLIFY-CFG: [[BB7]]: -; SIMPLIFY-CFG-NEXT: br label %[[BB9]] +; SIMPLIFY-CFG-NEXT: br label %[[BB8]] ; SIMPLIFY-CFG: [[BB8]]: -; SIMPLIFY-CFG-NEXT: br label %[[BB9]] -; SIMPLIFY-CFG: [[BB9]]: -; SIMPLIFY-CFG-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP3]], [[TMP5:%.*]] ], [ [[TMP4]], %[[BB8]] ], [ [[TMP2]], %[[BB7]] ], [ [[TMP2]], %[[BB6]] ] +; SIMPLIFY-CFG-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP3]], [[TMP5:%.*]] ], [ [[TMP4]], %[[BB7]] ], [ [[TMP2]], %[[BB6]] ] ; SIMPLIFY-CFG-NEXT: ret i32 [[TMP10]] ; ; O3-LABEL: define i32 @switch_some_duplicate_arms( ; O3-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]]) local_unnamed_addr #[[ATTR0]] { -; O3-NEXT: switch i32 [[TMP1]], label %[[BB9:.*]] [ +; O3-NEXT: switch i32 [[TMP1]], label %[[BB8:.*]] [ ; O3-NEXT: i32 0, label %[[BB6:.*]] -; O3-NEXT: i32 1, label %[[BB7:.*]] -; O3-NEXT: i32 2, label %[[BB8:.*]] +; O3-NEXT: i32 1, label %[[BB6]] +; O3-NEXT: i32 2, label %[[BB7:.*]] ; O3-NEXT: ] ; O3: [[BB6]]: -; O3-NEXT: br label %[[BB9]] +; O3-NEXT: br label %[[BB8]] ; O3: [[BB7]]: -; O3-NEXT: br label %[[BB9]] +; O3-NEXT: br label %[[BB8]] ; O3: [[BB8]]: -; O3-NEXT: br label %[[BB9]] -; O3: [[BB9]]: -; O3-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP3]], [[TMP5:%.*]] ], [ [[TMP4]], %[[BB8]] ], [ [[TMP2]], %[[BB7]] ], [ [[TMP2]], %[[BB6]] ] +; O3-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP3]], [[TMP5:%.*]] ], [ [[TMP4]], %[[BB7]] ], [ [[TMP2]], %[[BB6]] ] ; O3-NEXT: ret i32 [[TMP10]] ; switch i32 %1, label %9 [ diff --git a/llvm/test/Transforms/SimplifyCFG/switch-to-select-two-case.ll b/llvm/test/Transforms/SimplifyCFG/switch-to-select-two-case.ll index 1e2f18b3f339d..50998e447b71d 100644 --- a/llvm/test/Transforms/SimplifyCFG/switch-to-select-two-case.ll +++ b/llvm/test/Transforms/SimplifyCFG/switch-to-select-two-case.ll @@ -272,16 +272,14 @@ define i8 @switch_to_select_two_case_results_no_default(i32 %i) { ; CHECK-NEXT: i32 0, label [[END:%.*]] ; CHECK-NEXT: i32 2, label [[END]] ; CHECK-NEXT: i32 4, label [[CASE3:%.*]] -; CHECK-NEXT: i32 6, label [[CASE4:%.*]] +; CHECK-NEXT: i32 6, label [[CASE3]] ; CHECK-NEXT: ] ; CHECK: case3: ; CHECK-NEXT: br label [[END]] -; CHECK: case4: -; CHECK-NEXT: br label [[END]] ; CHECK: default: ; CHECK-NEXT: unreachable ; CHECK: end: -; CHECK-NEXT: [[T0:%.*]] = phi i8 [ 44, [[CASE3]] ], [ 44, [[CASE4]] ], [ 42, [[ENTRY:%.*]] ], [ 42, [[ENTRY]] ] +; CHECK-NEXT: [[T0:%.*]] = phi i8 [ 44, [[CASE3]] ], [ 42, [[ENTRY:%.*]] ], [ 42, [[ENTRY]] ] ; CHECK-NEXT: ret i8 [[T0]] ; entry: From 4e560672205f5328152251ac8cd1199494f9cda0 Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Wed, 30 Oct 2024 12:12:02 -0700 Subject: [PATCH 03/34] fixup! respond to review --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index eb0c2346e08f2..74cb086f2b609 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -7463,19 +7463,26 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { if (A->isConditional() != B->isConditional()) return false; - if (A->isConditional() && A->getCondition() != B->getCondition()) - return false; + if (A->isConditional()) { + // If the conditions are instructions, check equality up to commutativity. + // Otherwise, check that the two Values are the same. + Value *AC = A->getCondition(); + Value *BC = B->getCondition(); + auto *ACI = dyn_cast(AC); + auto *BCI = dyn_cast(BC); + if ((ACI && BCI && !areIdenticalUpToCommutativity(ACI, BCI)) && AC != BC) + return false; + } if (A->getNumSuccessors() != B->getNumSuccessors()) return false; - for (unsigned I = 0; I < A->getNumSuccessors(); ++I) - if (A->getSuccessor(I) != B->getSuccessor(I)) + for (unsigned I = 0; I < A->getNumSuccessors(); ++I) { + BasicBlock *ASucc = A->getSuccessor(I); + if (ASucc != B->getSuccessor(I)) return false; - - // Need to check that PHIs in sucessors have matching values - for (auto *Succ : A->successors()) { - for (PHINode &Phi : Succ->phis()) + // Need to check that PHIs in sucessors have matching values + for (PHINode &Phi : ASucc->phis()) if (Phi.getIncomingValueForBlock(A->getParent()) != Phi.getIncomingValueForBlock(B->getParent())) return false; From 9e106554ee9a52a9f3838b46195162ddc07ca18c Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Wed, 30 Oct 2024 12:31:54 -0700 Subject: [PATCH 04/34] fixup! refactor for general approach --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 74cb086f2b609..3809591f67625 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -7455,7 +7455,7 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { // FIXME: Relax that the terminator is a BranchInst by checking for equality // on other kinds of terminators. Instruction *T = BB->getTerminator(); - if (T && BB->size() == 1 && isa(T)) + if (T && isa(T)) BBs.insert(BB); } @@ -7491,6 +7491,16 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { return true; }; + auto IsBBEqualTo = [&IsBranchEq](BasicBlock *A, BasicBlock *B) { + // FIXME: Support more than just a single BranchInst. One way we could do + // this is by taking a hashing approach. + if (A->size() != 1 || B->size() != 1) + return false; + + return IsBranchEq(cast(A->getTerminator()), + cast(B->getTerminator())); + }; + // Construct a map from candidate basic block to an equivalent basic block // to replace it with. All equivalent basic blocks should be replaced with // the same basic block. To do this, if there is no equivalent BB in the map, @@ -7502,8 +7512,7 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { for (BasicBlock *BB : BBs) { bool Inserted = false; for (auto KV : ReplaceWith) { - if (IsBranchEq(cast(BB->getTerminator()), - cast(KV.first->getTerminator()))) { + if (IsBBEqualTo(BB, KV.first)) { ReplaceWith[BB] = KV.first; Inserted = true; break; From 64ddee66f9350510381b3c567acc570ba8efb2df Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Wed, 30 Oct 2024 14:50:37 -0700 Subject: [PATCH 05/34] fixup! move PHI checks --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 24 ++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 3809591f67625..68fad8e213f60 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -7477,16 +7477,9 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { if (A->getNumSuccessors() != B->getNumSuccessors()) return false; - for (unsigned I = 0; I < A->getNumSuccessors(); ++I) { - BasicBlock *ASucc = A->getSuccessor(I); - if (ASucc != B->getSuccessor(I)) + for (unsigned I = 0; I < A->getNumSuccessors(); ++I) + if (A->getSuccessor(I) != B->getSuccessor(I)) return false; - // Need to check that PHIs in sucessors have matching values - for (PHINode &Phi : ASucc->phis()) - if (Phi.getIncomingValueForBlock(A->getParent()) != - Phi.getIncomingValueForBlock(B->getParent())) - return false; - } return true; }; @@ -7497,8 +7490,17 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { if (A->size() != 1 || B->size() != 1) return false; - return IsBranchEq(cast(A->getTerminator()), - cast(B->getTerminator())); + if (!IsBranchEq(cast(A->getTerminator()), + cast(B->getTerminator()))) + return false; + + // Need to check that PHIs in sucessors have matching values + for (auto *Succ : cast(A->getTerminator())->successors()) + for (PHINode &Phi : Succ->phis()) + if (Phi.getIncomingValueForBlock(A) != Phi.getIncomingValueForBlock(B)) + return false; + + return true; }; // Construct a map from candidate basic block to an equivalent basic block From c8eecb6b9f4f5e80500fc072a661daf8b2afe92a Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Thu, 31 Oct 2024 10:04:29 -0700 Subject: [PATCH 06/34] fixup! make it O(n) --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 186 ++++++++++++---------- 1 file changed, 102 insertions(+), 84 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 68fad8e213f60..f84ab936fe343 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -7437,106 +7437,124 @@ static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder, return true; } -bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { - // Simplify the case where multiple arms contain only a terminator, the - // terminators are the same, and their sucessor PHIS incoming values are the - // same. - - // Find BBs that are candidates for simplification. - SmallPtrSet BBs; - for (auto &Case : SI->cases()) { - BasicBlock *BB = Case.getCaseSuccessor(); - - // FIXME: This case needs some extra care because the terminators other than - // SI need to be updated. - if (!BB->hasNPredecessors(1)) - continue; +namespace llvm { +template <> struct DenseMapInfo { + static const SwitchInst::CaseHandle *getEmptyKey() { + return reinterpret_cast( + DenseMapInfo::getEmptyKey()); + } + static const SwitchInst::CaseHandle *getTombstoneKey() { + return reinterpret_cast( + DenseMapInfo::getTombstoneKey()); + } + static unsigned getHashValue(const SwitchInst::CaseHandle *Case) { + BasicBlock *Succ = Case->getCaseSuccessor(); + return hash_combine(Succ->size(), Succ->getTerminator()->getOpcode()); + } + static bool isEqual(const SwitchInst::CaseHandle *LHS, + const SwitchInst::CaseHandle *RHS) { + + auto EKey = DenseMapInfo::getEmptyKey(); + auto TKey = DenseMapInfo::getTombstoneKey(); + if (LHS == EKey || RHS == EKey || LHS == TKey || RHS == TKey) + return LHS == RHS; + + auto IsBranchEq = [](BranchInst *A, BranchInst *B) { + if (A->isConditional() != B->isConditional()) + return false; - // FIXME: Relax that the terminator is a BranchInst by checking for equality - // on other kinds of terminators. - Instruction *T = BB->getTerminator(); - if (T && isa(T)) - BBs.insert(BB); - } + if (A->isConditional()) { + // If the conditions are instructions, check equality up to + // commutativity. Otherwise, check that the two Values are the same. + Value *AC = A->getCondition(); + Value *BC = B->getCondition(); + auto *ACI = dyn_cast(AC); + auto *BCI = dyn_cast(BC); - auto IsBranchEq = [](BranchInst *A, BranchInst *B) { - if (A->isConditional() != B->isConditional()) - return false; + if (ACI && BCI && !areIdenticalUpToCommutativity(ACI, BCI)) + return false; + if ((!ACI || !BCI) && AC != BC) + return false; + } - if (A->isConditional()) { - // If the conditions are instructions, check equality up to commutativity. - // Otherwise, check that the two Values are the same. - Value *AC = A->getCondition(); - Value *BC = B->getCondition(); - auto *ACI = dyn_cast(AC); - auto *BCI = dyn_cast(BC); - if ((ACI && BCI && !areIdenticalUpToCommutativity(ACI, BCI)) && AC != BC) + if (A->getNumSuccessors() != B->getNumSuccessors()) return false; - } - if (A->getNumSuccessors() != B->getNumSuccessors()) - return false; + for (unsigned I = 0; I < A->getNumSuccessors(); ++I) + if (A->getSuccessor(I) != B->getSuccessor(I)) + return false; - for (unsigned I = 0; I < A->getNumSuccessors(); ++I) - if (A->getSuccessor(I) != B->getSuccessor(I)) - return false; + return true; + }; - return true; - }; + auto IsBBEq = [&IsBranchEq](BasicBlock *A, BasicBlock *B) { + // FIXME: Support more than just a single BranchInst. One way we could do + // this is by taking a hashing approach. + if (A->size() != 1 || B->size() != 1) + return false; - auto IsBBEqualTo = [&IsBranchEq](BasicBlock *A, BasicBlock *B) { - // FIXME: Support more than just a single BranchInst. One way we could do - // this is by taking a hashing approach. - if (A->size() != 1 || B->size() != 1) - return false; + if (!IsBranchEq(cast(A->getTerminator()), + cast(B->getTerminator()))) + return false; - if (!IsBranchEq(cast(A->getTerminator()), - cast(B->getTerminator()))) - return false; + // Need to check that PHIs in sucessors have matching values + for (auto *Succ : cast(A->getTerminator())->successors()) + for (PHINode &Phi : Succ->phis()) + if (Phi.getIncomingValueForBlock(A) != + Phi.getIncomingValueForBlock(B)) + return false; - // Need to check that PHIs in sucessors have matching values - for (auto *Succ : cast(A->getTerminator())->successors()) - for (PHINode &Phi : Succ->phis()) - if (Phi.getIncomingValueForBlock(A) != Phi.getIncomingValueForBlock(B)) - return false; + return true; + }; - return true; - }; + return IsBBEq(LHS->getCaseSuccessor(), RHS->getCaseSuccessor()); + } +}; +} // namespace llvm - // Construct a map from candidate basic block to an equivalent basic block - // to replace it with. All equivalent basic blocks should be replaced with - // the same basic block. To do this, if there is no equivalent BB in the map, - // then insert into the map BB -> BB. Otherwise, we should check only elements - // in the map for equivalence to ensure that all equivalent BB get replaced - // by the BB in the map. Replacing BB with BB has no impact, so we skip - // a call to setSuccessor when we do the actual replacement. - DenseMap ReplaceWith; - for (BasicBlock *BB : BBs) { - bool Inserted = false; - for (auto KV : ReplaceWith) { - if (IsBBEqualTo(BB, KV.first)) { - ReplaceWith[BB] = KV.first; - Inserted = true; - break; - } - } - if (!Inserted) - ReplaceWith[BB] = BB; +bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { + // Build a map where the key is the CaseHandle that contains the successor + // BasicBlock to replace all CaseHandle successor BasicBlocks in the value + // list. We use a custom DenseMapInfo to build this map in O(size(cases()). + // We use CaseHandle instead of BasicBlock since it allows us to do the + // replacement in O(size(cases)). If we had used BasicBlock, then to do the + // actual replacement, we'd need an additional nested loop to loop over all + // CaseHandle since there is no O(1) lookup of BasicBlock -> Cases. + DenseSet> + ReplaceWith; + + // We'd like to use CaseHandle* for our set because it is easier to write + // getEmptyKey and getTombstoneKey for CaseHandle* than it is for CaseHandle. + // We need to take ownership though, since SI->cases() makes no guarantee that + // the CaseHandle is still allocated on every iteration. We can skip over + // cases we know we won't consider for replacement. + SmallVector Cases; + for (auto Case : SI->cases()) { + BasicBlock *BB = Case.getCaseSuccessor(); + // Skip BBs that are not candidates for simplification. + // FIXME: This case needs some extra care because the terminators other than + // SI need to be updated. + if (!BB->hasNPredecessors(1)) + continue; + // FIXME: Relax that the terminator is a BranchInst by checking for equality + // on other kinds of terminators. + Instruction *T = BB->getTerminator(); + if (!T || !isa(T)) + continue; + Cases.emplace_back(Case); } - // Do the replacement in SI. bool MadeChange = false; - // There is no fast lookup of BasicBlock -> Cases, so we iterate over cases - // and check that the case was a candidate. BBs is already filtered, so - // hopefully calling contains on it is not too expensive. - for (auto &Case : SI->cases()) { - BasicBlock *OldSucc = Case.getCaseSuccessor(); - if (!BBs.contains(OldSucc)) - continue; - BasicBlock *NewSucc = ReplaceWith[OldSucc]; - if (OldSucc != NewSucc) { - Case.setSuccessor(NewSucc); + for (auto &Case : Cases) { + // Case is a candidate for simplification. If we find a duplicate BB, + // replace it. + const auto Res = ReplaceWith.find(&Case); + if (Res != ReplaceWith.end()) { + Case.setSuccessor((*Res)->getCaseSuccessor()); MadeChange = true; + } else { + ReplaceWith.insert(&Case); } } From 9e9afc2783de2f5d323c84e1bd8c2284322bce91 Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Thu, 31 Oct 2024 10:21:45 -0700 Subject: [PATCH 07/34] fixup! use successor BBs in hash --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index f84ab936fe343..20184a70dd914 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -7449,7 +7449,10 @@ template <> struct DenseMapInfo { } static unsigned getHashValue(const SwitchInst::CaseHandle *Case) { BasicBlock *Succ = Case->getCaseSuccessor(); - return hash_combine(Succ->size(), Succ->getTerminator()->getOpcode()); + BranchInst *BI = cast(Succ->getTerminator()); + + auto It = BI->successors(); + return hash_combine(Succ->size(), hash_combine_range(It.begin(), It.end())); } static bool isEqual(const SwitchInst::CaseHandle *LHS, const SwitchInst::CaseHandle *RHS) { From ef1bc74730c176cce828d129e439847134ec655e Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Thu, 31 Oct 2024 13:30:57 -0700 Subject: [PATCH 08/34] fixup! use hasNPredsOrMore for early exit capability --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 20184a70dd914..ddfebb8c9fbd3 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -7538,7 +7538,7 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { // Skip BBs that are not candidates for simplification. // FIXME: This case needs some extra care because the terminators other than // SI need to be updated. - if (!BB->hasNPredecessors(1)) + if (BB->hasNPredecessorsOrMore(2)) continue; // FIXME: Relax that the terminator is a BranchInst by checking for equality // on other kinds of terminators. From 794d4e8810190af94193d679bdfb67f6ed63da0d Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Thu, 31 Oct 2024 13:36:03 -0700 Subject: [PATCH 09/34] fixup! don't add to Cases if size() != 1 to improve performance --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index ddfebb8c9fbd3..37222e1a75855 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -7491,10 +7491,10 @@ template <> struct DenseMapInfo { }; auto IsBBEq = [&IsBranchEq](BasicBlock *A, BasicBlock *B) { - // FIXME: Support more than just a single BranchInst. One way we could do - // this is by taking a hashing approach. - if (A->size() != 1 || B->size() != 1) - return false; + // FIXME: we checked that the size of A and B are both 1 in + // simplifyDuplicateSwitchArms to make the Case list smaller to improve + // performance. If we decide to support BasicBlocks with more than just a + // single instruction, we need to check that A.size() == B.size() here. if (!IsBranchEq(cast(A->getTerminator()), cast(B->getTerminator()))) @@ -7536,6 +7536,12 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { for (auto Case : SI->cases()) { BasicBlock *BB = Case.getCaseSuccessor(); // Skip BBs that are not candidates for simplification. + + // FIXME: Support more than just a single BranchInst. One way we could do + // this is by taking a hashing approach of all insts in BB. + if (BB->size() != 1) + continue; + // FIXME: This case needs some extra care because the terminators other than // SI need to be updated. if (BB->hasNPredecessorsOrMore(2)) @@ -7545,6 +7551,7 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { Instruction *T = BB->getTerminator(); if (!T || !isa(T)) continue; + Cases.emplace_back(Case); } From 32b627fd388a4e7e63f870803105a70c9931d3b3 Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Fri, 1 Nov 2024 10:27:41 -0700 Subject: [PATCH 10/34] fixup! precompute getIncomingValueForBlock --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 130 +++++++++++++--------- 1 file changed, 76 insertions(+), 54 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 37222e1a75855..86983249787e2 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -7437,28 +7437,39 @@ static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder, return true; } +/// Checking whether two CaseHandle.getCaseSuccessor() are equal depends on +/// the incoming values of their successor PHINodes. +/// PHINode::getIncomingValueForBlock has O(|Preds|), so we'd like to avoid +/// calling this function on each BasicBlock every time isEqual is called, +/// especially since the same BasicBlock may be passed as an argument multiple +/// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock -> +/// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking +/// of the incoming values. +struct CaseHandleWrapper { + const SwitchInst::CaseHandle Case; + DenseMap> &PhiPredIVs; +}; + namespace llvm { -template <> struct DenseMapInfo { - static const SwitchInst::CaseHandle *getEmptyKey() { - return reinterpret_cast( +template <> struct DenseMapInfo { + static const CaseHandleWrapper *getEmptyKey() { + return reinterpret_cast( DenseMapInfo::getEmptyKey()); } - static const SwitchInst::CaseHandle *getTombstoneKey() { - return reinterpret_cast( + static const CaseHandleWrapper *getTombstoneKey() { + return reinterpret_cast( DenseMapInfo::getTombstoneKey()); } - static unsigned getHashValue(const SwitchInst::CaseHandle *Case) { - BasicBlock *Succ = Case->getCaseSuccessor(); + static unsigned getHashValue(const CaseHandleWrapper *CT) { + BasicBlock *Succ = CT->Case.getCaseSuccessor(); BranchInst *BI = cast(Succ->getTerminator()); - auto It = BI->successors(); return hash_combine(Succ->size(), hash_combine_range(It.begin(), It.end())); } - static bool isEqual(const SwitchInst::CaseHandle *LHS, - const SwitchInst::CaseHandle *RHS) { - - auto EKey = DenseMapInfo::getEmptyKey(); - auto TKey = DenseMapInfo::getTombstoneKey(); + static bool isEqual(const CaseHandleWrapper *LHS, + const CaseHandleWrapper *RHS) { + auto EKey = DenseMapInfo::getEmptyKey(); + auto TKey = DenseMapInfo::getTombstoneKey(); if (LHS == EKey || RHS == EKey || LHS == TKey || RHS == TKey) return LHS == RHS; @@ -7490,49 +7501,38 @@ template <> struct DenseMapInfo { return true; }; - auto IsBBEq = [&IsBranchEq](BasicBlock *A, BasicBlock *B) { - // FIXME: we checked that the size of A and B are both 1 in - // simplifyDuplicateSwitchArms to make the Case list smaller to improve - // performance. If we decide to support BasicBlocks with more than just a - // single instruction, we need to check that A.size() == B.size() here. - - if (!IsBranchEq(cast(A->getTerminator()), - cast(B->getTerminator()))) - return false; - - // Need to check that PHIs in sucessors have matching values - for (auto *Succ : cast(A->getTerminator())->successors()) - for (PHINode &Phi : Succ->phis()) - if (Phi.getIncomingValueForBlock(A) != - Phi.getIncomingValueForBlock(B)) + auto IsBBEq = + [&IsBranchEq]( + BasicBlock *A, BasicBlock *B, + DenseMap> &PhiPredIVs) { + // FIXME: we checked that the size of A and B are both 1 in + // simplifyDuplicateSwitchArms to make the Case list smaller to + // improve performance. If we decide to support BasicBlocks with more + // than just a single instruction, we need to check that A.size() == + // B.size() here. + + if (!IsBranchEq(cast(A->getTerminator()), + cast(B->getTerminator()))) return false; - return true; - }; + // Need to check that PHIs in sucessors have matching values + for (auto *Succ : cast(A->getTerminator())->successors()) + for (PHINode &Phi : Succ->phis()) + if (PhiPredIVs[&Phi][A] != PhiPredIVs[&Phi][B]) + return false; - return IsBBEq(LHS->getCaseSuccessor(), RHS->getCaseSuccessor()); + return true; + }; + + return IsBBEq(LHS->Case.getCaseSuccessor(), RHS->Case.getCaseSuccessor(), + LHS->PhiPredIVs); } }; } // namespace llvm bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { - // Build a map where the key is the CaseHandle that contains the successor - // BasicBlock to replace all CaseHandle successor BasicBlocks in the value - // list. We use a custom DenseMapInfo to build this map in O(size(cases()). - // We use CaseHandle instead of BasicBlock since it allows us to do the - // replacement in O(size(cases)). If we had used BasicBlock, then to do the - // actual replacement, we'd need an additional nested loop to loop over all - // CaseHandle since there is no O(1) lookup of BasicBlock -> Cases. - DenseSet> - ReplaceWith; - - // We'd like to use CaseHandle* for our set because it is easier to write - // getEmptyKey and getTombstoneKey for CaseHandle* than it is for CaseHandle. - // We need to take ownership though, since SI->cases() makes no guarantee that - // the CaseHandle is still allocated on every iteration. We can skip over - // cases we know we won't consider for replacement. - SmallVector Cases; + DenseMap> PhiPredIVs; + SmallVector Cases; for (auto Case : SI->cases()) { BasicBlock *BB = Case.getCaseSuccessor(); // Skip BBs that are not candidates for simplification. @@ -7552,19 +7552,41 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { if (!T || !isa(T)) continue; - Cases.emplace_back(Case); + // Preprocess incoming PHI values for successors of BB. + for (BasicBlock *Succ : cast(T)->successors()) { + for (PHINode &Phi : Succ->phis()) { + auto IncomingVals = PhiPredIVs.find(&Phi); + Value *Incoming = Phi.getIncomingValueForBlock(BB); + if (IncomingVals != PhiPredIVs.end()) + IncomingVals->second.insert({BB, Incoming}); + else + PhiPredIVs[&Phi] = {{BB, Incoming}}; + } + } + + Cases.emplace_back(CaseHandleWrapper{Case, PhiPredIVs}); } + // Build a set such that if the CaseHandleWrapper exists in the set and + // another CaseHandleWrapper isEqual, then the equivalent CaseHandleWrapper + // which is not in the set should be replaced with the one in the set. If the + // CaseHandleWrapper is not in the set, then it should be added to the set so + // other CaseHandleWrappers can check against it in the same manner. We chose + // to use CaseHandleWrapper instead of BasicBlock since we'd need an extra + // nested loop since there is no O(1) lookup of BasicBlock -> Cases in + // SwichInst. + DenseSet> + ReplaceWith; bool MadeChange = false; - for (auto &Case : Cases) { - // Case is a candidate for simplification. If we find a duplicate BB, + for (auto &CHW : Cases) { + // CHW is a candidate for simplification. If we find a duplicate BB, // replace it. - const auto Res = ReplaceWith.find(&Case); + const auto Res = ReplaceWith.find(&CHW); if (Res != ReplaceWith.end()) { - Case.setSuccessor((*Res)->getCaseSuccessor()); + CHW.Case.setSuccessor((*Res)->Case.getCaseSuccessor()); MadeChange = true; } else { - ReplaceWith.insert(&Case); + ReplaceWith.insert(&CHW); } } From 41903527f1cc8ae0d6df6e6b502b5eb5c2964082 Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Fri, 1 Nov 2024 11:14:16 -0700 Subject: [PATCH 11/34] fixup! only support unconditional branches --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 40 +++++++++-------------- 1 file changed, 15 insertions(+), 25 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 86983249787e2..f346257ea29aa 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -7474,30 +7474,14 @@ template <> struct DenseMapInfo { return LHS == RHS; auto IsBranchEq = [](BranchInst *A, BranchInst *B) { - if (A->isConditional() != B->isConditional()) - return false; - - if (A->isConditional()) { - // If the conditions are instructions, check equality up to - // commutativity. Otherwise, check that the two Values are the same. - Value *AC = A->getCondition(); - Value *BC = B->getCondition(); - auto *ACI = dyn_cast(AC); - auto *BCI = dyn_cast(BC); + assert(A->isUnconditional() && B->isUnconditional() && + "Only supporting unconditional branches for now"); + assert(A->getNumSuccessors() == 1 && B->getNumSuccessors() == 1 && + "Expected unconditional branches to have one successor"); - if (ACI && BCI && !areIdenticalUpToCommutativity(ACI, BCI)) - return false; - if ((!ACI || !BCI) && AC != BC) - return false; - } - - if (A->getNumSuccessors() != B->getNumSuccessors()) + if (A->getSuccessor(0) != B->getSuccessor(0)) return false; - for (unsigned I = 0; I < A->getNumSuccessors(); ++I) - if (A->getSuccessor(I) != B->getSuccessor(I)) - return false; - return true; }; @@ -7546,14 +7530,20 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { // SI need to be updated. if (BB->hasNPredecessorsOrMore(2)) continue; - // FIXME: Relax that the terminator is a BranchInst by checking for equality - // on other kinds of terminators. + Instruction *T = BB->getTerminator(); - if (!T || !isa(T)) + if (!T) + continue; + + // FIXME: Relax that the terminator is a BranchInst by checking for equality + // on other kinds of terminators. We decide to only support unconditional + // branches for now for compile time reasons. + auto *BI = dyn_cast(T); + if (!BI || BI->isConditional()) continue; // Preprocess incoming PHI values for successors of BB. - for (BasicBlock *Succ : cast(T)->successors()) { + for (BasicBlock *Succ : BI->successors()) { for (PHINode &Phi : Succ->phis()) { auto IncomingVals = PhiPredIVs.find(&Phi); Value *Incoming = Phi.getIncomingValueForBlock(BB); From f04d67f6e582bd3088c47ccfdebda4a67f16efdc Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Fri, 1 Nov 2024 12:09:22 -0700 Subject: [PATCH 12/34] fixup! try improving getHashValue --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index f346257ea29aa..9a3ea48eee835 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -7463,8 +7463,25 @@ template <> struct DenseMapInfo { static unsigned getHashValue(const CaseHandleWrapper *CT) { BasicBlock *Succ = CT->Case.getCaseSuccessor(); BranchInst *BI = cast(Succ->getTerminator()); - auto It = BI->successors(); - return hash_combine(Succ->size(), hash_combine_range(It.begin(), It.end())); + assert(BI->isUnconditional() && + "Only supporting unconditional branches for now"); + assert(BI->getNumSuccessors() == 1 && + "Expected unconditional branches to have one successor"); + assert(Succ->size() == 1 && "Expected just a single branch in the BB"); + + // Since we assume the BB is just a single BranchInst with a single + // succsessor, we hash as the BB and the incoming Values of its PHIs. + // Initially, we tried to just use the sucessor BB as the hash, but this had + // poor performance. We find that the extra computation of getting the + // incoming PHI values here leads to better performance on overall Set + // performance. + BasicBlock *BB = BI->getSuccessor(0); + SmallVector PhiValsForBB; + for (PHINode &Phi : BB->phis()) + PhiValsForBB.emplace_back(CT->PhiPredIVs[&Phi][BB]); + + return hash_combine( + BB, hash_combine_range(PhiValsForBB.begin(), PhiValsForBB.end())); } static bool isEqual(const CaseHandleWrapper *LHS, const CaseHandleWrapper *RHS) { From 4ca2777a9acf5d79806d1cd138d62203816c8b1a Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Fri, 1 Nov 2024 12:18:50 -0700 Subject: [PATCH 13/34] fixup! nitty cleanup --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 30 ++++++++++++++--------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 9a3ea48eee835..4f85497917e30 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -7510,17 +7510,21 @@ template <> struct DenseMapInfo { // simplifyDuplicateSwitchArms to make the Case list smaller to // improve performance. If we decide to support BasicBlocks with more // than just a single instruction, we need to check that A.size() == - // B.size() here. + // B.size() here, and we need to check more than just the BranchInsts + // for equality. - if (!IsBranchEq(cast(A->getTerminator()), - cast(B->getTerminator()))) + BranchInst *ABI = cast(A->getTerminator()); + if (!IsBranchEq(ABI, cast(B->getTerminator()))) return false; - // Need to check that PHIs in sucessors have matching values - for (auto *Succ : cast(A->getTerminator())->successors()) - for (PHINode &Phi : Succ->phis()) - if (PhiPredIVs[&Phi][A] != PhiPredIVs[&Phi][B]) + // Need to check that PHIs in successors have matching values + for (auto *Succ : ABI->successors()) { + for (PHINode &Phi : Succ->phis()) { + auto PredIVs = PhiPredIVs[&Phi]; + if (PredIVs[A] != PredIVs[B]) return false; + } + } return true; }; @@ -7532,26 +7536,28 @@ template <> struct DenseMapInfo { } // namespace llvm bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { + + // Build PhiPredIVs and Cases. Skip BBs that are not candidates for + // simplification. DenseMap> PhiPredIVs; SmallVector Cases; for (auto Case : SI->cases()) { BasicBlock *BB = Case.getCaseSuccessor(); - // Skip BBs that are not candidates for simplification. // FIXME: Support more than just a single BranchInst. One way we could do // this is by taking a hashing approach of all insts in BB. if (BB->size() != 1) continue; + Instruction *T = BB->getTerminator(); + if (!T) + continue; + // FIXME: This case needs some extra care because the terminators other than // SI need to be updated. if (BB->hasNPredecessorsOrMore(2)) continue; - Instruction *T = BB->getTerminator(); - if (!T) - continue; - // FIXME: Relax that the terminator is a BranchInst by checking for equality // on other kinds of terminators. We decide to only support unconditional // branches for now for compile time reasons. From 28620dfd7a84775e87cbb586760e640f963c7909 Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Fri, 1 Nov 2024 12:46:28 -0700 Subject: [PATCH 14/34] fixup! don't reproccess map for same BB --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 4f85497917e30..df984f54d5f0b 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -7517,13 +7517,14 @@ template <> struct DenseMapInfo { if (!IsBranchEq(ABI, cast(B->getTerminator()))) return false; - // Need to check that PHIs in successors have matching values - for (auto *Succ : ABI->successors()) { - for (PHINode &Phi : Succ->phis()) { - auto PredIVs = PhiPredIVs[&Phi]; - if (PredIVs[A] != PredIVs[B]) - return false; - } + // Need to check that PHIs in successor have matching values + assert(ABI->getNumSuccessors() == 1 && + "Expected unconditional branches to have one successor"); + BasicBlock *Succ = ABI->getSuccessor(0); + for (PHINode &Phi : Succ->phis()) { + auto PredIVs = PhiPredIVs[&Phi]; + if (PredIVs[A] != PredIVs[B]) + return false; } return true; @@ -7541,6 +7542,7 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { // simplification. DenseMap> PhiPredIVs; SmallVector Cases; + SmallPtrSet Seen; for (auto Case : SI->cases()) { BasicBlock *BB = Case.getCaseSuccessor(); @@ -7565,6 +7567,13 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { if (!BI || BI->isConditional()) continue; + // If we've seen BB before, then we've built PhiPredIVs for it already. + if (Seen.contains(BB)) { + Cases.emplace_back(CaseHandleWrapper{Case, PhiPredIVs}); + continue; + } + Seen.insert(BB); + // Preprocess incoming PHI values for successors of BB. for (BasicBlock *Succ : BI->successors()) { for (PHINode &Phi : Succ->phis()) { From 9dcf124d42628e5865f4d1d2f80562a91cab59e5 Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Fri, 1 Nov 2024 15:38:08 -0700 Subject: [PATCH 15/34] fixup! avoid calls to getIncomingValueForBlock --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 31 ++++++++++++----------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index df984f54d5f0b..8459aad7aa3d1 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -7538,8 +7538,11 @@ template <> struct DenseMapInfo { bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { - // Build PhiPredIVs and Cases. Skip BBs that are not candidates for - // simplification. + // Build Cases. Skip BBs that are not candidates for simplification. Mark + // PHINodes which need to be processed into PhiPredIVs. We decide to process + // an entire PHI at once opposed to calling getIncomingValueForBlock, since + // each call to getIncomingValueForBlock is O(|Preds|). + SmallPtrSet Phis; DenseMap> PhiPredIVs; SmallVector Cases; SmallPtrSet Seen; @@ -7567,28 +7570,26 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { if (!BI || BI->isConditional()) continue; - // If we've seen BB before, then we've built PhiPredIVs for it already. + // If we've seen BB before, then processed its successor PHIs. if (Seen.contains(BB)) { Cases.emplace_back(CaseHandleWrapper{Case, PhiPredIVs}); continue; } Seen.insert(BB); - - // Preprocess incoming PHI values for successors of BB. - for (BasicBlock *Succ : BI->successors()) { - for (PHINode &Phi : Succ->phis()) { - auto IncomingVals = PhiPredIVs.find(&Phi); - Value *Incoming = Phi.getIncomingValueForBlock(BB); - if (IncomingVals != PhiPredIVs.end()) - IncomingVals->second.insert({BB, Incoming}); - else - PhiPredIVs[&Phi] = {{BB, Incoming}}; - } - } + // Keep track of which PHIs we need as keys in PhiPredIVs below. + for (BasicBlock *Succ : BI->successors()) + for (PHINode &Phi : Succ->phis()) + Phis.insert(&Phi); Cases.emplace_back(CaseHandleWrapper{Case, PhiPredIVs}); } + for (PHINode *Phi : Phis) { + PhiPredIVs[Phi] = DenseMap(); + for (auto &IV : Phi->incoming_values()) + PhiPredIVs[Phi].insert({Phi->getIncomingBlock(IV), IV.get()}); + } + // Build a set such that if the CaseHandleWrapper exists in the set and // another CaseHandleWrapper isEqual, then the equivalent CaseHandleWrapper // which is not in the set should be replaced with the one in the set. If the From 41edd151fdfdaa3a0410942570be071c7beab2e4 Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Fri, 1 Nov 2024 15:41:19 -0700 Subject: [PATCH 16/34] fixup! drop Seen vector since we no longer build phi map in loop --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 8459aad7aa3d1..724a0a8a0fc6e 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -7545,7 +7545,6 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { SmallPtrSet Phis; DenseMap> PhiPredIVs; SmallVector Cases; - SmallPtrSet Seen; for (auto Case : SI->cases()) { BasicBlock *BB = Case.getCaseSuccessor(); @@ -7570,12 +7569,6 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { if (!BI || BI->isConditional()) continue; - // If we've seen BB before, then processed its successor PHIs. - if (Seen.contains(BB)) { - Cases.emplace_back(CaseHandleWrapper{Case, PhiPredIVs}); - continue; - } - Seen.insert(BB); // Keep track of which PHIs we need as keys in PhiPredIVs below. for (BasicBlock *Succ : BI->successors()) for (PHINode &Phi : Succ->phis()) From 5c247db53227470f0eb0e47d9522bea8b4540291 Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Sat, 2 Nov 2024 08:03:41 -0700 Subject: [PATCH 17/34] fixup! respond to review --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 724a0a8a0fc6e..cd80a13382d4f 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -7453,11 +7453,11 @@ struct CaseHandleWrapper { namespace llvm { template <> struct DenseMapInfo { static const CaseHandleWrapper *getEmptyKey() { - return reinterpret_cast( + return static_cast( DenseMapInfo::getEmptyKey()); } static const CaseHandleWrapper *getTombstoneKey() { - return reinterpret_cast( + return static_cast( DenseMapInfo::getTombstoneKey()); } static unsigned getHashValue(const CaseHandleWrapper *CT) { @@ -7522,7 +7522,7 @@ template <> struct DenseMapInfo { "Expected unconditional branches to have one successor"); BasicBlock *Succ = ABI->getSuccessor(0); for (PHINode &Phi : Succ->phis()) { - auto PredIVs = PhiPredIVs[&Phi]; + auto &PredIVs = PhiPredIVs[&Phi]; if (PredIVs[A] != PredIVs[B]) return false; } From c3a1361f31191476669cd0c2fc22d12215cb6c3e Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Sat, 2 Nov 2024 08:49:06 -0700 Subject: [PATCH 18/34] fixup! some cleanup and add Seen checks --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 78 ++++++++++------------- 1 file changed, 33 insertions(+), 45 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index cd80a13382d4f..8d13c1f8f766a 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -7490,59 +7490,45 @@ template <> struct DenseMapInfo { if (LHS == EKey || RHS == EKey || LHS == TKey || RHS == TKey) return LHS == RHS; - auto IsBranchEq = [](BranchInst *A, BranchInst *B) { - assert(A->isUnconditional() && B->isUnconditional() && - "Only supporting unconditional branches for now"); - assert(A->getNumSuccessors() == 1 && B->getNumSuccessors() == 1 && - "Expected unconditional branches to have one successor"); + BasicBlock *A = LHS->Case.getCaseSuccessor(); + BasicBlock *B = RHS->Case.getCaseSuccessor(); + + // FIXME: we checked that the size of A and B are both 1 in + // simplifyDuplicateSwitchArms to make the Case list smaller to + // improve performance. If we decide to support BasicBlocks with more + // than just a single instruction, we need to check that A.size() == + // B.size() here, and we need to check more than just the BranchInsts + // for equality. + + BranchInst *ABI = cast(A->getTerminator()); + BranchInst *BBI = cast(B->getTerminator()); + assert(ABI->isUnconditional() && BBI->isUnconditional() && + "Only supporting unconditional branches for now"); + assert(ABI->getNumSuccessors() == 1 && + "Expected unconditional branches to have one successor"); + if (ABI->getSuccessor(0) != BBI->getSuccessor(0)) + return false; - if (A->getSuccessor(0) != B->getSuccessor(0)) + // Need to check that PHIs in successor have matching values + BasicBlock *Succ = ABI->getSuccessor(0); + for (PHINode &Phi : Succ->phis()) { + auto &PredIVs = LHS->PhiPredIVs[&Phi]; + if (PredIVs[A] != PredIVs[B]) return false; + } - return true; - }; - - auto IsBBEq = - [&IsBranchEq]( - BasicBlock *A, BasicBlock *B, - DenseMap> &PhiPredIVs) { - // FIXME: we checked that the size of A and B are both 1 in - // simplifyDuplicateSwitchArms to make the Case list smaller to - // improve performance. If we decide to support BasicBlocks with more - // than just a single instruction, we need to check that A.size() == - // B.size() here, and we need to check more than just the BranchInsts - // for equality. - - BranchInst *ABI = cast(A->getTerminator()); - if (!IsBranchEq(ABI, cast(B->getTerminator()))) - return false; - - // Need to check that PHIs in successor have matching values - assert(ABI->getNumSuccessors() == 1 && - "Expected unconditional branches to have one successor"); - BasicBlock *Succ = ABI->getSuccessor(0); - for (PHINode &Phi : Succ->phis()) { - auto &PredIVs = PhiPredIVs[&Phi]; - if (PredIVs[A] != PredIVs[B]) - return false; - } - - return true; - }; - - return IsBBEq(LHS->Case.getCaseSuccessor(), RHS->Case.getCaseSuccessor(), - LHS->PhiPredIVs); + return true; } }; } // namespace llvm bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { - // Build Cases. Skip BBs that are not candidates for simplification. Mark // PHINodes which need to be processed into PhiPredIVs. We decide to process // an entire PHI at once opposed to calling getIncomingValueForBlock, since // each call to getIncomingValueForBlock is O(|Preds|). SmallPtrSet Phis; + SmallPtrSet Seen; DenseMap> PhiPredIVs; SmallVector Cases; for (auto Case : SI->cases()) { @@ -7569,11 +7555,13 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { if (!BI || BI->isConditional()) continue; - // Keep track of which PHIs we need as keys in PhiPredIVs below. - for (BasicBlock *Succ : BI->successors()) - for (PHINode &Phi : Succ->phis()) - Phis.insert(&Phi); - + if (!Seen.contains(BB)) { + Seen.insert(BB); + // Keep track of which PHIs we need as keys in PhiPredIVs below. + for (BasicBlock *Succ : BI->successors()) + for (PHINode &Phi : Succ->phis()) + Phis.insert(&Phi); + } Cases.emplace_back(CaseHandleWrapper{Case, PhiPredIVs}); } From b5630807557580b807d6002f8158b6003d73e9d2 Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Mon, 4 Nov 2024 18:37:33 -0800 Subject: [PATCH 19/34] fixup! try and avoid some resizes --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 25 ++++++++++++----------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 8d13c1f8f766a..8cd4617a75172 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -7460,8 +7460,8 @@ template <> struct DenseMapInfo { return static_cast( DenseMapInfo::getTombstoneKey()); } - static unsigned getHashValue(const CaseHandleWrapper *CT) { - BasicBlock *Succ = CT->Case.getCaseSuccessor(); + static unsigned getHashValue(const CaseHandleWrapper *CHW) { + BasicBlock *Succ = CHW->Case.getCaseSuccessor(); BranchInst *BI = cast(Succ->getTerminator()); assert(BI->isUnconditional() && "Only supporting unconditional branches for now"); @@ -7478,7 +7478,7 @@ template <> struct DenseMapInfo { BasicBlock *BB = BI->getSuccessor(0); SmallVector PhiValsForBB; for (PHINode &Phi : BB->phis()) - PhiValsForBB.emplace_back(CT->PhiPredIVs[&Phi][BB]); + PhiValsForBB.emplace_back(CHW->PhiPredIVs[&Phi][BB]); return hash_combine( BB, hash_combine_range(PhiValsForBB.begin(), PhiValsForBB.end())); @@ -7530,8 +7530,8 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { SmallPtrSet Phis; SmallPtrSet Seen; DenseMap> PhiPredIVs; - SmallVector Cases; - for (auto Case : SI->cases()) { + std::vector Cases; + for (auto &Case : SI->cases()) { BasicBlock *BB = Case.getCaseSuccessor(); // FIXME: Support more than just a single BranchInst. One way we could do @@ -7555,8 +7555,7 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { if (!BI || BI->isConditional()) continue; - if (!Seen.contains(BB)) { - Seen.insert(BB); + if (Seen.insert(BB).second) { // Keep track of which PHIs we need as keys in PhiPredIVs below. for (BasicBlock *Succ : BI->successors()) for (PHINode &Phi : Succ->phis()) @@ -7566,7 +7565,8 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { } for (PHINode *Phi : Phis) { - PhiPredIVs[Phi] = DenseMap(); + PhiPredIVs[Phi] = + DenseMap(Phi->getNumIncomingValues()); for (auto &IV : Phi->incoming_values()) PhiPredIVs[Phi].insert({Phi->getIncomingBlock(IV), IV.get()}); } @@ -7580,14 +7580,15 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { // nested loop since there is no O(1) lookup of BasicBlock -> Cases in // SwichInst. DenseSet> - ReplaceWith; + ReplaceWith(Cases.size()); + bool MadeChange = false; for (auto &CHW : Cases) { // CHW is a candidate for simplification. If we find a duplicate BB, // replace it. - const auto Res = ReplaceWith.find(&CHW); - if (Res != ReplaceWith.end()) { - CHW.Case.setSuccessor((*Res)->Case.getCaseSuccessor()); + const auto [It, Inserted] = ReplaceWith.insert(&CHW); + if (!Inserted) { + CHW.Case.setSuccessor((*It)->Case.getCaseSuccessor()); MadeChange = true; } else { ReplaceWith.insert(&CHW); From 67b3c8023f277baa120618d495a798761311adab Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Mon, 4 Nov 2024 19:41:25 -0800 Subject: [PATCH 20/34] fixup! presize PhiPredIVs based on first pass data --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 8cd4617a75172..84a07d4bf48b1 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -7564,6 +7564,7 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { Cases.emplace_back(CaseHandleWrapper{Case, PhiPredIVs}); } + PhiPredIVs.reserve(Phis.size()); for (PHINode *Phi : Phis) { PhiPredIVs[Phi] = DenseMap(Phi->getNumIncomingValues()); From 94ff86ef997cf2cce9c58c02c88d27056b68859e Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Mon, 4 Nov 2024 20:23:01 -0800 Subject: [PATCH 21/34] fixup! use reserve instead of resize --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 84a07d4bf48b1..e1398426630e7 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -7447,7 +7447,7 @@ static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder, /// of the incoming values. struct CaseHandleWrapper { const SwitchInst::CaseHandle Case; - DenseMap> &PhiPredIVs; + DenseMap> *PhiPredIVs; }; namespace llvm { @@ -7478,7 +7478,7 @@ template <> struct DenseMapInfo { BasicBlock *BB = BI->getSuccessor(0); SmallVector PhiValsForBB; for (PHINode &Phi : BB->phis()) - PhiValsForBB.emplace_back(CHW->PhiPredIVs[&Phi][BB]); + PhiValsForBB.emplace_back((*CHW->PhiPredIVs)[&Phi][BB]); return hash_combine( BB, hash_combine_range(PhiValsForBB.begin(), PhiValsForBB.end())); @@ -7512,7 +7512,7 @@ template <> struct DenseMapInfo { // Need to check that PHIs in successor have matching values BasicBlock *Succ = ABI->getSuccessor(0); for (PHINode &Phi : Succ->phis()) { - auto &PredIVs = LHS->PhiPredIVs[&Phi]; + auto &PredIVs = (*LHS->PhiPredIVs)[&Phi]; if (PredIVs[A] != PredIVs[B]) return false; } @@ -7531,6 +7531,7 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { SmallPtrSet Seen; DenseMap> PhiPredIVs; std::vector Cases; + Cases.reserve(SI->getNumCases()); for (auto &Case : SI->cases()) { BasicBlock *BB = Case.getCaseSuccessor(); @@ -7561,7 +7562,7 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { for (PHINode &Phi : Succ->phis()) Phis.insert(&Phi); } - Cases.emplace_back(CaseHandleWrapper{Case, PhiPredIVs}); + Cases.emplace_back(CaseHandleWrapper{Case, &PhiPredIVs}); } PhiPredIVs.reserve(Phis.size()); @@ -7581,7 +7582,8 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { // nested loop since there is no O(1) lookup of BasicBlock -> Cases in // SwichInst. DenseSet> - ReplaceWith(Cases.size()); + ReplaceWith; + ReplaceWith.reserve(Cases.size()); bool MadeChange = false; for (auto &CHW : Cases) { From c6a5e5257214ddee4111f029e1480c39290b7672 Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Mon, 4 Nov 2024 21:27:31 -0800 Subject: [PATCH 22/34] fixup! precompute incoming values from BB --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 31 +++++++++++++---------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index e1398426630e7..9e73c7c2d8ca7 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -7442,12 +7442,16 @@ static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder, /// PHINode::getIncomingValueForBlock has O(|Preds|), so we'd like to avoid /// calling this function on each BasicBlock every time isEqual is called, /// especially since the same BasicBlock may be passed as an argument multiple -/// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock -> -/// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking -/// of the incoming values. +/// times. To do this, we can precompute a map, PhiPredIVs, of PHINode -> Pred +/// BasicBlock -> IncomingValue and add it in the Wrapper so isEqual can do O(1) +/// checking of the incoming values. We also store the SmallVector PhiVals which +/// correspond to the incoming Value *s of all PHIs from +/// Case.getCaseSuccessor(), which is used in getHashValue instead of walking +/// all successor phis. struct CaseHandleWrapper { const SwitchInst::CaseHandle Case; DenseMap> *PhiPredIVs; + SmallVector *PhiVals; }; namespace llvm { @@ -7472,16 +7476,10 @@ template <> struct DenseMapInfo { // Since we assume the BB is just a single BranchInst with a single // succsessor, we hash as the BB and the incoming Values of its PHIs. // Initially, we tried to just use the sucessor BB as the hash, but this had - // poor performance. We find that the extra computation of getting the - // incoming PHI values here leads to better performance on overall Set - // performance. + // poor performance. BasicBlock *BB = BI->getSuccessor(0); - SmallVector PhiValsForBB; - for (PHINode &Phi : BB->phis()) - PhiValsForBB.emplace_back((*CHW->PhiPredIVs)[&Phi][BB]); - return hash_combine( - BB, hash_combine_range(PhiValsForBB.begin(), PhiValsForBB.end())); + BB, hash_combine_range(CHW->PhiVals->begin(), CHW->PhiVals->end())); } static bool isEqual(const CaseHandleWrapper *LHS, const CaseHandleWrapper *RHS) { @@ -7530,6 +7528,7 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { SmallPtrSet Phis; SmallPtrSet Seen; DenseMap> PhiPredIVs; + DenseMap> PhiVals; std::vector Cases; Cases.reserve(SI->getNumCases()); for (auto &Case : SI->cases()) { @@ -7562,15 +7561,19 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { for (PHINode &Phi : Succ->phis()) Phis.insert(&Phi); } - Cases.emplace_back(CaseHandleWrapper{Case, &PhiPredIVs}); + PhiVals[BB] = SmallVector(); + Cases.emplace_back(CaseHandleWrapper{Case, &PhiPredIVs, &PhiVals[BB]}); } PhiPredIVs.reserve(Phis.size()); for (PHINode *Phi : Phis) { PhiPredIVs[Phi] = DenseMap(Phi->getNumIncomingValues()); - for (auto &IV : Phi->incoming_values()) - PhiPredIVs[Phi].insert({Phi->getIncomingBlock(IV), IV.get()}); + for (auto &IV : Phi->incoming_values()) { + BasicBlock *BB = Phi->getIncomingBlock(IV); + PhiPredIVs[Phi].insert({BB, IV.get()}); + PhiVals[BB].emplace_back(IV.get()); + } } // Build a set such that if the CaseHandleWrapper exists in the set and From 241fef5c94ee1bda0bd1da581bd029b7e030a97b Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Tue, 5 Nov 2024 07:58:13 -0800 Subject: [PATCH 23/34] fixup! don't hash an empty set of values --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 9e73c7c2d8ca7..066c0303441f5 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -7476,10 +7476,13 @@ template <> struct DenseMapInfo { // Since we assume the BB is just a single BranchInst with a single // succsessor, we hash as the BB and the incoming Values of its PHIs. // Initially, we tried to just use the sucessor BB as the hash, but this had - // poor performance. + // poor performance. If the BB has no Phis, then just use BB as the hash. BasicBlock *BB = BI->getSuccessor(0); + if (CHW->PhiVals->begin() == CHW->PhiVals->end()) + return hash_value(BB); return hash_combine( - BB, hash_combine_range(CHW->PhiVals->begin(), CHW->PhiVals->end())); + hash_value(BB), + hash_combine_range(CHW->PhiVals->begin(), CHW->PhiVals->end())); } static bool isEqual(const CaseHandleWrapper *LHS, const CaseHandleWrapper *RHS) { From aa4cf4354a425a71aa78dc180f31c98d3dce05df Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Tue, 5 Nov 2024 08:05:23 -0800 Subject: [PATCH 24/34] fixup! add some comments --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 29 +++++++++++++---------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 066c0303441f5..ad83514682c93 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -7438,16 +7438,16 @@ static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder, } /// Checking whether two CaseHandle.getCaseSuccessor() are equal depends on -/// the incoming values of their successor PHINodes. -/// PHINode::getIncomingValueForBlock has O(|Preds|), so we'd like to avoid +/// the BasicBlock and the incoming values of their successor PHINodes. +/// PHINode::getIncomingValueForBlock is O(|Preds|), so we'd like to avoid /// calling this function on each BasicBlock every time isEqual is called, /// especially since the same BasicBlock may be passed as an argument multiple /// times. To do this, we can precompute a map, PhiPredIVs, of PHINode -> Pred /// BasicBlock -> IncomingValue and add it in the Wrapper so isEqual can do O(1) -/// checking of the incoming values. We also store the SmallVector PhiVals which -/// correspond to the incoming Value *s of all PHIs from -/// Case.getCaseSuccessor(), which is used in getHashValue instead of walking -/// all successor phis. +/// checking of the incoming values. We also want to use the incoming Phi +/// values of a getCaseSuccessor to calculate the hash value. In order to avoid +/// iterating all of the successor Phis on evry call to getHashValue, we +/// precompute a list of incoming values from getCaseSucessor, PhiVals. struct CaseHandleWrapper { const SwitchInst::CaseHandle Case; DenseMap> *PhiPredIVs; @@ -7474,9 +7474,10 @@ template <> struct DenseMapInfo { assert(Succ->size() == 1 && "Expected just a single branch in the BB"); // Since we assume the BB is just a single BranchInst with a single - // succsessor, we hash as the BB and the incoming Values of its PHIs. - // Initially, we tried to just use the sucessor BB as the hash, but this had - // poor performance. If the BB has no Phis, then just use BB as the hash. + // succsessor, we hash as the BB and the incoming Values of its sucessor + // Phis. Initially, we tried to just use the sucessor BB as the hash, but + // this had poor performance. If the BB has no sucessor Phis, then just use + // BB to compute the hash. BasicBlock *BB = BI->getSuccessor(0); if (CHW->PhiVals->begin() == CHW->PhiVals->end()) return hash_value(BB); @@ -7526,8 +7527,9 @@ template <> struct DenseMapInfo { bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { // Build Cases. Skip BBs that are not candidates for simplification. Mark // PHINodes which need to be processed into PhiPredIVs. We decide to process - // an entire PHI at once opposed to calling getIncomingValueForBlock, since - // each call to getIncomingValueForBlock is O(|Preds|). + // an entire PHI at once after the loop, opposed to calling + // getIncomingValueForBlock inside this loop, since each call to + // getIncomingValueForBlock is O(|Preds|). SmallPtrSet Phis; SmallPtrSet Seen; DenseMap> PhiPredIVs; @@ -7559,7 +7561,8 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { continue; if (Seen.insert(BB).second) { - // Keep track of which PHIs we need as keys in PhiPredIVs below. + // Keep track of which PHIs we need as keys in PhiPredIVs and whose values + // we need to get for PhiVals below. for (BasicBlock *Succ : BI->successors()) for (PHINode &Phi : Succ->phis()) Phis.insert(&Phi); @@ -7568,6 +7571,8 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { Cases.emplace_back(CaseHandleWrapper{Case, &PhiPredIVs, &PhiVals[BB]}); } + // Precompute the data structures to improve performance of isEqual and + // getHashValue for CaseHandleWrapper. PhiPredIVs.reserve(Phis.size()); for (PHINode *Phi : Phis) { PhiPredIVs[Phi] = From 8153c4bc98d38b60be3c9f65fa91839235876838 Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Tue, 5 Nov 2024 10:29:26 -0800 Subject: [PATCH 25/34] fixup! simplify getHashValue --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 40 ++++++----------------- 1 file changed, 10 insertions(+), 30 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index ad83514682c93..6854e6a26740f 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -7444,14 +7444,10 @@ static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder, /// especially since the same BasicBlock may be passed as an argument multiple /// times. To do this, we can precompute a map, PhiPredIVs, of PHINode -> Pred /// BasicBlock -> IncomingValue and add it in the Wrapper so isEqual can do O(1) -/// checking of the incoming values. We also want to use the incoming Phi -/// values of a getCaseSuccessor to calculate the hash value. In order to avoid -/// iterating all of the successor Phis on evry call to getHashValue, we -/// precompute a list of incoming values from getCaseSucessor, PhiVals. +/// checking of the incoming values. struct CaseHandleWrapper { const SwitchInst::CaseHandle Case; DenseMap> *PhiPredIVs; - SmallVector *PhiVals; }; namespace llvm { @@ -7472,18 +7468,8 @@ template <> struct DenseMapInfo { assert(BI->getNumSuccessors() == 1 && "Expected unconditional branches to have one successor"); assert(Succ->size() == 1 && "Expected just a single branch in the BB"); - - // Since we assume the BB is just a single BranchInst with a single - // succsessor, we hash as the BB and the incoming Values of its sucessor - // Phis. Initially, we tried to just use the sucessor BB as the hash, but - // this had poor performance. If the BB has no sucessor Phis, then just use - // BB to compute the hash. BasicBlock *BB = BI->getSuccessor(0); - if (CHW->PhiVals->begin() == CHW->PhiVals->end()) - return hash_value(BB); - return hash_combine( - hash_value(BB), - hash_combine_range(CHW->PhiVals->begin(), CHW->PhiVals->end())); + return hash_value(BB); } static bool isEqual(const CaseHandleWrapper *LHS, const CaseHandleWrapper *RHS) { @@ -7533,7 +7519,6 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { SmallPtrSet Phis; SmallPtrSet Seen; DenseMap> PhiPredIVs; - DenseMap> PhiVals; std::vector Cases; Cases.reserve(SI->getNumCases()); for (auto &Case : SI->cases()) { @@ -7560,28 +7545,23 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { if (!BI || BI->isConditional()) continue; - if (Seen.insert(BB).second) { - // Keep track of which PHIs we need as keys in PhiPredIVs and whose values - // we need to get for PhiVals below. + if (Seen.insert(BB).second) + // Keep track of which PHIs we need as keys in PhiPredIVs. for (BasicBlock *Succ : BI->successors()) for (PHINode &Phi : Succ->phis()) Phis.insert(&Phi); - } - PhiVals[BB] = SmallVector(); - Cases.emplace_back(CaseHandleWrapper{Case, &PhiPredIVs, &PhiVals[BB]}); + + Cases.emplace_back(CaseHandleWrapper{Case, &PhiPredIVs}); } - // Precompute the data structures to improve performance of isEqual and - // getHashValue for CaseHandleWrapper. + // Precompute the data structures to improve performance of isEqual for + // CaseHandleWrapper. PhiPredIVs.reserve(Phis.size()); for (PHINode *Phi : Phis) { PhiPredIVs[Phi] = DenseMap(Phi->getNumIncomingValues()); - for (auto &IV : Phi->incoming_values()) { - BasicBlock *BB = Phi->getIncomingBlock(IV); - PhiPredIVs[Phi].insert({BB, IV.get()}); - PhiVals[BB].emplace_back(IV.get()); - } + for (Use &IV : Phi->incoming_values()) + PhiPredIVs[Phi].insert({Phi->getIncomingBlock(IV), IV.get()}); } // Build a set such that if the CaseHandleWrapper exists in the set and From d1cd111f066cc7d1520e7a6a0ade901470cda3dc Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Tue, 5 Nov 2024 11:33:40 -0800 Subject: [PATCH 26/34] fixup! another attempt at fixing hashing --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 33 ++++++++++++++++++----- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 6854e6a26740f..77644a4f7d36a 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -7444,10 +7444,14 @@ static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder, /// especially since the same BasicBlock may be passed as an argument multiple /// times. To do this, we can precompute a map, PhiPredIVs, of PHINode -> Pred /// BasicBlock -> IncomingValue and add it in the Wrapper so isEqual can do O(1) -/// checking of the incoming values. +/// checking of the incoming values. We also want to use the incoming Phi +/// values of a getCaseSuccessor to calculate the hash value. In order to avoid +/// iterating all of the successor Phis on evry call to getHashValue, we +/// precompute a list of incoming values from getCaseSucessor, PhiVals. struct CaseHandleWrapper { const SwitchInst::CaseHandle Case; DenseMap> *PhiPredIVs; + SmallVector *PhiVals; }; namespace llvm { @@ -7468,8 +7472,15 @@ template <> struct DenseMapInfo { assert(BI->getNumSuccessors() == 1 && "Expected unconditional branches to have one successor"); assert(Succ->size() == 1 && "Expected just a single branch in the BB"); + + // Since the BB is just a single BranchInst with a single successor, we hash + // the BB and the incoming Values of its successor incoming PHI values. + // Initially, we tried to just use the successor BB as the hash, but this + // has better performance. BasicBlock *BB = BI->getSuccessor(0); - return hash_value(BB); + return hash_combine( + hash_value(BB), + hash_combine_range(CHW->PhiVals->begin(), CHW->PhiVals->end())); } static bool isEqual(const CaseHandleWrapper *LHS, const CaseHandleWrapper *RHS) { @@ -7519,6 +7530,7 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { SmallPtrSet Phis; SmallPtrSet Seen; DenseMap> PhiPredIVs; + DenseMap> PhiVals; std::vector Cases; Cases.reserve(SI->getNumCases()); for (auto &Case : SI->cases()) { @@ -7551,17 +7563,24 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { for (PHINode &Phi : Succ->phis()) Phis.insert(&Phi); - Cases.emplace_back(CaseHandleWrapper{Case, &PhiPredIVs}); + PhiVals[BB] = SmallVector(); + Cases.emplace_back(CaseHandleWrapper{Case, &PhiPredIVs, &PhiVals[BB]}); } - // Precompute the data structures to improve performance of isEqual for - // CaseHandleWrapper. + // Precompute the data structures to improve performance of isEqual and + // getHashValue for CaseHandleWrapper. PhiPredIVs.reserve(Phis.size()); for (PHINode *Phi : Phis) { PhiPredIVs[Phi] = DenseMap(Phi->getNumIncomingValues()); - for (Use &IV : Phi->incoming_values()) - PhiPredIVs[Phi].insert({Phi->getIncomingBlock(IV), IV.get()}); + for (Use &IV : Phi->incoming_values()) { + BasicBlock *BB = Phi->getIncomingBlock(IV); + PhiPredIVs[Phi].insert({BB, IV.get()}); + // Only need to track PhiVals for BBs we've added as keys in the loop + // above. + if (PhiVals.contains(BB)) + PhiVals[BB].emplace_back(IV.get()); + } } // Build a set such that if the CaseHandleWrapper exists in the set and From 4949890d8ff50e2eb08d329917af0d9a964e1cc8 Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Tue, 5 Nov 2024 12:45:00 -0800 Subject: [PATCH 27/34] fixup! fix crashes --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 27 +++++++++++++---------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 77644a4f7d36a..48cab049cb9e3 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -7451,7 +7451,7 @@ static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder, struct CaseHandleWrapper { const SwitchInst::CaseHandle Case; DenseMap> *PhiPredIVs; - SmallVector *PhiVals; + DenseMap> *PhiVals; }; namespace llvm { @@ -7478,9 +7478,9 @@ template <> struct DenseMapInfo { // Initially, we tried to just use the successor BB as the hash, but this // has better performance. BasicBlock *BB = BI->getSuccessor(0); - return hash_combine( - hash_value(BB), - hash_combine_range(CHW->PhiVals->begin(), CHW->PhiVals->end())); + auto &Vals = (*CHW->PhiVals)[BB]; + return hash_combine(hash_value(BB), + hash_combine_range(Vals.begin(), Vals.end())); } static bool isEqual(const CaseHandleWrapper *LHS, const CaseHandleWrapper *RHS) { @@ -7557,14 +7557,16 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { if (!BI || BI->isConditional()) continue; - if (Seen.insert(BB).second) + if (Seen.insert(BB).second) { // Keep track of which PHIs we need as keys in PhiPredIVs. - for (BasicBlock *Succ : BI->successors()) - for (PHINode &Phi : Succ->phis()) + for (BasicBlock *Succ : BI->successors()) { + for (PHINode &Phi : Succ->phis()) { Phis.insert(&Phi); - - PhiVals[BB] = SmallVector(); - Cases.emplace_back(CaseHandleWrapper{Case, &PhiPredIVs, &PhiVals[BB]}); + PhiVals[BB] = SmallVector(); + } + } + } + Cases.emplace_back(CaseHandleWrapper{Case, &PhiPredIVs, &PhiVals}); } // Precompute the data structures to improve performance of isEqual and @@ -7578,8 +7580,9 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { PhiPredIVs[Phi].insert({BB, IV.get()}); // Only need to track PhiVals for BBs we've added as keys in the loop // above. - if (PhiVals.contains(BB)) - PhiVals[BB].emplace_back(IV.get()); + auto Res = PhiVals.find(BB); + if (Res != PhiVals.end()) + Res->second.emplace_back(IV.get()); } } From e91253c26542d19c9396a32b9cf7a89fbddc83ef Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Tue, 5 Nov 2024 14:00:56 -0800 Subject: [PATCH 28/34] fixup~ Revert changes that precompute for getHashValue. --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 59 ++++++++++------------- 1 file changed, 26 insertions(+), 33 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 48cab049cb9e3..b10cbaea7c180 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -7442,16 +7442,12 @@ static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder, /// PHINode::getIncomingValueForBlock is O(|Preds|), so we'd like to avoid /// calling this function on each BasicBlock every time isEqual is called, /// especially since the same BasicBlock may be passed as an argument multiple -/// times. To do this, we can precompute a map, PhiPredIVs, of PHINode -> Pred -/// BasicBlock -> IncomingValue and add it in the Wrapper so isEqual can do O(1) -/// checking of the incoming values. We also want to use the incoming Phi -/// values of a getCaseSuccessor to calculate the hash value. In order to avoid -/// iterating all of the successor Phis on evry call to getHashValue, we -/// precompute a list of incoming values from getCaseSucessor, PhiVals. +/// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock -> +/// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking +/// of the incoming values. struct CaseHandleWrapper { const SwitchInst::CaseHandle Case; DenseMap> *PhiPredIVs; - DenseMap> *PhiVals; }; namespace llvm { @@ -7473,14 +7469,22 @@ template <> struct DenseMapInfo { "Expected unconditional branches to have one successor"); assert(Succ->size() == 1 && "Expected just a single branch in the BB"); - // Since the BB is just a single BranchInst with a single successor, we hash - // the BB and the incoming Values of its successor incoming PHI values. - // Initially, we tried to just use the successor BB as the hash, but this - // has better performance. + // Since we assume the BB is just a single BranchInst with a single + // succsessor, we hash as the BB and the incoming Values of its successor + // PHIs. Initially, we tried to just use the successor BB as the hash, but + // this had poor performance. We find that the extra computation of getting + // the incoming PHI values here leads to better performance on overall Set + // performance. We also tried to build a map from BB -> Succs.IncomingValues + // ahead of time and passing it in CaseHandleWrapper, but this slowed down + // the average compile time without having any impact on the worst case + // compile time. BasicBlock *BB = BI->getSuccessor(0); - auto &Vals = (*CHW->PhiVals)[BB]; - return hash_combine(hash_value(BB), - hash_combine_range(Vals.begin(), Vals.end())); + SmallVector PhiValsForBB; + for (PHINode &Phi : BB->phis()) + PhiValsForBB.emplace_back((*CHW->PhiPredIVs)[&Phi][BB]); + + return hash_combine( + BB, hash_combine_range(PhiValsForBB.begin(), PhiValsForBB.end())); } static bool isEqual(const CaseHandleWrapper *LHS, const CaseHandleWrapper *RHS) { @@ -7530,7 +7534,6 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { SmallPtrSet Phis; SmallPtrSet Seen; DenseMap> PhiPredIVs; - DenseMap> PhiVals; std::vector Cases; Cases.reserve(SI->getNumCases()); for (auto &Case : SI->cases()) { @@ -7558,32 +7561,22 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { continue; if (Seen.insert(BB).second) { - // Keep track of which PHIs we need as keys in PhiPredIVs. - for (BasicBlock *Succ : BI->successors()) { - for (PHINode &Phi : Succ->phis()) { + // Keep track of which PHIs we need as keys in PhiPredIVs below. + for (BasicBlock *Succ : BI->successors()) + for (PHINode &Phi : Succ->phis()) Phis.insert(&Phi); - PhiVals[BB] = SmallVector(); - } - } } - Cases.emplace_back(CaseHandleWrapper{Case, &PhiPredIVs, &PhiVals}); + Cases.emplace_back(CaseHandleWrapper{Case, &PhiPredIVs}); } - // Precompute the data structures to improve performance of isEqual and - // getHashValue for CaseHandleWrapper. + // Precompute a data structure to improve performance of isEqual for + // CaseHandleWrapper. PhiPredIVs.reserve(Phis.size()); for (PHINode *Phi : Phis) { PhiPredIVs[Phi] = DenseMap(Phi->getNumIncomingValues()); - for (Use &IV : Phi->incoming_values()) { - BasicBlock *BB = Phi->getIncomingBlock(IV); - PhiPredIVs[Phi].insert({BB, IV.get()}); - // Only need to track PhiVals for BBs we've added as keys in the loop - // above. - auto Res = PhiVals.find(BB); - if (Res != PhiVals.end()) - Res->second.emplace_back(IV.get()); - } + for (auto &IV : Phi->incoming_values()) + PhiPredIVs[Phi].insert({Phi->getIncomingBlock(IV), IV.get()}); } // Build a set such that if the CaseHandleWrapper exists in the set and From d87ea1d89edc0ac3e6fa1dc05b76b396e92022d6 Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Thu, 7 Nov 2024 13:00:04 -0800 Subject: [PATCH 29/34] fixup! use SmallVector instead of vector --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index b10cbaea7c180..050aa0985ae93 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -7534,7 +7534,7 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { SmallPtrSet Phis; SmallPtrSet Seen; DenseMap> PhiPredIVs; - std::vector Cases; + SmallVector Cases; Cases.reserve(SI->getNumCases()); for (auto &Case : SI->cases()) { BasicBlock *BB = Case.getCaseSuccessor(); From be55920214d909fdc0981a247316138f64b734de Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Mon, 11 Nov 2024 14:16:18 -0800 Subject: [PATCH 30/34] fixup! update dom tree --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 050aa0985ae93..5dd57dea19c6c 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -276,7 +276,7 @@ class SimplifyCFGOpt { bool simplifyCleanupReturn(CleanupReturnInst *RI); bool simplifyUnreachable(UnreachableInst *UI); bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder); - bool simplifyDuplicateSwitchArms(SwitchInst *SI); + bool simplifyDuplicateSwitchArms(SwitchInst *SI, DomTreeUpdater *DTU); bool simplifyIndirectBr(IndirectBrInst *IBI); bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder); bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder); @@ -7525,7 +7525,8 @@ template <> struct DenseMapInfo { }; } // namespace llvm -bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { +bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI, + DomTreeUpdater *DTU) { // Build Cases. Skip BBs that are not candidates for simplification. Mark // PHINodes which need to be processed into PhiPredIVs. We decide to process // an entire PHI at once after the loop, opposed to calling @@ -7592,17 +7593,25 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI) { ReplaceWith.reserve(Cases.size()); bool MadeChange = false; + SmallVector Updates; + Updates.reserve(ReplaceWith.size()); for (auto &CHW : Cases) { // CHW is a candidate for simplification. If we find a duplicate BB, // replace it. const auto [It, Inserted] = ReplaceWith.insert(&CHW); if (!Inserted) { + // We know that SI's parent BB no longer dominates the old case successor + // since we are making it dead. + Updates.push_back({DominatorTree::Delete, SI->getParent(), + CHW.Case.getCaseSuccessor()}); CHW.Case.setSuccessor((*It)->Case.getCaseSuccessor()); MadeChange = true; } else { ReplaceWith.insert(&CHW); } } + if (DTU) + DTU->applyUpdates(Updates); return MadeChange; } @@ -7667,7 +7676,7 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { hoistCommonCodeFromSuccessors(SI, !Options.HoistCommonInsts)) return requestResimplify(); - if (simplifyDuplicateSwitchArms(SI)) + if (simplifyDuplicateSwitchArms(SI, DTU)) return requestResimplify(); return false; From 085b04882e373acc37a5dc8f592dbb29f5346571 Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Mon, 11 Nov 2024 15:02:10 -0800 Subject: [PATCH 31/34] fixup! use get/set Sucessor --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 84 ++++++++++--------- .../Transforms/PhaseOrdering/switch-sext.ll | 2 +- 2 files changed, 45 insertions(+), 41 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 5dd57dea19c6c..f90b7b55c4f56 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -7437,31 +7437,34 @@ static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder, return true; } -/// Checking whether two CaseHandle.getCaseSuccessor() are equal depends on -/// the BasicBlock and the incoming values of their successor PHINodes. +/// Checking whether two cases of SI are equal depends on the contents of the +/// BasicBlock and the incoming values of their successor PHINodes. /// PHINode::getIncomingValueForBlock is O(|Preds|), so we'd like to avoid /// calling this function on each BasicBlock every time isEqual is called, /// especially since the same BasicBlock may be passed as an argument multiple /// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock -> /// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking /// of the incoming values. -struct CaseHandleWrapper { - const SwitchInst::CaseHandle Case; +struct SwitchSuccWrapper { + // Keep so we can use SwitchInst::setSuccessor to do the replacement. It won't + // be important to equality though. + unsigned SuccNum; + BasicBlock *Dest; DenseMap> *PhiPredIVs; }; namespace llvm { -template <> struct DenseMapInfo { - static const CaseHandleWrapper *getEmptyKey() { - return static_cast( +template <> struct DenseMapInfo { + static const SwitchSuccWrapper *getEmptyKey() { + return static_cast( DenseMapInfo::getEmptyKey()); } - static const CaseHandleWrapper *getTombstoneKey() { - return static_cast( + static const SwitchSuccWrapper *getTombstoneKey() { + return static_cast( DenseMapInfo::getTombstoneKey()); } - static unsigned getHashValue(const CaseHandleWrapper *CHW) { - BasicBlock *Succ = CHW->Case.getCaseSuccessor(); + static unsigned getHashValue(const SwitchSuccWrapper *SSW) { + BasicBlock *Succ = SSW->Dest; BranchInst *BI = cast(Succ->getTerminator()); assert(BI->isUnconditional() && "Only supporting unconditional branches for now"); @@ -7475,26 +7478,26 @@ template <> struct DenseMapInfo { // this had poor performance. We find that the extra computation of getting // the incoming PHI values here leads to better performance on overall Set // performance. We also tried to build a map from BB -> Succs.IncomingValues - // ahead of time and passing it in CaseHandleWrapper, but this slowed down + // ahead of time and passing it in SwitchSuccWrapper, but this slowed down // the average compile time without having any impact on the worst case // compile time. BasicBlock *BB = BI->getSuccessor(0); SmallVector PhiValsForBB; for (PHINode &Phi : BB->phis()) - PhiValsForBB.emplace_back((*CHW->PhiPredIVs)[&Phi][BB]); + PhiValsForBB.emplace_back((*SSW->PhiPredIVs)[&Phi][BB]); return hash_combine( BB, hash_combine_range(PhiValsForBB.begin(), PhiValsForBB.end())); } - static bool isEqual(const CaseHandleWrapper *LHS, - const CaseHandleWrapper *RHS) { - auto EKey = DenseMapInfo::getEmptyKey(); - auto TKey = DenseMapInfo::getTombstoneKey(); + static bool isEqual(const SwitchSuccWrapper *LHS, + const SwitchSuccWrapper *RHS) { + auto EKey = DenseMapInfo::getEmptyKey(); + auto TKey = DenseMapInfo::getTombstoneKey(); if (LHS == EKey || RHS == EKey || LHS == TKey || RHS == TKey) return LHS == RHS; - BasicBlock *A = LHS->Case.getCaseSuccessor(); - BasicBlock *B = RHS->Case.getCaseSuccessor(); + BasicBlock *A = LHS->Dest; + BasicBlock *B = RHS->Dest; // FIXME: we checked that the size of A and B are both 1 in // simplifyDuplicateSwitchArms to make the Case list smaller to @@ -7535,10 +7538,11 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI, SmallPtrSet Phis; SmallPtrSet Seen; DenseMap> PhiPredIVs; - SmallVector Cases; + SmallVector Cases; Cases.reserve(SI->getNumCases()); - for (auto &Case : SI->cases()) { - BasicBlock *BB = Case.getCaseSuccessor(); + + for (unsigned I = 0; I < SI->getNumSuccessors(); ++I) { + BasicBlock *BB = SI->getSuccessor(I); // FIXME: Support more than just a single BranchInst. One way we could do // this is by taking a hashing approach of all insts in BB. @@ -7567,11 +7571,11 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI, for (PHINode &Phi : Succ->phis()) Phis.insert(&Phi); } - Cases.emplace_back(CaseHandleWrapper{Case, &PhiPredIVs}); + Cases.emplace_back(SwitchSuccWrapper{I, BB, &PhiPredIVs}); } // Precompute a data structure to improve performance of isEqual for - // CaseHandleWrapper. + // SwitchSuccWrapper. PhiPredIVs.reserve(Phis.size()); for (PHINode *Phi : Phis) { PhiPredIVs[Phi] = @@ -7580,36 +7584,36 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI, PhiPredIVs[Phi].insert({Phi->getIncomingBlock(IV), IV.get()}); } - // Build a set such that if the CaseHandleWrapper exists in the set and - // another CaseHandleWrapper isEqual, then the equivalent CaseHandleWrapper + // Build a set such that if the SwitchSuccWrapper exists in the set and + // another SwitchSuccWrapper isEqual, then the equivalent SwitchSuccWrapper // which is not in the set should be replaced with the one in the set. If the - // CaseHandleWrapper is not in the set, then it should be added to the set so - // other CaseHandleWrappers can check against it in the same manner. We chose - // to use CaseHandleWrapper instead of BasicBlock since we'd need an extra - // nested loop since there is no O(1) lookup of BasicBlock -> Cases in - // SwichInst. - DenseSet> + // SwitchSuccWrapper is not in the set, then it should be added to the set so + // other SwitchSuccWrappers can check against it in the same manner. We use + // SwitchSuccWrapper instead of just BasicBlock because we'd like to pass + // around information to isEquality, getHashValue, and when doing the + // replacement with better performance. + DenseSet> ReplaceWith; ReplaceWith.reserve(Cases.size()); - bool MadeChange = false; SmallVector Updates; Updates.reserve(ReplaceWith.size()); - for (auto &CHW : Cases) { - // CHW is a candidate for simplification. If we find a duplicate BB, + bool MadeChange = false; + for (auto &SSW : Cases) { + // SSW is a candidate for simplification. If we find a duplicate BB, // replace it. - const auto [It, Inserted] = ReplaceWith.insert(&CHW); + const auto [It, Inserted] = ReplaceWith.insert(&SSW); if (!Inserted) { // We know that SI's parent BB no longer dominates the old case successor // since we are making it dead. - Updates.push_back({DominatorTree::Delete, SI->getParent(), - CHW.Case.getCaseSuccessor()}); - CHW.Case.setSuccessor((*It)->Case.getCaseSuccessor()); + Updates.push_back({DominatorTree::Delete, SI->getParent(), SSW.Dest}); + SI->setSuccessor(SSW.SuccNum, (*It)->Dest); MadeChange = true; } else { - ReplaceWith.insert(&CHW); + ReplaceWith.insert(&SSW); } } + if (DTU) DTU->applyUpdates(Updates); diff --git a/llvm/test/Transforms/PhaseOrdering/switch-sext.ll b/llvm/test/Transforms/PhaseOrdering/switch-sext.ll index e39771b629e01..0e352ba52f6a2 100644 --- a/llvm/test/Transforms/PhaseOrdering/switch-sext.ll +++ b/llvm/test/Transforms/PhaseOrdering/switch-sext.ll @@ -6,8 +6,8 @@ define i8 @test_switch_with_sext_phi(i8 %code) { ; CHECK-SAME: i8 [[CODE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: switch i8 [[CODE]], label [[SW_EPILOG:%.*]] [ -; CHECK-NEXT: i8 76, label [[SW_BB3:%.*]] ; CHECK-NEXT: i8 108, label [[SW_BB2:%.*]] +; CHECK-NEXT: i8 76, label [[SW_BB3:%.*]] ; CHECK-NEXT: ] ; CHECK: sw.bb2: ; CHECK-NEXT: br label [[SW_EPILOG]] From 3f0b02035d1e1c5edd7addb2ae812275620846c7 Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Tue, 12 Nov 2024 07:14:14 -0800 Subject: [PATCH 32/34] fixup! avoid extra insert --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index f90b7b55c4f56..bb38591e1a00b 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -7609,8 +7609,6 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI, Updates.push_back({DominatorTree::Delete, SI->getParent(), SSW.Dest}); SI->setSuccessor(SSW.SuccNum, (*It)->Dest); MadeChange = true; - } else { - ReplaceWith.insert(&SSW); } } From 3430a66e512d7f6e1c4dec43b821f26fecc1637b Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Thu, 14 Nov 2024 08:00:25 -0800 Subject: [PATCH 33/34] fixup! respond to review --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 4 +-- .../Transforms/SimplifyCFG/switch-dup-bbs.ll | 32 +++++++++++++++++++ 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index bb38591e1a00b..818e464f10665 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -7510,8 +7510,6 @@ template <> struct DenseMapInfo { BranchInst *BBI = cast(B->getTerminator()); assert(ABI->isUnconditional() && BBI->isUnconditional() && "Only supporting unconditional branches for now"); - assert(ABI->getNumSuccessors() == 1 && - "Expected unconditional branches to have one successor"); if (ABI->getSuccessor(0) != BBI->getSuccessor(0)) return false; @@ -7539,7 +7537,7 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI, SmallPtrSet Seen; DenseMap> PhiPredIVs; SmallVector Cases; - Cases.reserve(SI->getNumCases()); + Cases.reserve(SI->getNumSuccessors()); for (unsigned I = 0; I < SI->getNumSuccessors(); ++I) { BasicBlock *BB = SI->getSuccessor(I); diff --git a/llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll b/llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll index b12db656fdf68..e5ee2e1663d55 100644 --- a/llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll +++ b/llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll @@ -138,3 +138,35 @@ define i32 @switch_duplicate_arms_multipred(i1 %0, i32 %1, i32 %2, i32 %3, i32 % %10 = phi i32 [ %4, %6 ], [ %3, %8 ], [ %3, %7 ] ret i32 %10 } + +define i32 @switch_dup_default(i32 %0, i32 %1, i32 %2, i32 %3) { +; SIMPLIFY-CFG-LABEL: define i32 @switch_dup_default( +; SIMPLIFY-CFG-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]]) { +; SIMPLIFY-CFG-NEXT: [[COND:%.*]] = icmp eq i32 [[TMP1]], 0 +; SIMPLIFY-CFG-NEXT: [[TMP8:%.*]] = select i1 [[COND]], i32 [[TMP3]], i32 [[TMP2]] +; SIMPLIFY-CFG-NEXT: ret i32 [[TMP8]] +; +; O3-LABEL: define i32 @switch_dup_default( +; O3-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]]) local_unnamed_addr #[[ATTR0]] { +; O3-NEXT: [[COND:%.*]] = icmp eq i32 [[TMP1]], 0 +; O3-NEXT: [[TMP8:%.*]] = select i1 [[COND]], i32 [[TMP3]], i32 [[TMP2]] +; O3-NEXT: ret i32 [[TMP8]] +; + switch i32 %1, label %7 [ + i32 0, label %5 + i32 1, label %6 + ] + +5: + br label %8 + +6: + br label %8 + +7: + br label %8 + +8: + %9 = phi i32 [ %3, %5 ], [ %2, %6 ], [ %2, %7 ] + ret i32 %9 +} From 852f3f09652ce8b48bfa140060eb76cc7e1d24c8 Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Thu, 14 Nov 2024 09:55:32 -0800 Subject: [PATCH 34/34] fixup! respond to review --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 28 +++++------ .../Transforms/SimplifyCFG/switch-dup-bbs.ll | 47 +------------------ 2 files changed, 13 insertions(+), 62 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 818e464f10665..fc1de6746461e 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -7450,7 +7450,7 @@ struct SwitchSuccWrapper { // be important to equality though. unsigned SuccNum; BasicBlock *Dest; - DenseMap> *PhiPredIVs; + DenseMap> *PhiPredIVs; }; namespace llvm { @@ -7473,14 +7473,13 @@ template <> struct DenseMapInfo { assert(Succ->size() == 1 && "Expected just a single branch in the BB"); // Since we assume the BB is just a single BranchInst with a single - // succsessor, we hash as the BB and the incoming Values of its successor + // successor, we hash as the BB and the incoming Values of its successor // PHIs. Initially, we tried to just use the successor BB as the hash, but - // this had poor performance. We find that the extra computation of getting - // the incoming PHI values here leads to better performance on overall Set - // performance. We also tried to build a map from BB -> Succs.IncomingValues - // ahead of time and passing it in SwitchSuccWrapper, but this slowed down - // the average compile time without having any impact on the worst case - // compile time. + // including the incoming PHI values leads to better performance. + // We also tried to build a map from BB -> Succs.IncomingValues ahead of + // time and passing it in SwitchSuccWrapper, but this slowed down the + // average compile time without having any impact on the worst case compile + // time. BasicBlock *BB = BI->getSuccessor(0); SmallVector PhiValsForBB; for (PHINode &Phi : BB->phis()) @@ -7535,7 +7534,7 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI, // getIncomingValueForBlock is O(|Preds|). SmallPtrSet Phis; SmallPtrSet Seen; - DenseMap> PhiPredIVs; + DenseMap> PhiPredIVs; SmallVector Cases; Cases.reserve(SI->getNumSuccessors()); @@ -7547,10 +7546,6 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI, if (BB->size() != 1) continue; - Instruction *T = BB->getTerminator(); - if (!T) - continue; - // FIXME: This case needs some extra care because the terminators other than // SI need to be updated. if (BB->hasNPredecessorsOrMore(2)) @@ -7559,7 +7554,7 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI, // FIXME: Relax that the terminator is a BranchInst by checking for equality // on other kinds of terminators. We decide to only support unconditional // branches for now for compile time reasons. - auto *BI = dyn_cast(T); + auto *BI = dyn_cast(BB->getTerminator()); if (!BI || BI->isConditional()) continue; @@ -7577,7 +7572,7 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI, PhiPredIVs.reserve(Phis.size()); for (PHINode *Phi : Phis) { PhiPredIVs[Phi] = - DenseMap(Phi->getNumIncomingValues()); + SmallDenseMap(Phi->getNumIncomingValues()); for (auto &IV : Phi->incoming_values()) PhiPredIVs[Phi].insert({Phi->getIncomingBlock(IV), IV.get()}); } @@ -7590,8 +7585,7 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI, // SwitchSuccWrapper instead of just BasicBlock because we'd like to pass // around information to isEquality, getHashValue, and when doing the // replacement with better performance. - DenseSet> - ReplaceWith; + DenseSet ReplaceWith; ReplaceWith.reserve(Cases.size()); SmallVector Updates; diff --git a/llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll b/llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll index e5ee2e1663d55..0df4deba1a156 100644 --- a/llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll +++ b/llvm/test/Transforms/SimplifyCFG/switch-dup-bbs.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt < %s -passes=simplifycfg -S | FileCheck %s -check-prefix=SIMPLIFY-CFG -; RUN: opt < %s -O3 -S | FileCheck %s -check-prefix=O3 +; RUN: opt < %s -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S \ +; RUN: | FileCheck %s -check-prefix=SIMPLIFY-CFG define i32 @switch_all_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3) { ; SIMPLIFY-CFG-LABEL: define i32 @switch_all_duplicate_arms( @@ -14,12 +14,6 @@ define i32 @switch_all_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3) { ; SIMPLIFY-CFG: [[BB6]]: ; SIMPLIFY-CFG-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP3]], [[TMP4:%.*]] ], [ [[TMP2]], %[[BB5]] ] ; SIMPLIFY-CFG-NEXT: ret i32 [[TMP8]] -; -; O3-LABEL: define i32 @switch_all_duplicate_arms( -; O3-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -; O3-NEXT: [[SWITCH:%.*]] = icmp ult i32 [[TMP1]], 2 -; O3-NEXT: [[TMP8:%.*]] = select i1 [[SWITCH]], i32 [[TMP2]], i32 [[TMP3]] -; O3-NEXT: ret i32 [[TMP8]] ; switch i32 %1, label %7 [ i32 0, label %5 @@ -52,21 +46,6 @@ define i32 @switch_some_duplicate_arms(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) { ; SIMPLIFY-CFG: [[BB8]]: ; SIMPLIFY-CFG-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP3]], [[TMP5:%.*]] ], [ [[TMP4]], %[[BB7]] ], [ [[TMP2]], %[[BB6]] ] ; SIMPLIFY-CFG-NEXT: ret i32 [[TMP10]] -; -; O3-LABEL: define i32 @switch_some_duplicate_arms( -; O3-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]]) local_unnamed_addr #[[ATTR0]] { -; O3-NEXT: switch i32 [[TMP1]], label %[[BB8:.*]] [ -; O3-NEXT: i32 0, label %[[BB6:.*]] -; O3-NEXT: i32 1, label %[[BB6]] -; O3-NEXT: i32 2, label %[[BB7:.*]] -; O3-NEXT: ] -; O3: [[BB6]]: -; O3-NEXT: br label %[[BB8]] -; O3: [[BB7]]: -; O3-NEXT: br label %[[BB8]] -; O3: [[BB8]]: -; O3-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP3]], [[TMP5:%.*]] ], [ [[TMP4]], %[[BB7]] ], [ [[TMP2]], %[[BB6]] ] -; O3-NEXT: ret i32 [[TMP10]] ; switch i32 %1, label %9 [ i32 0, label %6 @@ -104,22 +83,6 @@ define i32 @switch_duplicate_arms_multipred(i1 %0, i32 %1, i32 %2, i32 %3, i32 % ; SIMPLIFY-CFG: [[BB9]]: ; SIMPLIFY-CFG-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP4]], %[[BB6]] ], [ [[TMP3]], %[[BB8]] ], [ [[TMP3]], %[[BB7]] ] ; SIMPLIFY-CFG-NEXT: ret i32 [[TMP10]] -; -; O3-LABEL: define i32 @switch_duplicate_arms_multipred( -; O3-SAME: i1 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]]) local_unnamed_addr #[[ATTR0]] { -; O3-NEXT: br i1 [[TMP0]], label %[[BB6:.*]], label %[[BB7:.*]] -; O3: [[BB6]]: -; O3-NEXT: switch i32 [[TMP2]], label %[[BB9:.*]] [ -; O3-NEXT: i32 0, label %[[BB7]] -; O3-NEXT: i32 1, label %[[BB8:.*]] -; O3-NEXT: ] -; O3: [[BB7]]: -; O3-NEXT: br label %[[BB9]] -; O3: [[BB8]]: -; O3-NEXT: br label %[[BB9]] -; O3: [[BB9]]: -; O3-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP4]], %[[BB6]] ], [ [[TMP3]], %[[BB8]] ], [ [[TMP3]], %[[BB7]] ] -; O3-NEXT: ret i32 [[TMP10]] ; br i1 %0, label %6, label %7 6: @@ -145,12 +108,6 @@ define i32 @switch_dup_default(i32 %0, i32 %1, i32 %2, i32 %3) { ; SIMPLIFY-CFG-NEXT: [[COND:%.*]] = icmp eq i32 [[TMP1]], 0 ; SIMPLIFY-CFG-NEXT: [[TMP8:%.*]] = select i1 [[COND]], i32 [[TMP3]], i32 [[TMP2]] ; SIMPLIFY-CFG-NEXT: ret i32 [[TMP8]] -; -; O3-LABEL: define i32 @switch_dup_default( -; O3-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]]) local_unnamed_addr #[[ATTR0]] { -; O3-NEXT: [[COND:%.*]] = icmp eq i32 [[TMP1]], 0 -; O3-NEXT: [[TMP8:%.*]] = select i1 [[COND]], i32 [[TMP3]], i32 [[TMP2]] -; O3-NEXT: ret i32 [[TMP8]] ; switch i32 %1, label %7 [ i32 0, label %5