diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 4fac5d36ddb3f..b71c5b7f5b2f6 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -301,7 +301,9 @@ class SimplifyCFGOpt { bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, IRBuilder<> &Builder); - + bool tryToSimplifyUncondBranchWithICmpSelectInIt(ICmpInst *ICI, + SelectInst *Select, + IRBuilder<> &Builder); bool hoistCommonCodeFromSuccessors(Instruction *TI, bool AllInstsEqOnly); bool hoistSuccIdenticalTerminatorToSwitchOrIf( Instruction *TI, Instruction *I1, @@ -5011,16 +5013,65 @@ bool SimplifyCFGOpt::simplifyIndirectBrOnSelect(IndirectBrInst *IBI, /// the PHI, merging the third icmp into the switch. bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt( ICmpInst *ICI, IRBuilder<> &Builder) { + // Select == nullptr means we assume that there is a hidden no-op select + // instruction of `_ = select %icmp, true, false` after `%icmp = icmp ...` + return tryToSimplifyUncondBranchWithICmpSelectInIt(ICI, nullptr, Builder); +} + +/// Similar to tryToSimplifyUncondBranchWithICmpInIt, but handle a more generic +/// case. This is called when we find an icmp instruction (a seteq/setne with a +/// constant) and its following select instruction as the only TWO instructions +/// in a block that ends with an uncond branch. We are looking for a very +/// specific pattern that occurs when " +/// if (A == 1) return C1; +/// if (A == 2) return C2; +/// if (A < 3) return C3; +/// return C4; +/// " gets simplified. In this case, we merge the first two "branches of icmp" +/// into a switch, but then the default value goes to an uncond block with a lt +/// icmp and select in it, as InstCombine can not simplify "A < 3" as "A == 2". 
+/// After SimplifyCFG and other subsequent optimizations (e.g., SCCP), we might +/// get something like: +/// +/// case1: +/// switch i8 %A, label %DEFAULT [ i8 0, label %end i8 1, label %case2 ] +/// case2: +/// br label %end +/// DEFAULT: +/// %tmp = icmp eq i8 %A, 2 +/// %val = select i1 %tmp, i8 C3, i8 C4 +/// br label %end +/// end: +/// _ = phi i8 [ C1, %case1 ], [ C2, %case2 ], [ %val, %DEFAULT ] +/// +/// We prefer to split the edge to 'end' so that there are TWO entries of C3/C4 +/// to the PHI, merging the icmp & select into the switch, as follows: +/// +/// case1: +/// switch i8 %A, label %DEFAULT [ +/// i8 0, label %end +/// i8 1, label %case2 +/// i8 2, label %case3 +/// ] +/// case2: +/// br label %end +/// case3: +/// br label %end +/// DEFAULT: +/// br label %end +/// end: +/// _ = phi i8 [ C1, %case1 ], [ C2, %case2 ], [ C3, %case3 ], [ C4, %DEFAULT ] +bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpSelectInIt( + ICmpInst *ICI, SelectInst *Select, IRBuilder<> &Builder) { BasicBlock *BB = ICI->getParent(); - // If the block has any PHIs in it or the icmp has multiple uses, it is too - // complex. - if (isa(BB->begin()) || !ICI->hasOneUse()) + // If the block has any PHIs in it or the icmp/select has multiple uses, it is + // too complex. + // TODO: support multi-phis in succ BB of select's BB. + if (isa(BB->begin()) || !ICI->hasOneUse() || + (Select && !Select->hasOneUse())) return false; - Value *V = ICI->getOperand(0); - ConstantInt *Cst = cast(ICI->getOperand(1)); - // The pattern we're looking for is where our only predecessor is a switch on // 'V' and this block is the default case for the switch. In this case we can // fold the compared value into the switch to simplify things. 
@@ -5028,8 +5079,36 @@ bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt( if (!Pred || !isa(Pred->getTerminator())) return false; + Value *IcmpCond; + ConstantInt *NewCaseVal; + CmpPredicate Predicate; + + // Match icmp X, C + if (!match(ICI, + m_ICmp(Predicate, m_Value(IcmpCond), m_ConstantInt(NewCaseVal)))) + return false; + + Value *SelectCond, *SelectTrueVal, *SelectFalseVal; + Instruction *User; + if (!Select) { + // If Select == nullptr, we can assume that there is a hidden no-op select + // just after icmp + SelectCond = ICI; + SelectTrueVal = Builder.getTrue(); + SelectFalseVal = Builder.getFalse(); + User = ICI->user_back(); + } else { + SelectCond = Select->getCondition(); + // Check if the select condition is the same as the icmp condition. + if (SelectCond != ICI) + return false; + SelectTrueVal = Select->getTrueValue(); + SelectFalseVal = Select->getFalseValue(); + User = Select->user_back(); + } + SwitchInst *SI = cast(Pred->getTerminator()); - if (SI->getCondition() != V) + if (SI->getCondition() != IcmpCond) return false; // If BB is reachable on a non-default case, then we simply know the value of @@ -5051,9 +5130,9 @@ bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt( // Ok, the block is reachable from the default dest. If the constant we're // comparing exists in one of the other edges, then we can constant fold ICI // and zap it. - if (SI->findCaseValue(Cst) != SI->case_default()) { + if (SI->findCaseValue(NewCaseVal) != SI->case_default()) { Value *V; - if (ICI->getPredicate() == ICmpInst::ICMP_EQ) + if (Predicate == ICmpInst::ICMP_EQ) V = ConstantInt::getFalse(BB->getContext()); else V = ConstantInt::getTrue(BB->getContext()); @@ -5064,25 +5143,30 @@ bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt( return requestResimplify(); } - // The use of the icmp has to be in the 'end' block, by the only PHI node in + // The use of the select has to be in the 'end' block, by the only PHI node in // the block. 
BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0); - PHINode *PHIUse = dyn_cast(ICI->user_back()); + PHINode *PHIUse = dyn_cast(User); if (PHIUse == nullptr || PHIUse != &SuccBlock->front() || isa(++BasicBlock::iterator(PHIUse))) return false; - // If the icmp is a SETEQ, then the default dest gets false, the new edge gets - // true in the PHI. - Constant *DefaultCst = ConstantInt::getTrue(BB->getContext()); - Constant *NewCst = ConstantInt::getFalse(BB->getContext()); + // If the icmp is a SETEQ, then the default dest gets SelectFalseVal, the new + // edge gets SelectTrueVal in the PHI. + Value *DefaultCst = SelectFalseVal; + Value *NewCst = SelectTrueVal; - if (ICI->getPredicate() == ICmpInst::ICMP_EQ) + if (ICI->getPredicate() == ICmpInst::ICMP_NE) std::swap(DefaultCst, NewCst); - // Replace ICI (which is used by the PHI for the default value) with true or - // false depending on if it is EQ or NE. - ICI->replaceAllUsesWith(DefaultCst); + // Replace Select (which is used by the PHI for the default value) with + // SelectFalseVal or SelectTrueVal depending on if ICI is EQ or NE. + if (Select) { + Select->replaceAllUsesWith(DefaultCst); + Select->eraseFromParent(); + } else { + ICI->replaceAllUsesWith(DefaultCst); + } ICI->eraseFromParent(); SmallVector Updates; @@ -5099,7 +5183,7 @@ bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt( NewW = ((uint64_t(*W0) + 1) >> 1); SIW.setSuccessorWeight(0, *NewW); } - SIW.addCase(Cst, NewBB, NewW); + SIW.addCase(NewCaseVal, NewBB, NewW); if (DTU) Updates.push_back({DominatorTree::Insert, Pred, NewBB}); } @@ -8167,13 +8251,18 @@ bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI, // If the only instruction in the block is a seteq/setne comparison against a // constant, try to simplify the block. 
- if (ICmpInst *ICI = dyn_cast(I)) + if (ICmpInst *ICI = dyn_cast(I)) { if (ICI->isEquality() && isa(ICI->getOperand(1))) { ++I; if (I->isTerminator() && tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder)) return true; + if (isa(I) && I->getNextNode()->isTerminator() && + tryToSimplifyUncondBranchWithICmpSelectInIt(ICI, cast(I), + Builder)) + return true; } + } // See if we can merge an empty landing pad block with another which is // equivalent. diff --git a/llvm/test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table.ll b/llvm/test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table.ll index 6def8f4eeb089..a51b816846cdc 100644 --- a/llvm/test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table.ll +++ b/llvm/test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table.ll @@ -15,8 +15,8 @@ ; DISABLE-NOT: @{{.*}} = private unnamed_addr constant [3 x ptr] [ptr @c1, ptr @c2, ptr @c3] ; ENABLE: @{{.*}} = private unnamed_addr constant [3 x ptr] [ptr @g1, ptr @g2, ptr @g3] ; DISABLE-NOT: @{{.*}} = private unnamed_addr constant [3 x ptr] [ptr @g1, ptr @g2, ptr @g3] -; ENABLE: @{{.*}} = private unnamed_addr constant [3 x ptr] [ptr @f1, ptr @f2, ptr @f3] -; DISABLE-NOT: @{{.*}} = private unnamed_addr constant [3 x ptr] [ptr @f1, ptr @f2, ptr @f3] +; ENABLE: @{{.*}} = private unnamed_addr constant [4 x ptr] [ptr @f1, ptr @f2, ptr @f3, ptr @f4] +; DISABLE-NOT: @{{.*}} = private unnamed_addr constant [4 x ptr] [ptr @f1, ptr @f2, ptr @f3, ptr @f4] target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "armv7a--none-eabi" diff --git a/llvm/test/Transforms/SimplifyCFG/switch-transformations-no-lut.ll b/llvm/test/Transforms/SimplifyCFG/switch-transformations-no-lut.ll index 25267dcc6dbcb..48be76c19e48f 100644 --- a/llvm/test/Transforms/SimplifyCFG/switch-transformations-no-lut.ll +++ b/llvm/test/Transforms/SimplifyCFG/switch-transformations-no-lut.ll @@ -410,13 +410,12 @@ define i1 @single_value_with_mask(i32 %x) { ; OPTNOLUT-NEXT: i32 21, label %[[END]] ; 
OPTNOLUT-NEXT: i32 48, label %[[END]] ; OPTNOLUT-NEXT: i32 16, label %[[END]] +; OPTNOLUT-NEXT: i32 80, label %[[END]] ; OPTNOLUT-NEXT: ] ; OPTNOLUT: [[DEFAULT]]: -; OPTNOLUT-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 80 -; OPTNOLUT-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i1 false, i1 true ; OPTNOLUT-NEXT: br label %[[END]] ; OPTNOLUT: [[END]]: -; OPTNOLUT-NEXT: [[RES:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ false, %[[ENTRY]] ], [ false, %[[ENTRY]] ], [ false, %[[ENTRY]] ], [ [[SEL]], %[[DEFAULT]] ] +; OPTNOLUT-NEXT: [[RES:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ false, %[[ENTRY]] ], [ false, %[[ENTRY]] ], [ false, %[[ENTRY]] ], [ true, %[[DEFAULT]] ], [ false, %[[ENTRY]] ] ; OPTNOLUT-NEXT: ret i1 [[RES]] ; ; TTINOLUT-LABEL: define i1 @single_value_with_mask( diff --git a/llvm/test/Transforms/SimplifyCFG/switch_create.ll b/llvm/test/Transforms/SimplifyCFG/switch_create.ll index ef5aee68e268e..64016f3a4b97c 100644 --- a/llvm/test/Transforms/SimplifyCFG/switch_create.ll +++ b/llvm/test/Transforms/SimplifyCFG/switch_create.ll @@ -1314,6 +1314,136 @@ if.end: ret void } +define i32 @switch_with_icmp_select_after_it(i32 %x) { +; CHECK-LABEL: @switch_with_icmp_select_after_it( +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i32 [[X:%.*]], label [[DEFAULT:%.*]] [ +; CHECK-NEXT: i32 18, label [[END:%.*]] +; CHECK-NEXT: i32 21, label [[END]] +; CHECK-NEXT: i32 48, label [[END]] +; CHECK-NEXT: i32 16, label [[END]] +; CHECK-NEXT: i32 80, label [[SWITCH_EDGE:%.*]] +; CHECK-NEXT: ] +; CHECK: switch.edge: +; CHECK-NEXT: br label [[END]] +; CHECK: default: +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ 1, [[ENTRY]] ], [ 1, [[ENTRY]] ], [ 1, [[ENTRY]] ], [ 3, [[DEFAULT]] ], [ 2, [[SWITCH_EDGE]] ] +; CHECK-NEXT: ret i32 [[RES]] +; +entry: + switch i32 %x, label %default [ + i32 18, label %end + i32 21, label %end + i32 48, label %end + i32 16, label %end + ] +default: + %cmp = icmp eq i32 %x, 80 + ; Create a new switch case BB for 
case 80. + %sel = select i1 %cmp, i32 2, i32 3 + br label %end +end: + %res = phi i32 [ 1, %entry ], [ 1, %entry ], [ 1, %entry ], [ 1, %entry ], [ %sel, %default ] + ret i32 %res +} + +define i32 @switch_with_icmp_select_after_it2(i32 %x) { +; CHECK-LABEL: @switch_with_icmp_select_after_it2( +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i32 [[X:%.*]], label [[DEFAULT:%.*]] [ +; CHECK-NEXT: i32 18, label [[END:%.*]] +; CHECK-NEXT: i32 21, label [[END]] +; CHECK-NEXT: i32 48, label [[END]] +; CHECK-NEXT: i32 16, label [[END]] +; CHECK-NEXT: i32 80, label [[END]] +; CHECK-NEXT: ] +; CHECK: default: +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ 1, [[ENTRY]] ], [ 1, [[ENTRY]] ], [ 1, [[ENTRY]] ], [ 3, [[DEFAULT]] ], [ 1, [[ENTRY]] ] +; CHECK-NEXT: ret i32 [[RES]] +; +entry: + switch i32 %x, label %default [ + i32 18, label %end + i32 21, label %end + i32 48, label %end + i32 16, label %end + ] +default: + %cmp = icmp eq i32 %x, 80 + ; Should not create new case BB + %sel = select i1 %cmp, i32 1, i32 3 + br label %end +end: + %res = phi i32 [ 1, %entry ], [ 1, %entry ], [ 1, %entry ], [ 1, %entry ], [ %sel, %default ] + ret i32 %res +} + +define i32 @switch_with_icmp_select_after_it3(i32 %x) { +; CHECK-LABEL: @switch_with_icmp_select_after_it3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 80 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 3, i32 1 +; CHECK-NEXT: ret i32 [[SEL]] +; +entry: + switch i32 %x, label %default [ + i32 18, label %end + i32 21, label %end + i32 48, label %end + i32 16, label %end + ] +default: + %cmp = icmp eq i32 %x, 80 + ; Should not create new case BB + %sel = select i1 %cmp, i32 3, i32 1 + br label %end +end: + %res = phi i32 [ 1, %entry ], [ 1, %entry ], [ 1, %entry ], [ 1, %entry ], [ %sel, %default ] + ret i32 %res +} + +; TODO: support this case (multi-phis). 
+define i32 @switch_with_icmp_select_after_it_multi_phis(i32 %x) { +; CHECK-LABEL: @switch_with_icmp_select_after_it_multi_phis( +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i32 [[X:%.*]], label [[DEFAULT:%.*]] [ +; CHECK-NEXT: i32 18, label [[END:%.*]] +; CHECK-NEXT: i32 21, label [[END]] +; CHECK-NEXT: i32 48, label [[END]] +; CHECK-NEXT: i32 16, label [[END]] +; CHECK-NEXT: ] +; CHECK: default: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 80 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 2, i32 3 +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: [[RES1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 0, [[ENTRY]] ], [ 0, [[ENTRY]] ], [ 0, [[ENTRY]] ], [ 100, [[DEFAULT]] ] +; CHECK-NEXT: [[RES2:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ 1, [[ENTRY]] ], [ 1, [[ENTRY]] ], [ 1, [[ENTRY]] ], [ [[SEL]], [[DEFAULT]] ] +; CHECK-NEXT: [[RES:%.*]] = xor i32 [[RES1]], [[RES2]] +; CHECK-NEXT: ret i32 [[RES]] +; +entry: + switch i32 %x, label %default [ + i32 18, label %end + i32 21, label %end + i32 48, label %end + i32 16, label %end + ] +default: + %cmp = icmp eq i32 %x, 80 + %sel = select i1 %cmp, i32 2, i32 3 + br label %end +end: + %res1 = phi i32 [ 0, %entry ], [ 0, %entry ], [ 0, %entry ], [ 0, %entry ], [ 100, %default ] + %res2 = phi i32 [ 1, %entry ], [ 1, %entry ], [ 1, %entry ], [ 1, %entry ], [ %sel, %default ] + %res = xor i32 %res1, %res2 + ret i32 %res +} + !0 = !{!"function_entry_count", i32 100} !1 = !{!"branch_weights", i32 6, i32 10} ;.