diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp index 9149f71941db4..d5d819505226c 100644 --- a/llvm/lib/Transforms/Utils/LoopPeel.cpp +++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp @@ -357,9 +357,9 @@ bool llvm::canPeelLastIteration(const Loop &L, ScalarEvolution &SE) { m_scev_AffineAddRec(m_SCEV(), m_scev_One(), m_SpecificLoop(&L))); } -/// Returns true if the last iteration can be peeled off and the condition (Pred -/// LeftAR, RightSCEV) is known at the last iteration and the inverse condition -/// is known at the second-to-last. +/// Returns true if the last iteration can be peeled off because the known +/// monotonic condition (Pred LeftAR, RightSCEV) changes value on the last +/// iteration. static bool shouldPeelLastIteration(Loop &L, CmpPredicate Pred, const SCEVAddRecExpr *LeftAR, const SCEV *RightSCEV, ScalarEvolution &SE, @@ -378,9 +378,11 @@ static bool shouldPeelLastIteration(Loop &L, CmpPredicate Pred, const SCEV *ValAtSecondToLastIter = LeftAR->evaluateAtIteration( SE.getMinusSCEV(BTC, SE.getOne(BTC->getType())), SE); - return SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), ValAtLastIter, - RightSCEV) && - SE.isKnownPredicate(Pred, ValAtSecondToLastIter, RightSCEV); + CmpPredicate InvPred = ICmpInst::getInversePredicate(Pred); + return (SE.isKnownPredicate(InvPred, ValAtLastIter, RightSCEV) && + SE.isKnownPredicate(Pred, ValAtSecondToLastIter, RightSCEV)) || + (SE.isKnownPredicate(Pred, ValAtLastIter, RightSCEV) && + SE.isKnownPredicate(InvPred, ValAtSecondToLastIter, RightSCEV)); } // Return the number of iterations to peel off from the beginning and end of the diff --git a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-variable-trip-count.ll b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-variable-trip-count.ll index 6346d7d97e826..592700eda702f 100644 --- a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-variable-trip-count.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-variable-trip-count.ll @@ -139,17 +139,38 @@ define i32 @peel_last_with_trip_count_check_lcssa_phi_cmp_not_invar(i32 %n) { ; CHECK-SAME: i32 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[SUB:%.*]] = add i32 [[N]], -2 +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label %[[ENTRY_SPLIT:.*]], label %[[EXIT_PEEL_BEGIN:.*]] +; CHECK: [[ENTRY_SPLIT]]: ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY_SPLIT]] ], [ [[IV_NEXT1:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[IV]], [[SUB]] ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C]], i32 1, i32 2 ; CHECK-NEXT: call void @foo(i32 [[SEL]]) -; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[IV_NEXT1]] = add nuw i32 [[IV]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[N]], 1 +; CHECK-NEXT: [[EC1:%.*]] = icmp ne i32 [[IV_NEXT1]], [[TMP2]] +; CHECK-NEXT: br i1 [[EC1]], label %[[LOOP]], label %[[EXIT_PEEL_BEGIN_LOOPEXIT:.*]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK: [[EXIT_PEEL_BEGIN_LOOPEXIT]]: +; CHECK-NEXT: [[DOTPH:%.*]] = phi i32 [ [[IV_NEXT1]], %[[LOOP]] ] +; CHECK-NEXT: br label %[[EXIT_PEEL_BEGIN]] +; CHECK: [[EXIT_PEEL_BEGIN]]: +; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[DOTPH]], %[[EXIT_PEEL_BEGIN_LOOPEXIT]] ] +; CHECK-NEXT: br label %[[LOOP_PEEL:.*]] +; CHECK: [[LOOP_PEEL]]: +; CHECK-NEXT: [[C_PEEL:%.*]] = icmp eq i32 [[TMP3]], [[SUB]] +; CHECK-NEXT: [[SEL_LCSSA:%.*]] = select i1 [[C_PEEL]], i32 1, i32 2 +; CHECK-NEXT: call void @foo(i32 [[SEL_LCSSA]]) +; CHECK-NEXT: [[IV_NEXT:%.*]] = add i32 [[TMP3]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp ne i32 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_PEEL_NEXT:.*]], label %[[EXIT_PEEL_NEXT]] +; CHECK: [[EXIT_PEEL_NEXT]]: +; CHECK-NEXT: br label %[[LOOP_PEEL_NEXT:.*]] +; CHECK: [[LOOP_PEEL_NEXT]]: +; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ] ; CHECK-NEXT: ret i32 [[SEL_LCSSA]] ; entry: @@ -214,7 +235,7 @@ define void @peel_last_with_trip_count_check_nested_loop(i32 %n) { ; CHECK-NEXT: [[IV_NEXT1]] = add nuw i32 [[IV1]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[N]], 1 ; CHECK-NEXT: [[EXITCOND_NOT1:%.*]] = icmp eq i32 [[IV_NEXT1]], [[TMP2]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT1]], label %[[OUTER_HEADER_LOOPEXIT_PEEL_BEGIN_LOOPEXIT]], label %[[INNER_HEADER]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT1]], label %[[OUTER_HEADER_LOOPEXIT_PEEL_BEGIN_LOOPEXIT]], label %[[INNER_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] ; entry: %sub = add i32 %n, -1 @@ -242,4 +263,5 @@ inner.latch: ; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]} ; CHECK: [[META1]] = !{!"llvm.loop.peeled.count", i32 1} ; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META1]]} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]} ;.