diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp index 27e70c5ddc0fc..1150c1c5a99cf 100644 --- a/llvm/lib/Transforms/Utils/LoopPeel.cpp +++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp @@ -381,9 +381,23 @@ static bool shouldPeelLastIteration(Loop &L, CmpPredicate Pred, const SCEV *ValAtSecondToLastIter = LeftAR->evaluateAtIteration( SE.getMinusSCEV(BTC, SE.getOne(BTC->getType())), SE); - return SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), ValAtLastIter, - RightSCEV) && - SE.isKnownPredicate(Pred, ValAtSecondToLastIter, RightSCEV); + CmpPredicate InvPred = ICmpInst::getInversePredicate(Pred); + if (!SE.isKnownPredicate(InvPred, ValAtLastIter, RightSCEV) || + !SE.isKnownPredicate(Pred, ValAtSecondToLastIter, RightSCEV)) + return false; + + // For a monotonic predicate, we've found the single transition point and + // don't need to check further. + if (!ICmpInst::isEquality(Pred)) + return true; + + // For an equality, the condition can toggle twice. For the peeling to be + // profitable, we need to know that this is the first toggle. The two + // toggles are separate by one iteration. If the BTC isn't at least two + // this will overflow, but that's okay correctness wise. + const SCEV *ValAtPriorIter = LeftAR->evaluateAtIteration( + SE.getMinusSCEV(BTC, SE.getConstant(BTC->getType(), 2)), SE); + return SE.isKnownPredicate(Pred, ValAtPriorIter, RightSCEV); } // Return the number of iterations to peel off from the beginning and end of the diff --git a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-variable-trip-count.ll b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-variable-trip-count.ll index 5704be33f0f0b..34b53c54d9bc6 100644 --- a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-variable-trip-count.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-variable-trip-count.ll @@ -176,39 +176,18 @@ define i32 @peel_last_with_trip_count_check_lcssa_phi_cmp_not_invar2(i32 %n) { ; CHECK-SAME: i32 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[SUB:%.*]] = add i32 [[N]], -2 -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0 -; CHECK-NEXT: br i1 [[TMP1]], label %[[ENTRY_SPLIT:.*]], label %[[EXIT_PEEL_BEGIN:.*]] -; CHECK: [[ENTRY_SPLIT]]: ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY_SPLIT]] ], [ [[IV_NEXT1:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[IV]], [[SUB]] -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C]], i32 1, i32 2 -; CHECK-NEXT: call void @foo(i32 [[SEL]]) -; CHECK-NEXT: [[IV_NEXT1]] = add nuw i32 [[IV]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[N]], 1 -; CHECK-NEXT: [[EC1:%.*]] = icmp ne i32 [[IV_NEXT1]], [[TMP2]] -; CHECK-NEXT: br i1 [[EC1]], label %[[LOOP]], label %[[EXIT_PEEL_BEGIN_LOOPEXIT:.*]], !llvm.loop [[LOOP2:![0-9]+]] -; CHECK: [[EXIT_PEEL_BEGIN_LOOPEXIT]]: -; CHECK-NEXT: [[DOTPH:%.*]] = phi i32 [ [[IV_NEXT1]], %[[LOOP]] ] -; CHECK-NEXT: br label %[[EXIT_PEEL_BEGIN]] -; CHECK: [[EXIT_PEEL_BEGIN]]: -; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[DOTPH]], %[[EXIT_PEEL_BEGIN_LOOPEXIT]] ] -; CHECK-NEXT: br label %[[LOOP_PEEL:.*]] -; CHECK: [[LOOP_PEEL]]: +; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[C_PEEL:%.*]] = icmp ne i32 [[TMP3]], [[SUB]] ; CHECK-NEXT: [[SEL_LCSSA:%.*]] = select i1 [[C_PEEL]], i32 1, i32 2 ; CHECK-NEXT: call void @foo(i32 [[SEL_LCSSA]]) -; CHECK-NEXT: [[IV_NEXT:%.*]] = add i32 [[TMP3]], 1 +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[TMP3]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp ne i32 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_PEEL_NEXT:.*]], label %[[EXIT_PEEL_NEXT]] -; CHECK: [[EXIT_PEEL_NEXT]]: -; CHECK-NEXT: br label %[[LOOP_PEEL_NEXT:.*]] -; CHECK: [[LOOP_PEEL_NEXT]]: -; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT:.*]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: ret i32 [[SEL_LCSSA]] +; CHECK-NEXT: [[SEL_LCSSA1:%.*]] = phi i32 [ [[SEL_LCSSA]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[SEL_LCSSA1]] ; entry: %sub = add i32 %n, -2 @@ -245,7 +224,7 @@ define i32 @peel_last_with_trip_count_check_lcssa_phi_cmp_not_invar3(i32 %n) { ; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[N]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp ne i32 [[IV_NEXT]], [[TMP1]] -; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT_PEEL_BEGIN_LOOPEXIT:.*]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT_PEEL_BEGIN_LOOPEXIT:.*]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: [[EXIT_PEEL_BEGIN_LOOPEXIT]]: ; CHECK-NEXT: [[DOTPH:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ] ; CHECK-NEXT: br label %[[EXIT_PEEL_BEGIN]] @@ -363,7 +342,7 @@ define void @peel_last_with_trip_count_check_nested_loop(i32 %n) { ; CHECK-NEXT: [[IV_NEXT1]] = add nuw i32 [[IV1]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[N]], 1 ; CHECK-NEXT: [[EXITCOND_NOT1:%.*]] = icmp eq i32 [[IV_NEXT1]], [[TMP2]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT1]], label %[[OUTER_HEADER_LOOPEXIT_PEEL_BEGIN_LOOPEXIT]], label %[[INNER_HEADER]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT1]], label %[[OUTER_HEADER_LOOPEXIT_PEEL_BEGIN_LOOPEXIT]], label %[[INNER_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] ; entry: %sub = add i32 %n, -1 @@ -392,5 +371,4 @@ inner.latch: ; CHECK: [[META1]] = !{!"llvm.loop.peeled.count", i32 1} ; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META1]]} ; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]} -; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]]} ;.