diff --git a/llvm/include/llvm/Transforms/Utils/LoopPeel.h b/llvm/include/llvm/Transforms/Utils/LoopPeel.h index 49dbc9aa1f2a9..dd59a9c766e45 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopPeel.h +++ b/llvm/include/llvm/Transforms/Utils/LoopPeel.h @@ -44,8 +44,7 @@ gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::PeelingPreferences &PP, unsigned TripCount, DominatorTree &DT, - ScalarEvolution &SE, const TargetTransformInfo &TTI, - AssumptionCache *AC = nullptr, + ScalarEvolution &SE, AssumptionCache *AC = nullptr, unsigned Threshold = UINT_MAX); } // end namespace llvm diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp index afa7abfea419e..0b9fee5727c6f 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -1014,7 +1014,7 @@ bool llvm::computeUnrollCount( } // 5th priority is loop peeling. - computePeelCount(L, LoopSize, PP, TripCount, DT, SE, TTI, AC, UP.Threshold); + computePeelCount(L, LoopSize, PP, TripCount, DT, SE, AC, UP.Threshold); if (PP.PeelCount) { UP.Runtime = false; UP.Count = 1; diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp index bd025fddd0cf7..f348d24ec24fb 100644 --- a/llvm/lib/Transforms/Utils/LoopPeel.cpp +++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp @@ -38,7 +38,6 @@ #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" -#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include #include @@ -331,7 +330,11 @@ static unsigned peelToTurnInvariantLoadsDerefencebale(Loop &L, bool llvm::canPeelLastIteration(const Loop &L, ScalarEvolution &SE) { const SCEV *BTC = SE.getBackedgeTakenCount(&L); - if (isa(BTC)) + // The loop must execute at least 2 iterations to guarantee that peeled + // iteration executes. + // TODO: Add checks during codegen. + if (isa(BTC) || + !SE.isKnownPredicate(CmpInst::ICMP_UGT, BTC, SE.getZero(BTC->getType()))) return false; // Check if the exit condition of the loop can be adjusted by the peeling @@ -351,7 +354,6 @@ bool llvm::canPeelLastIteration(const Loop &L, ScalarEvolution &SE) { m_BasicBlock(Succ1), m_BasicBlock(Succ2))) && ((Pred == CmpInst::ICMP_EQ && Succ2 == L.getHeader()) || (Pred == CmpInst::ICMP_NE && Succ1 == L.getHeader())) && - Bound->getType()->isIntegerTy() && SE.isLoopInvariant(SE.getSCEV(Bound), &L) && match(SE.getSCEV(Inc), m_scev_AffineAddRec(m_SCEV(), m_scev_One(), m_SpecificLoop(&L))); @@ -362,18 +364,12 @@ bool llvm::canPeelLastIteration(const Loop &L, ScalarEvolution &SE) { /// is known at the second-to-last. static bool shouldPeelLastIteration(Loop &L, CmpPredicate Pred, const SCEVAddRecExpr *LeftAR, - const SCEV *RightSCEV, ScalarEvolution &SE, - const TargetTransformInfo &TTI) { + const SCEV *RightSCEV, + ScalarEvolution &SE) { if (!canPeelLastIteration(L, SE)) return false; const SCEV *BTC = SE.getBackedgeTakenCount(&L); - SCEVExpander Expander(SE, L.getHeader()->getDataLayout(), "loop-peel"); - if (!SE.isKnownNonZero(BTC) && - Expander.isHighCostExpansion(BTC, &L, SCEVCheapExpansionBudget, &TTI, - L.getLoopPredecessor()->getTerminator())) - return false; - const SCEV *ValAtLastIter = LeftAR->evaluateAtIteration(BTC, SE); const SCEV *ValAtSecondToLastIter = LeftAR->evaluateAtIteration( SE.getMinusSCEV(BTC, SE.getOne(BTC->getType())), SE); @@ -395,8 +391,7 @@ static bool shouldPeelLastIteration(Loop &L, CmpPredicate Pred, // .. // } static std::pair -countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE, - const TargetTransformInfo &TTI) { +countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE) { assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form"); unsigned DesiredPeelCount = 0; unsigned DesiredPeelCountLast = 0; @@ -484,7 +479,7 @@ countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE, const SCEV *Step = LeftAR->getStepRecurrence(SE); if (!PeelWhilePredicateIsKnown(NewPeelCount, IterVal, RightSCEV, Step, Pred)) { - if (shouldPeelLastIteration(L, Pred, LeftAR, RightSCEV, SE, TTI)) + if (shouldPeelLastIteration(L, Pred, LeftAR, RightSCEV, SE)) DesiredPeelCountLast = 1; return; } @@ -598,8 +593,8 @@ static bool violatesLegacyMultiExitLoopCheck(Loop *L) { void llvm::computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::PeelingPreferences &PP, unsigned TripCount, DominatorTree &DT, - ScalarEvolution &SE, const TargetTransformInfo &TTI, - AssumptionCache *AC, unsigned Threshold) { + ScalarEvolution &SE, AssumptionCache *AC, + unsigned Threshold) { assert(LoopSize > 0 && "Zero loop size is not allowed!"); // Save the PP.PeelCount value set by the target in // TTI.getPeelingPreferences or by the flag -unroll-peel-count. @@ -661,7 +656,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, } const auto &[CountToEliminateCmps, CountToEliminateCmpsLast] = - countToEliminateCompares(*L, MaxPeelCount, SE, TTI); + countToEliminateCompares(*L, MaxPeelCount, SE); DesiredPeelCount = std::max(DesiredPeelCount, CountToEliminateCmps); if (DesiredPeelCount == 0) @@ -827,7 +822,7 @@ static void initBranchWeights(DenseMap &WeightInfos, /// instructions in the last peeled-off iteration. static void cloneLoopBlocks( Loop *L, unsigned IterNumber, bool PeelLast, BasicBlock *InsertTop, - BasicBlock *InsertBot, BasicBlock *OrigPreHeader, + BasicBlock *InsertBot, SmallVectorImpl> &ExitEdges, SmallVectorImpl &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT, @@ -919,22 +914,12 @@ static void cloneLoopBlocks( // loop iteration. Since this copy is no longer part of the loop, we // resolve this statically: if (PeelLast) { - // For the last iteration, we introduce new phis for each header phi in - // InsertTop, using the incoming value from the preheader for the original - // preheader (when skipping the main loop) and the incoming value from the - // latch for the latch (when continuing from the main loop). - IRBuilder<> B(InsertTop, InsertTop->getFirstNonPHIIt()); + // For the last iteration, we use the value from the latch of the original + // loop directly. for (BasicBlock::iterator I = Header->begin(); isa(I); ++I) { PHINode *NewPHI = cast(VMap[&*I]); - PHINode *PN = B.CreatePHI(NewPHI->getType(), 2); + VMap[&*I] = NewPHI->getIncomingValueForBlock(Latch); NewPHI->eraseFromParent(); - if (OrigPreHeader) - PN->addIncoming(cast(&*I)->getIncomingValueForBlock(PreHeader), - OrigPreHeader); - - PN->addIncoming(cast(&*I)->getIncomingValueForBlock(Latch), - Latch); - VMap[&*I] = PN; } } else { // For the first iteration, we use the value from the preheader directly. @@ -1068,7 +1053,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI, // Set up all the necessary basic blocks. BasicBlock *InsertTop; BasicBlock *InsertBot; - BasicBlock *NewPreHeader = nullptr; + BasicBlock *NewPreHeader; DenseMap ExitValues; if (PeelLast) { // It is convenient to split the single exit block from the latch the @@ -1099,34 +1084,11 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI, for (PHINode &P : Exit->phis()) ExitValues[&P] = P.getIncomingValueForBlock(Latch); - const SCEV *BTC = SE->getBackedgeTakenCount(L); - InsertTop = SplitEdge(Latch, Exit, &DT, LI); InsertBot = SplitBlock(InsertTop, InsertTop->getTerminator(), &DT, LI); InsertTop->setName(Exit->getName() + ".peel.begin"); InsertBot->setName(Exit->getName() + ".peel.next"); - NewPreHeader = nullptr; - - // If the original loop may only execute a single iteration we need to - // insert a trip count check and skip the original loop with the last - // iteration peeled off if necessary. - if (!SE->isKnownNonZero(BTC)) { - NewPreHeader = SplitEdge(PreHeader, Header, &DT, LI); - SCEVExpander Expander(*SE, Latch->getDataLayout(), "loop-peel"); - - BranchInst *PreHeaderBR = cast(PreHeader->getTerminator()); - Value *BTCValue = - Expander.expandCodeFor(BTC, BTC->getType(), PreHeaderBR); - IRBuilder<> B(PreHeaderBR); - Value *Cond = - B.CreateICmpNE(BTCValue, ConstantInt::get(BTCValue->getType(), 0)); - B.CreateCondBr(Cond, NewPreHeader, InsertTop); - PreHeaderBR->eraseFromParent(); - - // PreHeader now dominates InsertTop. - DT.changeImmediateDominator(InsertTop, PreHeader); - } } else { // It is convenient to split the preheader into 3 parts - two blocks to // anchor the peeled copy of the loop body, and a new preheader for the @@ -1200,9 +1162,8 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI, for (unsigned Iter = 0; Iter < PeelCount; ++Iter) { SmallVector NewBlocks; - cloneLoopBlocks(L, Iter, PeelLast, InsertTop, InsertBot, - NewPreHeader ? PreHeader : nullptr, ExitEdges, NewBlocks, - LoopBlocks, VMap, LVMap, &DT, LI, + cloneLoopBlocks(L, Iter, PeelLast, InsertTop, InsertBot, ExitEdges, + NewBlocks, LoopBlocks, VMap, LVMap, &DT, LI, LoopLocalNoAliasDeclScopes, *SE); // Remap to use values from the current iteration instead of the @@ -1255,11 +1216,9 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI, if (PeelLast) { // Now adjust users of the original exit values by replacing them with the - // exit value from the peeled iteration and remove them. - for (const auto &[P, E] : ExitValues) { + // exit value from the peeled iteration. + for (const auto &[P, E] : ExitValues) P->replaceAllUsesWith(isa(E) ? E : &*VMap.lookup(E)); - P->eraseFromParent(); - } formLCSSA(*L, DT, LI, SE); } else { // Now adjust the phi nodes in the loop header to get their initial values diff --git a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-expansion-cost.ll b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-expansion-cost.ll index f3910f9bfc399..342c70170fef3 100644 --- a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-expansion-cost.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-expansion-cost.ll @@ -25,41 +25,17 @@ define i32 @test_expansion_cost_2(i32 %start, i32 %end) { ; BUDGET3-SAME: i32 [[START:%.*]], i32 [[END:%.*]]) { ; BUDGET3-NEXT: [[ENTRY:.*]]: ; BUDGET3-NEXT: [[SUB:%.*]] = add i32 [[END]], -1 -; BUDGET3-NEXT: [[TMP0:%.*]] = sub i32 [[SUB]], [[START]] -; BUDGET3-NEXT: [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0 -; BUDGET3-NEXT: br i1 [[TMP1]], label %[[ENTRY_SPLIT:.*]], label %[[EXIT_PEEL_BEGIN:.*]] -; BUDGET3: [[ENTRY_SPLIT]]: ; BUDGET3-NEXT: br label %[[LOOP_HEADER:.*]] ; BUDGET3: [[LOOP_HEADER]]: -; BUDGET3-NEXT: [[IV:%.*]] = phi i32 [ [[START]], %[[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; BUDGET3-NEXT: [[C:%.*]] = icmp eq i32 [[IV]], [[SUB]] -; BUDGET3-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]] +; BUDGET3-NEXT: [[TMP3:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[IV_NEXT_PEEL:%.*]], %[[LOOP_LATCH:.*]] ] +; BUDGET3-NEXT: [[C_PEEL:%.*]] = icmp eq i32 [[TMP3]], [[SUB]] +; BUDGET3-NEXT: br i1 [[C_PEEL]], label %[[THEN:.*]], label %[[LOOP_LATCH]] ; BUDGET3: [[THEN]]: ; BUDGET3-NEXT: br label %[[LOOP_LATCH]] ; BUDGET3: [[LOOP_LATCH]]: -; BUDGET3-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1 -; BUDGET3-NEXT: [[TMP2:%.*]] = sub i32 [[END]], 1 -; BUDGET3-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[TMP2]] -; BUDGET3-NEXT: br i1 [[EC]], label %[[EXIT_PEEL_BEGIN_LOOPEXIT:.*]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP0:![0-9]+]] -; BUDGET3: [[EXIT_PEEL_BEGIN_LOOPEXIT]]: -; BUDGET3-NEXT: [[DOTPH:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP_LATCH]] ] -; BUDGET3-NEXT: br label %[[EXIT_PEEL_BEGIN]] -; BUDGET3: [[EXIT_PEEL_BEGIN]]: -; BUDGET3-NEXT: [[TMP3:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[DOTPH]], %[[EXIT_PEEL_BEGIN_LOOPEXIT]] ] -; BUDGET3-NEXT: br label %[[LOOP_HEADER_PEEL:.*]] -; BUDGET3: [[LOOP_HEADER_PEEL]]: -; BUDGET3-NEXT: [[C_PEEL:%.*]] = icmp eq i32 [[TMP3]], [[SUB]] -; BUDGET3-NEXT: br i1 [[C_PEEL]], label %[[THEN_PEEL:.*]], label %[[LOOP_LATCH_PEEL:.*]] -; BUDGET3: [[THEN_PEEL]]: -; BUDGET3-NEXT: br label %[[LOOP_LATCH_PEEL]] -; BUDGET3: [[LOOP_LATCH_PEEL]]: -; BUDGET3-NEXT: [[IV_NEXT_PEEL:%.*]] = add nsw i32 [[TMP3]], 1 +; BUDGET3-NEXT: [[IV_NEXT_PEEL]] = add nsw i32 [[TMP3]], 1 ; BUDGET3-NEXT: [[EC_PEEL:%.*]] = icmp eq i32 [[IV_NEXT_PEEL]], [[END]] -; BUDGET3-NEXT: br i1 [[EC_PEEL]], label %[[EXIT_PEEL_NEXT:.*]], label %[[EXIT_PEEL_NEXT]] -; BUDGET3: [[EXIT_PEEL_NEXT]]: -; BUDGET3-NEXT: br label %[[LOOP_HEADER_PEEL_NEXT:.*]] -; BUDGET3: [[LOOP_HEADER_PEEL_NEXT]]: -; BUDGET3-NEXT: br label %[[EXIT:.*]] +; BUDGET3-NEXT: br i1 [[EC_PEEL]], label %[[EXIT:.*]], label %[[LOOP_HEADER]] ; BUDGET3: [[EXIT]]: ; BUDGET3-NEXT: ret i32 0 ; @@ -83,7 +59,3 @@ loop.latch: exit: ret i32 0 } -;. -; BUDGET3: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]} -; BUDGET3: [[META1]] = !{!"llvm.loop.peeled.count", i32 1} -;. diff --git a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-constant-trip-count.ll b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-constant-trip-count.ll index 5c9b2fa6a9d60..f1290069bda0c 100644 --- a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-constant-trip-count.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-constant-trip-count.ll @@ -13,6 +13,7 @@ define i64 @peel_single_block_loop_iv_step_1() { ; CHECK-NEXT: br i1 [[EC1]], label %[[LOOP]], label %[[EXIT_PEEL_BEGIN:.*]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[EXIT_PEEL_BEGIN]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT1]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV1]], %[[LOOP]] ] ; CHECK-NEXT: br label %[[LOOP_PEEL:.*]] ; CHECK: [[LOOP_PEEL]]: ; CHECK-NEXT: [[CMP18_NOT:%.*]] = icmp eq i64 [[IV]], 63 @@ -91,6 +92,7 @@ define i64 @peel_single_block_loop_iv_step_1_eq_pred() { ; CHECK-NEXT: br i1 [[CMP_PEEL]], label %[[EXIT_PEEL_BEGIN:.*]], label %[[LOOP]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: [[EXIT_PEEL_BEGIN]]: ; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_LCSSA]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_LCSSA1:%.*]] = phi i64 [ [[IV]], %[[LOOP]] ] ; CHECK-NEXT: br label %[[LOOP_PEEL:.*]] ; CHECK: [[LOOP_PEEL]]: ; CHECK-NEXT: [[CMP_PEEL1:%.*]] = icmp eq i64 [[IV_NEXT_LCSSA]], 63 @@ -169,6 +171,7 @@ define i64 @peel_single_block_loop_iv_step_1_nested_loop() { ; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[OUTER_LATCH_PEEL_BEGIN:.*]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[OUTER_LATCH_PEEL_BEGIN]]: ; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV]], %[[LOOP]] ] ; CHECK-NEXT: br label %[[LOOP_PEEL:.*]] ; CHECK: [[LOOP_PEEL]]: ; CHECK-NEXT: [[CMP_PEEL:%.*]] = icmp eq i64 [[IV_NEXT_LCSSA]], 63 @@ -234,6 +237,7 @@ define i64 @peel_multi_block_loop_iv_step_1() { ; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT_PEEL_BEGIN:.*]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[EXIT_PEEL_BEGIN]]: ; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_NEXT]], %[[LATCH]] ] +; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV]], %[[LATCH]] ] ; CHECK-NEXT: br label %[[LOOP_PEEL:.*]] ; CHECK: [[LOOP_PEEL]]: ; CHECK-NEXT: [[CMP_PEEL:%.*]] = icmp eq i64 [[IV_NEXT_LCSSA]], 63 @@ -361,6 +365,7 @@ define i64 @peel_single_block_loop_iv_step_1_btc_1() { ; CHECK-NEXT: br i1 false, label %[[LOOP]], label %[[EXIT_PEEL_BEGIN:.*]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[EXIT_PEEL_BEGIN]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT1]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV1]], %[[LOOP]] ] ; CHECK-NEXT: br label %[[LOOP_PEEL:.*]] ; CHECK: [[LOOP_PEEL]]: ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV]], 1 @@ -478,8 +483,9 @@ define i32 @peel_loop_with_branch_and_phi_uses(ptr %x, i1 %c) { ; CHECK-NEXT: [[EC1:%.*]] = icmp ne i32 [[IV_NEXT1]], 99 ; CHECK-NEXT: br i1 [[EC1]], label %[[LOOP_HEADER]], label %[[LOOPEXIT_PEEL_BEGIN:.*]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[LOOPEXIT_PEEL_BEGIN]]: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT1]], %[[LOOP_LATCH]] ] ; CHECK-NEXT: [[RED:%.*]] = phi i32 [ [[ADD1]], %[[LOOP_LATCH]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT1]], %[[LOOP_LATCH]] ] +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD1]], %[[LOOP_LATCH]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER_PEEL:.*]] ; CHECK: [[LOOP_HEADER_PEEL]]: ; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[IV]], 99 diff --git a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-guards.ll b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-guards.ll deleted file mode 100644 index af07a97131322..0000000000000 --- a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-guards.ll +++ /dev/null @@ -1,191 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -p loop-unroll -unroll-full-max-count=0 -S %s | FileCheck %s - -declare void @foo(i32) - -define void @peel_with_guard_known_nonnegative_1(i32 %n) { -; CHECK-LABEL: define void @peel_with_guard_known_nonnegative_1( -; CHECK-SAME: i32 [[N:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[PRECOND:%.*]] = icmp slt i32 [[N]], 0 -; CHECK-NEXT: br i1 [[PRECOND]], label %[[EXIT:.*]], label %[[PH:.*]] -; CHECK: [[PH]]: -; CHECK-NEXT: [[N_EXT:%.*]] = zext i32 [[N]] to i64 -; CHECK-NEXT: [[N_1:%.*]] = add i32 [[N]], 1 -; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N_1]] to i64 -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[C:%.*]] = icmp eq i64 [[IV]], [[N_EXT]] -; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[C]], i32 10, i32 20 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_LOOPEXIT:.*]], label %[[LOOP]] -; CHECK: [[EXIT_LOOPEXIT]]: -; CHECK-NEXT: br label %[[EXIT]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: ret void -; -entry: - %precond = icmp slt i32 %n, 0 - br i1 %precond, label %exit, label %ph - -ph: - %n.ext = zext i32 %n to i64 - %n.1 = add i32 %n, 1 - %wide.trip.count = zext i32 %n.1 to i64 - br label %loop - -loop: - %iv = phi i64 [ 0, %ph ], [ %iv.next, %loop ] - %c = icmp eq i64 %iv, %n.ext - %spec.select = select i1 %c, i32 10, i32 20 - %iv.next = add i64 %iv, 1 - %ec = icmp eq i64 %iv.next, %wide.trip.count - br i1 %ec, label %exit, label %loop - -exit: - ret void -} - -define i32 @peel_with_guard_known_nonnegative_2(ptr %x, i32 %w) { -; CHECK-LABEL: define i32 @peel_with_guard_known_nonnegative_2( -; CHECK-SAME: ptr [[X:%.*]], i32 [[W:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: [[PRECOND:%.*]] = icmp sgt i32 [[W]], 0 -; CHECK-NEXT: br i1 [[PRECOND]], label %[[PH:.*]], label %[[EXIT:.*]] -; CHECK: [[PH]]: -; CHECK-NEXT: [[SUB:%.*]] = add nsw i32 [[W]], -1 -; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[SUB]] to i64 -; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[W]] to i64 -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[RED:%.*]] = phi i32 [ 0, %[[PH]] ], [ [[ADD:%.*]], %[[LOOP_LATCH]] ] -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i64 [[IV]], [[TMP0]] -; CHECK-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[LOOP_LATCH]] -; CHECK: [[IF_THEN]]: -; CHECK-NEXT: tail call void @foo() -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[X]], i64 [[IV]] -; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP1]], [[RED]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_LOOPEXIT:.*]], label %[[LOOP_HEADER]] -; CHECK: [[EXIT_LOOPEXIT]]: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], %[[LOOP_LATCH]] ] -; CHECK-NEXT: br label %[[EXIT]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RED_LCSSA:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[ADD_LCSSA]], %[[EXIT_LOOPEXIT]] ] -; CHECK-NEXT: ret i32 [[RED_LCSSA]] -; -entry: - %precond = icmp sgt i32 %w, 0 - br i1 %precond, label %ph, label %exit - -ph: - %sub = add nsw i32 %w, -1 - %0 = zext nneg i32 %sub to i64 - %wide.trip.count = zext nneg i32 %w to i64 - br label %loop.header - -loop.header: - %iv = phi i64 [ 0, %ph ], [ %iv.next, %loop.latch ] - %red = phi i32 [ 0, %ph ], [ %add, %loop.latch ] - %cmp1 = icmp eq i64 %iv, %0 - br i1 %cmp1, label %if.then, label %loop.latch - -if.then: - tail call void @foo() - br label %loop.latch - -loop.latch: - %arrayidx = getelementptr inbounds nuw i32, ptr %x, i64 %iv - %1 = load i32, ptr %arrayidx, align 4 - %add = add nsw i32 %1, %red - %iv.next = add nuw nsw i64 %iv, 1 - %ec = icmp eq i64 %iv.next, %wide.trip.count - br i1 %ec, label %exit, label %loop.header - -exit: - %red.lcssa = phi i32 [ 0, %entry ], [ %add, %loop.latch ] - ret i32 %red.lcssa -} - -define void @peel_with_guard2(i32 %n) { -; CHECK-LABEL: define void @peel_with_guard2( -; CHECK-SAME: i32 [[N:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[SUB:%.*]] = add i32 [[N]], -1 -; CHECK-NEXT: [[PRECOND:%.*]] = icmp eq i32 [[SUB]], 0 -; CHECK-NEXT: br i1 [[PRECOND]], label %[[EXIT:.*]], label %[[LOOP_HEADER_PREHEADER:.*]] -; CHECK: [[LOOP_HEADER_PREHEADER]]: -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -2 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0 -; CHECK-NEXT: br i1 [[TMP1]], label %[[LOOP_HEADER_PREHEADER_SPLIT:.*]], label %[[EXIT_LOOPEXIT_PEEL_BEGIN:.*]] -; CHECK: [[LOOP_HEADER_PREHEADER_SPLIT]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ 1, %[[LOOP_HEADER_PREHEADER_SPLIT]] ] -; CHECK-NEXT: br i1 false, label %[[THEN:.*]], label %[[LOOP_LATCH]] -; CHECK: [[THEN]]: -; CHECK-NEXT: call void @foo() -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[N]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[TMP2]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_LOOPEXIT_PEEL_BEGIN_LOOPEXIT:.*]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP0:![0-9]+]] -; CHECK: [[EXIT_LOOPEXIT_PEEL_BEGIN_LOOPEXIT]]: -; CHECK-NEXT: [[DOTPH:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP_LATCH]] ] -; CHECK-NEXT: br label %[[EXIT_LOOPEXIT_PEEL_BEGIN]] -; CHECK: [[EXIT_LOOPEXIT_PEEL_BEGIN]]: -; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ 1, %[[LOOP_HEADER_PREHEADER]] ], [ [[DOTPH]], %[[EXIT_LOOPEXIT_PEEL_BEGIN_LOOPEXIT]] ] -; CHECK-NEXT: br label %[[LOOP_HEADER_PEEL:.*]] -; CHECK: [[LOOP_HEADER_PEEL]]: -; CHECK-NEXT: [[C_PEEL:%.*]] = icmp eq i32 [[TMP3]], [[SUB]] -; CHECK-NEXT: br i1 [[C_PEEL]], label %[[THEN_PEEL:.*]], label %[[LOOP_LATCH_PEEL:.*]] -; CHECK: [[THEN_PEEL]]: -; CHECK-NEXT: call void @foo() -; CHECK-NEXT: br label %[[LOOP_LATCH_PEEL]] -; CHECK: [[LOOP_LATCH_PEEL]]: -; CHECK-NEXT: [[IV_NEXT_PEEL:%.*]] = add nuw i32 [[TMP3]], 1 -; CHECK-NEXT: [[EC_PEEL:%.*]] = icmp eq i32 [[IV_NEXT_PEEL]], [[N]] -; CHECK-NEXT: br i1 [[EC_PEEL]], label %[[EXIT_LOOPEXIT_PEEL_NEXT:.*]], label %[[EXIT_LOOPEXIT_PEEL_NEXT]] -; CHECK: [[EXIT_LOOPEXIT_PEEL_NEXT]]: -; CHECK-NEXT: br label %[[LOOP_HEADER_PEEL_NEXT:.*]] -; CHECK: [[LOOP_HEADER_PEEL_NEXT]]: -; CHECK-NEXT: br label %[[EXIT_LOOPEXIT:.*]] -; CHECK: [[EXIT_LOOPEXIT]]: -; CHECK-NEXT: br label %[[EXIT]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: ret void -; -entry: - %sub = add i32 %n, -1 - %precond = icmp eq i32 %sub, 0 - br i1 %precond, label %exit, label %loop.header - -loop.header: - %iv = phi i32 [ 1, %entry ], [ %iv.next, %loop.latch ] - %c = icmp eq i32 %iv, %sub - br i1 %c, label %then, label %loop.latch - -then: - call void @foo() - br label %loop.latch - -loop.latch: - %iv.next = add nuw i32 %iv, 1 - %ec = icmp eq i32 %iv.next, %n - br i1 %ec, label %exit, label %loop.header - -exit: - ret void -} -;. -; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]} -; CHECK: [[META1]] = !{!"llvm.loop.peeled.count", i32 1} -;. diff --git a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-variable-trip-count.ll b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-variable-trip-count.ll index 6346d7d97e826..75f3674732f35 100644 --- a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-variable-trip-count.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-variable-trip-count.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt -p loop-unroll -unroll-full-max-count=0 -S %s | FileCheck %s + declare void @foo(i32) define i32 @peel_last_with_trip_count_check_lcssa_phi(i32 %n) { @@ -8,36 +9,18 @@ define i32 @peel_last_with_trip_count_check_lcssa_phi(i32 %n) { ; CHECK-SAME: i32 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[SUB:%.*]] = add i32 [[N]], -1 -; CHECK-NEXT: [[TMP0:%.*]] = icmp ne i32 [[SUB]], 0 -; CHECK-NEXT: br i1 [[TMP0]], label %[[ENTRY_SPLIT:.*]], label %[[EXIT_PEEL_BEGIN:.*]] -; CHECK: [[ENTRY_SPLIT]]: ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ 0, %[[ENTRY_SPLIT]] ], [ [[IV_NEXT1:%.*]], %[[LOOP]] ] -; CHECK-NEXT: call void @foo(i32 2) -; CHECK-NEXT: [[IV_NEXT1]] = add nuw i32 [[IV1]], 1 -; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[N]], 1 -; CHECK-NEXT: [[EC1:%.*]] = icmp ne i32 [[IV_NEXT1]], [[TMP1]] -; CHECK-NEXT: br i1 [[EC1]], label %[[LOOP]], label %[[EXIT_PEEL_BEGIN_LOOPEXIT:.*]], !llvm.loop [[LOOP0:![0-9]+]] -; CHECK: [[EXIT_PEEL_BEGIN_LOOPEXIT]]: -; CHECK-NEXT: [[DOTPH:%.*]] = phi i32 [ [[IV_NEXT1]], %[[LOOP]] ] -; CHECK-NEXT: br label %[[EXIT_PEEL_BEGIN]] -; CHECK: [[EXIT_PEEL_BEGIN]]: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[DOTPH]], %[[EXIT_PEEL_BEGIN_LOOPEXIT]] ] -; CHECK-NEXT: br label %[[LOOP_PEEL:.*]] -; CHECK: [[LOOP_PEEL]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[IV]], [[SUB]] ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C]], i32 1, i32 2 ; CHECK-NEXT: call void @foo(i32 [[SEL]]) -; CHECK-NEXT: [[IV_NEXT:%.*]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp ne i32 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_PEEL_NEXT:.*]], label %[[EXIT_PEEL_NEXT]] -; CHECK: [[EXIT_PEEL_NEXT]]: -; CHECK-NEXT: br label %[[LOOP_PEEL_NEXT:.*]] -; CHECK: [[LOOP_PEEL_NEXT]]: -; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT:.*]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: ret i32 [[SEL]] +; CHECK-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[SEL_LCSSA]] ; entry: %sub = add i32 %n, -1 @@ -177,44 +160,21 @@ define void @peel_last_with_trip_count_check_nested_loop(i32 %n) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[SUB:%.*]] = add i32 [[N]], -1 ; CHECK-NEXT: br label %[[OUTER_HEADER:.*]] -; CHECK: [[OUTER_HEADER_LOOPEXIT_PEEL_BEGIN_LOOPEXIT:.*]]: -; CHECK-NEXT: [[DOTPH:%.*]] = phi i32 [ [[IV_NEXT1:%.*]], %[[INNER_LATCH:.*]] ] -; CHECK-NEXT: br label %[[OUTER_HEADER_LOOPEXIT_PEEL_BEGIN:.*]] -; CHECK: [[OUTER_HEADER_LOOPEXIT_PEEL_BEGIN]]: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[OUTER_HEADER]] ], [ [[DOTPH]], %[[OUTER_HEADER_LOOPEXIT_PEEL_BEGIN_LOOPEXIT]] ] -; CHECK-NEXT: br label %[[INNER_HEADER_PEEL:.*]] -; CHECK: [[INNER_HEADER_PEEL]]: -; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[IV]], [[SUB]] -; CHECK-NEXT: br i1 [[C]], label %[[INNER_LATCH_PEEL:.*]], label %[[THEN_PEEL:.*]] -; CHECK: [[THEN_PEEL]]: -; CHECK-NEXT: call void @foo(i32 1) -; CHECK-NEXT: br label %[[INNER_LATCH_PEEL]] -; CHECK: [[INNER_LATCH_PEEL]]: -; CHECK-NEXT: [[IV_NEXT:%.*]] = add i32 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[OUTER_HEADER_LOOPEXIT_PEEL_NEXT:.*]], label %[[OUTER_HEADER_LOOPEXIT_PEEL_NEXT]] -; CHECK: [[OUTER_HEADER_LOOPEXIT_PEEL_NEXT]]: -; CHECK-NEXT: br label %[[INNER_HEADER_PEEL_NEXT:.*]] -; CHECK: [[INNER_HEADER_PEEL_NEXT]]: -; CHECK-NEXT: br label %[[OUTER_HEADER_LOOPEXIT:.*]] -; CHECK: [[OUTER_HEADER_LOOPEXIT]]: +; CHECK: [[OUTER_HEADER_LOOPEXIT:.*]]: ; CHECK-NEXT: br label %[[OUTER_HEADER]] ; CHECK: [[OUTER_HEADER]]: -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[SUB]], 0 -; CHECK-NEXT: br i1 [[TMP1]], label %[[OUTER_HEADER_SPLIT:.*]], label %[[OUTER_HEADER_LOOPEXIT_PEEL_BEGIN]] -; CHECK: [[OUTER_HEADER_SPLIT]]: ; CHECK-NEXT: br label %[[INNER_HEADER:.*]] ; CHECK: [[INNER_HEADER]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ 0, %[[OUTER_HEADER_SPLIT]] ], [ [[IV_NEXT1]], %[[INNER_LATCH]] ] -; CHECK-NEXT: br i1 false, label %[[INNER_LATCH]], label %[[THEN:.*]] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[OUTER_HEADER]] ], [ [[IV_NEXT:%.*]], %[[INNER_LATCH:.*]] ] +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[IV]], [[SUB]] +; CHECK-NEXT: br i1 [[C]], label %[[INNER_LATCH]], label %[[THEN:.*]] ; CHECK: [[THEN]]: ; CHECK-NEXT: call void @foo(i32 1) ; CHECK-NEXT: br label %[[INNER_LATCH]] ; CHECK: [[INNER_LATCH]]: -; CHECK-NEXT: [[IV_NEXT1]] = add nuw i32 [[IV1]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[N]], 1 -; CHECK-NEXT: [[EXITCOND_NOT1:%.*]] = icmp eq i32 [[IV_NEXT1]], [[TMP2]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT1]], label %[[OUTER_HEADER_LOOPEXIT_PEEL_BEGIN_LOOPEXIT]], label %[[INNER_HEADER]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[OUTER_HEADER_LOOPEXIT]], label %[[INNER_HEADER]] ; entry: %sub = add i32 %n, -1 @@ -234,12 +194,6 @@ then: inner.latch: %iv.next = add i32 %iv, 1 - %ec = icmp eq i32 %iv.next, %n - br i1 %ec, label %outer.header, label %inner.header + %exitcond.not = icmp eq i32 %iv.next, %n + br i1 %exitcond.not, label %outer.header, label %inner.header } - -;. -; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]} -; CHECK: [[META1]] = !{!"llvm.loop.peeled.count", i32 1} -; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META1]]} -;. diff --git a/llvm/test/Transforms/LoopUnroll/unroll-and-peel-last-iteration.ll b/llvm/test/Transforms/LoopUnroll/unroll-and-peel-last-iteration.ll index 7d38c18d10667..f57fb2d9b7057 100644 --- a/llvm/test/Transforms/LoopUnroll/unroll-and-peel-last-iteration.ll +++ b/llvm/test/Transforms/LoopUnroll/unroll-and-peel-last-iteration.ll @@ -20,6 +20,7 @@ define i32 @peel_last_iter_of_outer_lcssa_phi_with_constant_after_unrolling_inne ; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_PEEL_BEGIN:.*]], label %[[OUTER_HEADER]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[EXIT_PEEL_BEGIN]]: ; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i16 [ [[IV_NEXT]], %[[INNER_LATCH]] ] +; CHECK-NEXT: [[DOTLCSSA_LCSSA:%.*]] = phi i32 [ 1, %[[INNER_LATCH]] ] ; CHECK-NEXT: br label %[[OUTER_HEADER_PEEL:.*]] ; CHECK: [[OUTER_HEADER_PEEL]]: ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i16 [[IV_NEXT_LCSSA]], 999