Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion llvm/include/llvm/Transforms/Utils/LoopPeel.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ gatherPeelingPreferences(Loop *L, ScalarEvolution &SE,
void computePeelCount(Loop *L, unsigned LoopSize,
TargetTransformInfo::PeelingPreferences &PP,
unsigned TripCount, DominatorTree &DT,
ScalarEvolution &SE, AssumptionCache *AC = nullptr,
ScalarEvolution &SE, const TargetTransformInfo &TTI,
AssumptionCache *AC = nullptr,
unsigned Threshold = UINT_MAX);

} // end namespace llvm
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1014,7 +1014,7 @@ bool llvm::computeUnrollCount(
}

// 5th priority is loop peeling.
computePeelCount(L, LoopSize, PP, TripCount, DT, SE, AC, UP.Threshold);
computePeelCount(L, LoopSize, PP, TripCount, DT, SE, TTI, AC, UP.Threshold);
if (PP.PeelCount) {
UP.Runtime = false;
UP.Count = 1;
Expand Down
82 changes: 61 additions & 21 deletions llvm/lib/Transforms/Utils/LoopPeel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
Expand Down Expand Up @@ -330,11 +331,7 @@ static unsigned peelToTurnInvariantLoadsDerefencebale(Loop &L,

bool llvm::canPeelLastIteration(const Loop &L, ScalarEvolution &SE) {
const SCEV *BTC = SE.getBackedgeTakenCount(&L);
// The loop must execute at least 2 iterations to guarantee that peeled
// iteration executes.
// TODO: Add checks during codegen.
if (isa<SCEVCouldNotCompute>(BTC) ||
!SE.isKnownPredicate(CmpInst::ICMP_UGT, BTC, SE.getZero(BTC->getType())))
if (isa<SCEVCouldNotCompute>(BTC))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we be checking isHighCostExpansion() or something like that somewhere?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, updated, thanks!

return false;

// Check if the exit condition of the loop can be adjusted by the peeling
Expand Down Expand Up @@ -364,12 +361,18 @@ bool llvm::canPeelLastIteration(const Loop &L, ScalarEvolution &SE) {
/// is known at the second-to-last.
static bool shouldPeelLastIteration(Loop &L, CmpPredicate Pred,
const SCEVAddRecExpr *LeftAR,
const SCEV *RightSCEV,
ScalarEvolution &SE) {
const SCEV *RightSCEV, ScalarEvolution &SE,
const TargetTransformInfo &TTI) {
if (!canPeelLastIteration(L, SE))
return false;

const SCEV *BTC = SE.getBackedgeTakenCount(&L);
SCEVExpander Expander(SE, L.getHeader()->getDataLayout(), "loop-peel");
if (!SE.isKnownNonZero(BTC) &&
Expander.isHighCostExpansion(BTC, &L, SCEVCheapExpansionBudget, &TTI,
L.getLoopPredecessor()->getTerminator()))
return false;

const SCEV *ValAtLastIter = LeftAR->evaluateAtIteration(BTC, SE);
const SCEV *ValAtSecondToLastIter = LeftAR->evaluateAtIteration(
SE.getMinusSCEV(BTC, SE.getOne(BTC->getType())), SE);
Expand All @@ -391,7 +394,8 @@ static bool shouldPeelLastIteration(Loop &L, CmpPredicate Pred,
// ..
// }
static std::pair<unsigned, unsigned>
countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE) {
countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE,
const TargetTransformInfo &TTI) {
assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form");
unsigned DesiredPeelCount = 0;
unsigned DesiredPeelCountLast = 0;
Expand Down Expand Up @@ -479,7 +483,7 @@ countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE) {
const SCEV *Step = LeftAR->getStepRecurrence(SE);
if (!PeelWhilePredicateIsKnown(NewPeelCount, IterVal, RightSCEV, Step,
Pred)) {
if (shouldPeelLastIteration(L, Pred, LeftAR, RightSCEV, SE))
if (shouldPeelLastIteration(L, Pred, LeftAR, RightSCEV, SE, TTI))
DesiredPeelCountLast = 1;
return;
}
Expand Down Expand Up @@ -593,8 +597,8 @@ static bool violatesLegacyMultiExitLoopCheck(Loop *L) {
void llvm::computePeelCount(Loop *L, unsigned LoopSize,
TargetTransformInfo::PeelingPreferences &PP,
unsigned TripCount, DominatorTree &DT,
ScalarEvolution &SE, AssumptionCache *AC,
unsigned Threshold) {
ScalarEvolution &SE, const TargetTransformInfo &TTI,
AssumptionCache *AC, unsigned Threshold) {
assert(LoopSize > 0 && "Zero loop size is not allowed!");
// Save the PP.PeelCount value set by the target in
// TTI.getPeelingPreferences or by the flag -unroll-peel-count.
Expand Down Expand Up @@ -656,7 +660,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
}

const auto &[CountToEliminateCmps, CountToEliminateCmpsLast] =
countToEliminateCompares(*L, MaxPeelCount, SE);
countToEliminateCompares(*L, MaxPeelCount, SE, TTI);
DesiredPeelCount = std::max(DesiredPeelCount, CountToEliminateCmps);

if (DesiredPeelCount == 0)
Expand Down Expand Up @@ -822,7 +826,7 @@ static void initBranchWeights(DenseMap<Instruction *, WeightInfo> &WeightInfos,
/// instructions in the last peeled-off iteration.
static void cloneLoopBlocks(
Loop *L, unsigned IterNumber, bool PeelLast, BasicBlock *InsertTop,
BasicBlock *InsertBot,
BasicBlock *InsertBot, BasicBlock *OrigPreHeader,
SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges,
SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT,
Expand Down Expand Up @@ -914,12 +918,22 @@ static void cloneLoopBlocks(
// loop iteration. Since this copy is no longer part of the loop, we
// resolve this statically:
if (PeelLast) {
// For the last iteration, we use the value from the latch of the original
// loop directly.
// For the last iteration, we introduce new phis for each header phi in
// InsertTop, using the incoming value from the preheader for the original
// preheader (when skipping the main loop) and the incoming value from the
// latch for the latch (when continuing from the main loop).
IRBuilder<> B(InsertTop->getTerminator());
for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
VMap[&*I] = NewPHI->getIncomingValueForBlock(Latch);
PHINode *PN = B.CreatePHI(NewPHI->getType(), 2);
NewPHI->eraseFromParent();
if (OrigPreHeader)
PN->addIncoming(cast<PHINode>(&*I)->getIncomingValueForBlock(PreHeader),
OrigPreHeader);

PN->addIncoming(cast<PHINode>(&*I)->getIncomingValueForBlock(Latch),
Latch);
VMap[&*I] = PN;
}
} else {
// For the first iteration, we use the value from the preheader directly.
Expand Down Expand Up @@ -1053,7 +1067,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
// Set up all the necessary basic blocks.
BasicBlock *InsertTop;
BasicBlock *InsertBot;
BasicBlock *NewPreHeader;
BasicBlock *NewPreHeader = nullptr;
DenseMap<Instruction *, Value *> ExitValues;
if (PeelLast) {
// It is convenient to split the single exit block from the latch the
Expand Down Expand Up @@ -1084,11 +1098,34 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
for (PHINode &P : Exit->phis())
ExitValues[&P] = P.getIncomingValueForBlock(Latch);

const SCEV *BTC = SE->getBackedgeTakenCount(L);

InsertTop = SplitEdge(Latch, Exit, &DT, LI);
InsertBot = SplitBlock(InsertTop, InsertTop->getTerminator(), &DT, LI);

InsertTop->setName(Exit->getName() + ".peel.begin");
InsertBot->setName(Exit->getName() + ".peel.next");
NewPreHeader = nullptr;

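// If the original loop may execute only a single iteration (its
// backedge-taken count is not known to be non-zero), insert a trip count
// check in the preheader that skips the original loop and branches directly
// to the peeled-off last iteration when the backedge-taken count is zero.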
if (!SE->isKnownNonZero(BTC)) {
NewPreHeader = SplitEdge(PreHeader, Header, &DT, LI);
SCEVExpander Expander(*SE, Latch->getDataLayout(), "loop-peel");

BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
Value *BTCValue =
Expander.expandCodeFor(BTC, BTC->getType(), PreHeaderBR);
IRBuilder<> B(PreHeaderBR);
Value *Cond =
B.CreateICmpNE(BTCValue, ConstantInt::get(BTCValue->getType(), 0));
B.CreateCondBr(Cond, NewPreHeader, InsertTop);
PreHeaderBR->eraseFromParent();

// PreHeader now dominates InsertTop.
DT.changeImmediateDominator(InsertTop, PreHeader);
}
} else {
// It is convenient to split the preheader into 3 parts - two blocks to
// anchor the peeled copy of the loop body, and a new preheader for the
Expand Down Expand Up @@ -1162,8 +1199,9 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
SmallVector<BasicBlock *, 8> NewBlocks;

cloneLoopBlocks(L, Iter, PeelLast, InsertTop, InsertBot, ExitEdges,
NewBlocks, LoopBlocks, VMap, LVMap, &DT, LI,
cloneLoopBlocks(L, Iter, PeelLast, InsertTop, InsertBot,
NewPreHeader ? PreHeader : nullptr, ExitEdges, NewBlocks,
LoopBlocks, VMap, LVMap, &DT, LI,
LoopLocalNoAliasDeclScopes, *SE);

// Remap to use values from the current iteration instead of the
Expand Down Expand Up @@ -1216,9 +1254,11 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,

if (PeelLast) {
// Now adjust users of the original exit values by replacing them with the
// exit value from the peeled iteration.
for (const auto &[P, E] : ExitValues)
// exit value from the peeled iteration and remove them.
for (const auto &[P, E] : ExitValues) {
P->replaceAllUsesWith(isa<Constant>(E) ? E : &*VMap.lookup(E));
P->eraseFromParent();
}
formLCSSA(*L, DT, LI, SE);
} else {
// Now adjust the phi nodes in the loop header to get their initial values
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,41 @@ define i32 @test_expansion_cost_2(i32 %start, i32 %end) {
; BUDGET3-SAME: i32 [[START:%.*]], i32 [[END:%.*]]) {
; BUDGET3-NEXT: [[ENTRY:.*]]:
; BUDGET3-NEXT: [[SUB:%.*]] = add i32 [[END]], -1
; BUDGET3-NEXT: [[TMP0:%.*]] = sub i32 [[SUB]], [[START]]
; BUDGET3-NEXT: [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0
; BUDGET3-NEXT: br i1 [[TMP1]], label %[[ENTRY_SPLIT:.*]], label %[[EXIT_PEEL_BEGIN:.*]]
; BUDGET3: [[ENTRY_SPLIT]]:
; BUDGET3-NEXT: br label %[[LOOP_HEADER:.*]]
; BUDGET3: [[LOOP_HEADER]]:
; BUDGET3-NEXT: [[TMP3:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[IV_NEXT_PEEL:%.*]], %[[LOOP_LATCH:.*]] ]
; BUDGET3-NEXT: [[C_PEEL:%.*]] = icmp eq i32 [[TMP3]], [[SUB]]
; BUDGET3-NEXT: br i1 [[C_PEEL]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
; BUDGET3-NEXT: [[IV:%.*]] = phi i32 [ [[START]], %[[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; BUDGET3-NEXT: [[C:%.*]] = icmp eq i32 [[IV]], [[SUB]]
; BUDGET3-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
; BUDGET3: [[THEN]]:
; BUDGET3-NEXT: br label %[[LOOP_LATCH]]
; BUDGET3: [[LOOP_LATCH]]:
; BUDGET3-NEXT: [[IV_NEXT_PEEL]] = add nsw i32 [[TMP3]], 1
; BUDGET3-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1
; BUDGET3-NEXT: [[TMP2:%.*]] = sub i32 [[END]], 1
; BUDGET3-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[TMP2]]
; BUDGET3-NEXT: br i1 [[EC]], label %[[EXIT_PEEL_BEGIN_LOOPEXIT:.*]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP0:![0-9]+]]
; BUDGET3: [[EXIT_PEEL_BEGIN_LOOPEXIT]]:
; BUDGET3-NEXT: [[DOTPH:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP_LATCH]] ]
; BUDGET3-NEXT: br label %[[EXIT_PEEL_BEGIN]]
; BUDGET3: [[EXIT_PEEL_BEGIN]]:
; BUDGET3-NEXT: [[TMP3:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[DOTPH]], %[[EXIT_PEEL_BEGIN_LOOPEXIT]] ]
; BUDGET3-NEXT: br label %[[LOOP_HEADER_PEEL:.*]]
; BUDGET3: [[LOOP_HEADER_PEEL]]:
; BUDGET3-NEXT: [[C_PEEL:%.*]] = icmp eq i32 [[TMP3]], [[SUB]]
; BUDGET3-NEXT: br i1 [[C_PEEL]], label %[[THEN_PEEL:.*]], label %[[LOOP_LATCH_PEEL:.*]]
; BUDGET3: [[THEN_PEEL]]:
; BUDGET3-NEXT: br label %[[LOOP_LATCH_PEEL]]
; BUDGET3: [[LOOP_LATCH_PEEL]]:
; BUDGET3-NEXT: [[IV_NEXT_PEEL:%.*]] = add nsw i32 [[TMP3]], 1
; BUDGET3-NEXT: [[EC_PEEL:%.*]] = icmp eq i32 [[IV_NEXT_PEEL]], [[END]]
; BUDGET3-NEXT: br i1 [[EC_PEEL]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
; BUDGET3-NEXT: br i1 [[EC_PEEL]], label %[[EXIT_PEEL_NEXT:.*]], label %[[EXIT_PEEL_NEXT]]
; BUDGET3: [[EXIT_PEEL_NEXT]]:
; BUDGET3-NEXT: br label %[[LOOP_HEADER_PEEL_NEXT:.*]]
; BUDGET3: [[LOOP_HEADER_PEEL_NEXT]]:
; BUDGET3-NEXT: br label %[[EXIT:.*]]
; BUDGET3: [[EXIT]]:
; BUDGET3-NEXT: ret i32 0
;
Expand All @@ -59,3 +83,7 @@ loop.latch:
exit:
ret i32 0
}
;.
; BUDGET3: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
; BUDGET3: [[META1]] = !{!"llvm.loop.peeled.count", i32 1}
;.
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ define i64 @peel_single_block_loop_iv_step_1() {
; CHECK-NEXT: br i1 [[EC1]], label %[[LOOP]], label %[[EXIT_PEEL_BEGIN:.*]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[EXIT_PEEL_BEGIN]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT1]], %[[LOOP]] ]
; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV1]], %[[LOOP]] ]
; CHECK-NEXT: br label %[[LOOP_PEEL:.*]]
; CHECK: [[LOOP_PEEL]]:
; CHECK-NEXT: [[CMP18_NOT:%.*]] = icmp eq i64 [[IV]], 63
Expand Down Expand Up @@ -92,7 +91,6 @@ define i64 @peel_single_block_loop_iv_step_1_eq_pred() {
; CHECK-NEXT: br i1 [[CMP_PEEL]], label %[[EXIT_PEEL_BEGIN:.*]], label %[[LOOP]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK: [[EXIT_PEEL_BEGIN]]:
; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_LCSSA]], %[[LOOP]] ]
; CHECK-NEXT: [[IV_LCSSA1:%.*]] = phi i64 [ [[IV]], %[[LOOP]] ]
; CHECK-NEXT: br label %[[LOOP_PEEL:.*]]
; CHECK: [[LOOP_PEEL]]:
; CHECK-NEXT: [[CMP_PEEL1:%.*]] = icmp eq i64 [[IV_NEXT_LCSSA]], 63
Expand Down Expand Up @@ -171,7 +169,6 @@ define i64 @peel_single_block_loop_iv_step_1_nested_loop() {
; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[OUTER_LATCH_PEEL_BEGIN:.*]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[OUTER_LATCH_PEEL_BEGIN]]:
; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_NEXT]], %[[LOOP]] ]
; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV]], %[[LOOP]] ]
; CHECK-NEXT: br label %[[LOOP_PEEL:.*]]
; CHECK: [[LOOP_PEEL]]:
; CHECK-NEXT: [[CMP_PEEL:%.*]] = icmp eq i64 [[IV_NEXT_LCSSA]], 63
Expand Down Expand Up @@ -237,7 +234,6 @@ define i64 @peel_multi_block_loop_iv_step_1() {
; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT_PEEL_BEGIN:.*]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[EXIT_PEEL_BEGIN]]:
; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_NEXT]], %[[LATCH]] ]
; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV]], %[[LATCH]] ]
; CHECK-NEXT: br label %[[LOOP_PEEL:.*]]
; CHECK: [[LOOP_PEEL]]:
; CHECK-NEXT: [[CMP_PEEL:%.*]] = icmp eq i64 [[IV_NEXT_LCSSA]], 63
Expand Down Expand Up @@ -365,7 +361,6 @@ define i64 @peel_single_block_loop_iv_step_1_btc_1() {
; CHECK-NEXT: br i1 false, label %[[LOOP]], label %[[EXIT_PEEL_BEGIN:.*]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[EXIT_PEEL_BEGIN]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT1]], %[[LOOP]] ]
; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV1]], %[[LOOP]] ]
; CHECK-NEXT: br label %[[LOOP_PEEL:.*]]
; CHECK: [[LOOP_PEEL]]:
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV]], 1
Expand Down Expand Up @@ -483,9 +478,8 @@ define i32 @peel_loop_with_branch_and_phi_uses(ptr %x, i1 %c) {
; CHECK-NEXT: [[EC1:%.*]] = icmp ne i32 [[IV_NEXT1]], 99
; CHECK-NEXT: br i1 [[EC1]], label %[[LOOP_HEADER]], label %[[LOOPEXIT_PEEL_BEGIN:.*]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: [[LOOPEXIT_PEEL_BEGIN]]:
; CHECK-NEXT: [[RED:%.*]] = phi i32 [ [[ADD1]], %[[LOOP_LATCH]] ]
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT1]], %[[LOOP_LATCH]] ]
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD1]], %[[LOOP_LATCH]] ]
; CHECK-NEXT: [[RED:%.*]] = phi i32 [ [[ADD1]], %[[LOOP_LATCH]] ]
; CHECK-NEXT: br label %[[LOOP_HEADER_PEEL:.*]]
; CHECK: [[LOOP_HEADER_PEEL]]:
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[IV]], 99
Expand Down
Loading