Skip to content

Commit 0f8aedf

Browse files
committed
!fixup address latest comments, thanks!
1 parent 3259e66 commit 0f8aedf

File tree

6 files changed

+98
-180
lines changed

6 files changed

+98
-180
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,6 @@ AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false), cl::Hidden,
4343
cl::desc("Enable recognition of non-constant strided "
4444
"pointer induction variables."));
4545

46-
static cl::opt<bool>
47-
EnableEarlyExitVectorization("enable-early-exit-vectorization",
48-
cl::init(false), cl::Hidden, cl::desc(""));
49-
5046
namespace llvm {
5147
cl::opt<bool>
5248
HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden,
@@ -1381,7 +1377,7 @@ bool LoopVectorizationLegality::isFixedOrderRecurrence(
13811377
bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) const {
13821378
// When vectorizing early exits, create predicates for all blocks, except the
13831379
// header.
1384-
if (canVectorizeEarlyExit() && BB != TheLoop->getHeader())
1380+
if (hasUncountableEarlyExit() && BB != TheLoop->getHeader())
13851381
return true;
13861382
return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
13871383
}
@@ -1523,8 +1519,6 @@ bool LoopVectorizationLegality::canVectorizeEarlyExit() const {
15231519
// Currently only allow vectorizing loops with early exits, if early-exit
15241520
// vectorization is explicitly enabled and the loop has metadata to force
15251521
// vectorization.
1526-
if (!EnableEarlyExitVectorization)
1527-
return false;
15281522

15291523
SmallVector<BasicBlock *> Exiting;
15301524
TheLoop->getExitingBlocks(Exiting);

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 65 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,11 @@ static cl::opt<bool> UseWiderVFIfCallVariantsPresent(
384384
cl::Hidden,
385385
cl::desc("Try wider VFs if they enable the use of vector variants"));
386386

387+
static cl::opt<bool> EnableEarlyExitVectorization(
388+
"enable-early-exit-vectorization", cl::init(false), cl::Hidden,
389+
cl::desc(
390+
"Enable vectorization of early exit loops with uncountable exits."));
391+
387392
// Likelihood of bypassing the vectorized loop because assumptions about SCEV
388393
// variables not overflowing do not hold. See `emitSCEVChecks`.
389394
static constexpr uint32_t SCEVCheckBypassWeights[] = {1, 127};
@@ -1358,14 +1363,13 @@ class LoopVectorizationCostModel {
13581363
LLVM_DEBUG(dbgs() << "LV: Loop does not require scalar epilogue\n");
13591364
return false;
13601365
}
1361-
// If we might exit from anywhere but the latch, must run the exiting
1362-
// iteration in scalar form.
1363-
if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
1364-
if (!Legal->canVectorizeEarlyExit()) {
1365-
LLVM_DEBUG(
1366-
dbgs() << "LV: Loop requires scalar epilogue: multiple exits\n");
1367-
return true;
1368-
}
1366+
// If we might exit from anywhere but the latch and early exit vectorization
1367+
// is disabled, we must run the exiting iteration in scalar form.
1368+
if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch() &&
1369+
!(EnableEarlyExitVectorization && Legal->hasUncountableEarlyExit())) {
1370+
LLVM_DEBUG(
1371+
dbgs() << "LV: Loop requires scalar epilogue: multiple exits\n");
1372+
return true;
13691373
}
13701374
if (IsVectorizing && InterleaveInfo.requiresScalarEpilogue()) {
13711375
LLVM_DEBUG(dbgs() << "LV: Loop requires scalar epilogue: "
@@ -2576,7 +2580,7 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
25762580
assert(LoopVectorPreHeader && "Invalid loop structure");
25772581
LoopExitBlock = OrigLoop->getUniqueExitBlock(); // may be nullptr
25782582
assert((LoopExitBlock || Cost->requiresScalarEpilogue(VF.isVector()) ||
2579-
Legal->canVectorizeEarlyExit()) &&
2583+
Legal->hasUncountableEarlyExit()) &&
25802584
"multiple exit loop without required epilogue?");
25812585

25822586
LoopMiddleBlock =
@@ -2809,6 +2813,8 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
28092813
}
28102814
}
28112815

2816+
assert((MissingVals.empty() || OrigLoop->getUniqueExitBlock()) &&
2817+
"Expected a single exit block for escaping values");
28122818
for (auto &I : MissingVals) {
28132819
PHINode *PHI = cast<PHINode>(I.first);
28142820
// One corner case we have to handle is two IVs "chasing" each-other,
@@ -2819,9 +2825,6 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
28192825
if (PHI->getBasicBlockIndex(MiddleBlock) == -1)
28202826
PHI->addIncoming(I.second, MiddleBlock);
28212827
}
2822-
2823-
assert((MissingVals.empty() || OrigLoop->getUniqueExitBlock()) &&
2824-
"Expected a single exit block");
28252828
}
28262829

28272830
namespace {
@@ -3597,7 +3600,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
35973600
for (BasicBlock *E : Exiting) {
35983601
auto *Cmp = dyn_cast<Instruction>(E->getTerminator()->getOperand(0));
35993602
if (Cmp && TheLoop->contains(Cmp) && Cmp->hasOneUse() &&
3600-
(TheLoop->getLoopLatch() == E || !Legal->canVectorizeEarlyExit()))
3603+
(TheLoop->getLoopLatch() == E || !Legal->hasUncountableEarlyExit()))
36013604
AddToWorklistIfAllowed(Cmp);
36023605
}
36033606

@@ -8144,7 +8147,7 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
81448147
// If source is an exiting block, we know the exit edge is dynamically dead
81458148
// in the vector loop, and thus we don't need to restrict the mask. Avoid
81468149
// adding uses of an otherwise potentially dead instruction.
8147-
if (!Legal->canVectorizeEarlyExit() && OrigLoop->isLoopExiting(Src))
8150+
if (!Legal->hasUncountableEarlyExit() && OrigLoop->isLoopExiting(Src))
81488151
return EdgeMaskCache[Edge] = SrcMask;
81498152

81508153
VPValue *EdgeMask = getVPValueOrAddLiveIn(BI->getCondition());
@@ -8835,39 +8838,43 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
88358838
static SetVector<VPIRInstruction *> collectUsersInExitBlock(
88368839
Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan,
88378840
const MapVector<PHINode *, InductionDescriptor> &Inductions) {
8838-
auto *MiddleVPBB = Plan.getMiddleBlock();
8839-
// No edge from the middle block to the unique exit block has been inserted
8840-
// and there is nothing to fix from vector loop; phis should have incoming
8841-
// from scalar loop only.
8842-
if (MiddleVPBB->getNumSuccessors() != 2)
8843-
return {};
88448841
SetVector<VPIRInstruction *> ExitUsersToFix;
8845-
VPBasicBlock *ExitVPBB = cast<VPIRBasicBlock>(MiddleVPBB->getSuccessors()[0]);
8846-
BasicBlock *ExitingBB = OrigLoop->getExitingBlock();
8847-
for (VPRecipeBase &R : *ExitVPBB) {
8848-
auto *ExitIRI = dyn_cast<VPIRInstruction>(&R);
8849-
if (!ExitIRI)
8850-
continue;
8851-
auto *ExitPhi = dyn_cast<PHINode>(&ExitIRI->getInstruction());
8852-
if (!ExitPhi)
8853-
break;
8854-
Value *IncomingValue = ExitPhi->getIncomingValueForBlock(ExitingBB);
8855-
VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue);
8856-
// Exit values for inductions are computed and updated outside of VPlan and
8857-
// independent of induction recipes.
8858-
// TODO: Compute induction exit values in VPlan.
8859-
if ((isa<VPWidenIntOrFpInductionRecipe>(V) &&
8860-
!cast<VPWidenIntOrFpInductionRecipe>(V)->getTruncInst()) ||
8861-
isa<VPWidenPointerInductionRecipe>(V) ||
8862-
(isa<Instruction>(IncomingValue) &&
8863-
OrigLoop->contains(cast<Instruction>(IncomingValue)) &&
8864-
any_of(IncomingValue->users(), [&Inductions](User *U) {
8865-
auto *P = dyn_cast<PHINode>(U);
8866-
return P && Inductions.contains(P);
8867-
})))
8842+
for (VPBlockBase *VPB : vp_depth_first_shallow(
8843+
Plan.getVectorLoopRegion()->getSingleSuccessor())) {
8844+
if (VPB->getNumSuccessors() != 0 || VPB == Plan.getScalarHeader())
88688845
continue;
8869-
ExitUsersToFix.insert(ExitIRI);
8870-
ExitIRI->addOperand(V);
8846+
auto *ExitVPBB = cast<VPIRBasicBlock>(VPB);
8847+
BasicBlock *ExitBB = ExitVPBB->getIRBasicBlock();
8848+
BasicBlock *ExitingBB = find_singleton<BasicBlock>(
8849+
to_vector(predecessors(ExitBB)),
8850+
[OrigLoop](BasicBlock *Pred, bool AllowRepeats) {
8851+
return OrigLoop->contains(Pred) ? Pred : nullptr;
8852+
});
8853+
for (VPRecipeBase &R : *ExitVPBB) {
8854+
auto *ExitIRI = dyn_cast<VPIRInstruction>(&R);
8855+
if (!ExitIRI)
8856+
continue;
8857+
auto *ExitPhi = dyn_cast<PHINode>(&ExitIRI->getInstruction());
8858+
if (!ExitPhi)
8859+
break;
8860+
Value *IncomingValue = ExitPhi->getIncomingValueForBlock(ExitingBB);
8861+
VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue);
8862+
// Exit values for inductions are computed and updated outside of VPlan
8863+
// and independent of induction recipes.
8864+
// TODO: Compute induction exit values in VPlan.
8865+
if ((isa<VPWidenIntOrFpInductionRecipe>(V) &&
8866+
!cast<VPWidenIntOrFpInductionRecipe>(V)->getTruncInst()) ||
8867+
isa<VPWidenPointerInductionRecipe>(V) ||
8868+
(isa<Instruction>(IncomingValue) &&
8869+
OrigLoop->contains(cast<Instruction>(IncomingValue)) &&
8870+
any_of(IncomingValue->users(), [&Inductions](User *U) {
8871+
auto *P = dyn_cast<PHINode>(U);
8872+
return P && Inductions.contains(P);
8873+
})))
8874+
continue;
8875+
ExitUsersToFix.insert(ExitIRI);
8876+
ExitIRI->addOperand(V);
8877+
}
88718878
}
88728879
return ExitUsersToFix;
88738880
}
@@ -9168,16 +9175,15 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
91689175
"VPBasicBlock");
91699176
RecipeBuilder.fixHeaderPhis();
91709177

9171-
if (Legal->canVectorizeEarlyExit()) {
9172-
VPlanTransforms::convertToMultiCond(*Plan, *PSE.getSE(), OrigLoop,
9173-
RecipeBuilder);
9174-
} else {
9178+
if (Legal->hasUncountableEarlyExit()) {
9179+
VPlanTransforms::handleUncountableEarlyExit(*Plan, *PSE.getSE(), OrigLoop,
9180+
RecipeBuilder);
9181+
}
91759182
addScalarResumePhis(RecipeBuilder, *Plan);
91769183
SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlock(
91779184
OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars());
91789185
addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix);
91799186
addUsersInExitBlock(*Plan, ExitUsersToFix);
9180-
}
91819187

91829188
// ---------------------------------------------------------------------------
91839189
// Transform initial VPlan: Apply previously taken decisions, in order, to
@@ -9940,12 +9946,15 @@ bool LoopVectorizePass::processLoop(Loop *L) {
99409946
}
99419947

99429948
if (LVL.hasUncountableEarlyExit()) {
9943-
reportVectorizationFailure("Auto-vectorization of loops with uncountable "
9944-
"early exit is not yet supported",
9945-
"Auto-vectorization of loops with uncountable "
9946-
"early exit is not yet supported",
9947-
"UncountableEarlyExitLoopsUnsupported", ORE, L);
9948-
return false;
9949+
if (!EnableEarlyExitVectorization) {
9950+
reportVectorizationFailure("Auto-vectorization of loops with uncountable "
9951+
"early exit is not yet supported",
9952+
"Auto-vectorization of loops with uncountable "
9953+
"early exit is not yet supported",
9954+
"UncountableEarlyExitLoopsUnsupported", ORE,
9955+
L);
9956+
return false;
9957+
}
99499958
}
99509959

99519960
// Entrance to the VPlan-native vectorization path. Outer loops are processed

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -878,15 +878,9 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
878878
auto Plan = std::make_unique<VPlan>(Entry, VecPreheader, ScalarHeader);
879879

880880
// Create SCEV and VPValue for the trip count.
881-
882-
// Currently only loops with countable exits are vectorized, but calling
883-
// getSymbolicMaxBackedgeTakenCount allows enablement work for loops with
884-
// uncountable exits whilst also ensuring the symbolic maximum and known
885-
// back-edge taken count remain identical for loops with countable exits.
881+
// We use the symbolic max backedge-taken-count, which is used when
882+
// vectorizing loops with uncountable early exits
886883
const SCEV *BackedgeTakenCountSCEV = PSE.getSymbolicMaxBackedgeTakenCount();
887-
assert((!isa<SCEVCouldNotCompute>(BackedgeTakenCountSCEV) &&
888-
BackedgeTakenCountSCEV == PSE.getBackedgeTakenCount()) &&
889-
"Invalid loop count");
890884
ScalarEvolution &SE = *PSE.getSE();
891885
const SCEV *TripCount = SE.getTripCountFromExitCount(BackedgeTakenCountSCEV,
892886
InductionTy, TheLoop);
@@ -922,6 +916,9 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
922916
// 3) Otherwise, construct a runtime check.
923917
BasicBlock *IRExitBlock = TheLoop->getUniqueExitBlock();
924918
if (!IRExitBlock) {
919+
// If there's no unique exit block (i.e. vectorizing with an uncountable
920+
// early exit), use the block exiting from the latch. The other uncountable
921+
// exit blocks will be added later.
925922
auto *Term = cast<BranchInst>(TheLoop->getLoopLatch()->getTerminator());
926923
IRExitBlock = TheLoop->contains(Term->getSuccessor(0))
927924
? Term->getSuccessor(1)

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 17 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1791,51 +1791,33 @@ void VPlanTransforms::createInterleaveGroups(
17911791
}
17921792
}
17931793

1794-
void VPlanTransforms::convertToMultiCond(VPlan &Plan, ScalarEvolution &SE,
1795-
Loop *OrigLoop,
1796-
VPRecipeBuilder &RecipeBuilder) {
1794+
void VPlanTransforms::handleUncountableEarlyExit(
1795+
VPlan &Plan, ScalarEvolution &SE, Loop *OrigLoop,
1796+
VPRecipeBuilder &RecipeBuilder) {
17971797
auto *LatchVPBB =
17981798
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getExiting());
17991799
VPBuilder Builder(LatchVPBB->getTerminator());
18001800
auto *MiddleVPBB = Plan.getMiddleBlock();
1801-
18021801
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
18031802

1804-
const SCEV *BackedgeTakenCount =
1805-
SE.getExitCount(OrigLoop, OrigLoop->getLoopLatch());
1806-
const SCEV *TripCount = SE.getTripCountFromExitCount(
1807-
BackedgeTakenCount, Plan.getCanonicalIV()->getScalarType(), OrigLoop);
1808-
VPValue *NewTC = vputils::getOrCreateVPValueForSCEVExpr(Plan, TripCount, SE);
1809-
Plan.getTripCount()->replaceAllUsesWith(NewTC);
1810-
Plan.resetTripCount(NewTC);
1811-
18121803
VPValue *EarlyExitTaken = nullptr;
18131804
SmallVector<BasicBlock *> ExitingBBs;
18141805
OrigLoop->getExitingBlocks(ExitingBBs);
1806+
1807+
// Process all uncountable exiting blocks. For each exiting block, update the
1808+
// EarlyExitTaken, which tracks if any uncountable early exit has been taken.
1809+
// Also split the middle block and branch to the exit block for the early exit
1810+
// if it has been taken.
18151811
for (BasicBlock *Exiting : ExitingBBs) {
1812+
if (Exiting == OrigLoop->getLoopLatch())
1813+
continue;
1814+
18161815
auto *ExitingTerm = cast<BranchInst>(Exiting->getTerminator());
18171816
BasicBlock *TrueSucc = ExitingTerm->getSuccessor(0);
18181817
BasicBlock *FalseSucc = ExitingTerm->getSuccessor(1);
18191818
VPIRBasicBlock *VPExitBlock;
1820-
if (OrigLoop->getUniqueExitBlock() || Exiting == OrigLoop->getLoopLatch())
1821-
VPExitBlock = cast<VPIRBasicBlock>(MiddleVPBB->getSuccessors()[0]);
1822-
else
1823-
VPExitBlock = VPIRBasicBlock::fromBasicBlock(
1824-
!OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc);
1825-
1826-
for (VPRecipeBase &R : *VPExitBlock) {
1827-
auto *ExitIRI = cast<VPIRInstruction>(&R);
1828-
auto *ExitPhi = dyn_cast<PHINode>(&ExitIRI->getInstruction());
1829-
if (!ExitPhi)
1830-
break;
1831-
Value *IncomingValue = ExitPhi->getIncomingValueForBlock(Exiting);
1832-
VPValue *V = RecipeBuilder.getVPValueOrAddLiveIn(IncomingValue);
1833-
ExitIRI->addOperand(V);
1834-
}
1835-
1836-
if (Exiting == OrigLoop->getLoopLatch()) {
1837-
continue;
1838-
}
1819+
VPExitBlock = VPIRBasicBlock::fromBasicBlock(
1820+
!OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc);
18391821

18401822
VPValue *M = RecipeBuilder.getBlockInMask(
18411823
OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc);
@@ -1851,6 +1833,10 @@ void VPlanTransforms::convertToMultiCond(VPlan &Plan, ScalarEvolution &SE,
18511833
VPBuilder MiddleBuilder(NewMiddle);
18521834
MiddleBuilder.createNaryOp(VPInstruction::BranchOnCond, {EarlyExitTaken});
18531835
}
1836+
1837+
// Replace the condition controlling the exit from the vector loop with one
1838+
// exiting if either the original condition of the vector latch is true or any
1839+
// early exit has been taken.
18541840
auto *Term = dyn_cast<VPInstruction>(LatchVPBB->getTerminator());
18551841
auto *IsLatchExiting = Builder.createICmp(
18561842
CmpInst::ICMP_EQ, Term->getOperand(0), Term->getOperand(1));

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -124,9 +124,9 @@ struct VPlanTransforms {
124124
/// Remove dead recipes from \p Plan.
125125
static void removeDeadRecipes(VPlan &Plan);
126126

127-
static void convertToMultiCond(VPlan &Plan, ScalarEvolution &SE,
128-
Loop *OrigLoop,
129-
VPRecipeBuilder &RecipeBuilder);
127+
static void handleUncountableEarlyExit(VPlan &Plan, ScalarEvolution &SE,
128+
Loop *OrigLoop,
129+
VPRecipeBuilder &RecipeBuilder);
130130
};
131131

132132
} // namespace llvm

0 commit comments

Comments
 (0)