Skip to content

Commit 78a8903

Browse files
committed
[LV] Outer-loop vectorization in the default vectorizer codepath
1 parent 66556d5 commit 78a8903

File tree

9 files changed

+1843
-55
lines changed

9 files changed

+1843
-55
lines changed

llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -642,6 +642,10 @@ class LoopVectorizationLegality {
642642
/// Keep track of the loop edge to an uncountable exit, comprising a pair
643643
/// of (Exiting, Exit) blocks, if there is exactly one early exit.
644644
std::optional<std::pair<BasicBlock *, BasicBlock *>> UncountableEdge;
645+
646+
/// Contains true for a nested loop if it or any of its parents up
647+
/// to the loop to vectorize needs an inner-loop active lane mask.
648+
mutable DenseMap<const Loop *, bool> InnerLoopsNeedingPredication;
645649
};
646650

647651
} // namespace llvm

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 55 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -572,6 +572,11 @@ bool LoopVectorizationLegality::isUniform(Value *V, ElementCount VF) const {
572572
if (VF.isScalar())
573573
return true;
574574

575+
// The SCEVAddRecForUniformityRewriter does not support accesses to addresses
576+
// invariant w.r.t. the vectorized loop but with recurrences of inner loops.
577+
if (!TheLoop->isInnermost())
578+
return false;
579+
575580
// Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
576581
// never considered uniform.
577582
auto *SE = PSE.getSE();
@@ -1207,8 +1212,12 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
12071212
});
12081213
}
12091214

1210-
if (!LAI->canVectorizeMemory())
1211-
return canVectorizeIndirectUnsafeDependences();
1215+
if (!LAI->canVectorizeMemory()) {
1216+
if (canVectorizeIndirectUnsafeDependences())
1217+
return true;
1218+
1219+
return false;
1220+
}
12121221

12131222
if (LAI->hasLoadStoreDependenceInvolvingLoopInvariantAddress()) {
12141223
reportVectorizationFailure("We don't allow storing to uniform addresses",
@@ -1403,7 +1412,31 @@ bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) const {
14031412
"Uncountable exiting block must be a direct predecessor of latch");
14041413
return BB == Latch;
14051414
}
1406-
return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
1415+
1416+
if (LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT))
1417+
return true;
1418+
1419+
// Blocks in inner loops need predication if the inner loop trip-count
1420+
// is not invariant to the vectorized loop.
1421+
if (!TheLoop->isInnermost()) {
1422+
Loop *BBLoop = LI->getLoopFor(BB);
1423+
if (BBLoop != TheLoop) {
1424+
if (auto Iter = InnerLoopsNeedingPredication.find(BBLoop);
1425+
Iter != InnerLoopsNeedingPredication.end())
1426+
return Iter->second;
1427+
1428+
for (Loop *L = BBLoop; L != TheLoop; L = L->getParentLoop())
1429+
if (!isUniformLoop(L, TheLoop)) {
1430+
InnerLoopsNeedingPredication[BBLoop] = true;
1431+
return true;
1432+
}
1433+
1434+
InnerLoopsNeedingPredication[BBLoop] = false;
1435+
return false;
1436+
}
1437+
}
1438+
1439+
return false;
14071440
}
14081441

14091442
bool LoopVectorizationLegality::blockCanBePredicated(
@@ -1537,9 +1570,6 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
15371570
// Helper function to canVectorizeLoopNestCFG.
15381571
bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
15391572
bool UseVPlanNativePath) {
1540-
assert((UseVPlanNativePath || Lp->isInnermost()) &&
1541-
"VPlan-native path is not enabled.");
1542-
15431573
// TODO: ORE should be improved to show more accurate information when an
15441574
// outer loop can't be vectorized because a nested loop is not understood or
15451575
// legal. Something like: "outer_loop_location: loop not vectorized:
@@ -1573,6 +1603,23 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
15731603
return false;
15741604
}
15751605

1606+
if (Lp != TheLoop && !UseVPlanNativePath) {
1607+
// Inner loops must be in loop-simplify form, with the latch block also being
1608+
// the only exiting block, and must have a dedicated exit.
1609+
BasicBlock *Exiting = Lp->getExitingBlock();
1610+
if (!Lp->isLoopSimplifyForm() || !Exiting ||
1611+
Exiting != Lp->getLoopLatch() || !Lp->isLCSSAForm(*DT)) {
1612+
reportVectorizationFailure(
1613+
"The inner loops must exit through their latch",
1614+
"loop control flow is not understood by vectorizer",
1615+
"CFGNotUnderstood", ORE, TheLoop);
1616+
if (DoExtraAnalysis)
1617+
Result = false;
1618+
else
1619+
return false;
1620+
}
1621+
}
1622+
15761623
return Result;
15771624
}
15781625

@@ -1775,9 +1822,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
17751822

17761823
// Specific checks for outer loops. We skip the remaining legal checks at this
17771824
// point because they don't support outer loops.
1778-
if (!TheLoop->isInnermost()) {
1779-
assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1780-
1825+
if (!TheLoop->isInnermost() && UseVPlanNativePath) {
17811826
if (!canVectorizeOuterLoop()) {
17821827
reportVectorizationFailure("Unsupported outer loop",
17831828
"UnsupportedOuterLoop", ORE, TheLoop);
@@ -1790,7 +1835,6 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
17901835
return Result;
17911836
}
17921837

1793-
assert(TheLoop->isInnermost() && "Inner loop expected.");
17941838
// Check if we can if-convert non-single-bb loops.
17951839
unsigned NumBlocks = TheLoop->getNumBlocks();
17961840
if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
@@ -1811,7 +1855,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
18111855
}
18121856

18131857
if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) {
1814-
if (TheLoop->getExitingBlock()) {
1858+
if (TheLoop->getExitingBlock() || !TheLoop->isInnermost()) {
18151859
reportVectorizationFailure("Cannot vectorize uncountable loop",
18161860
"UnsupportedUncountableLoop", ORE, TheLoop);
18171861
if (DoExtraAnalysis)

0 commit comments

Comments
 (0)