Skip to content

Commit 18ea570

Browse files
committed
[LV][NFC] Clean up tail-folding check for early-exit loops
This patch moves the single-latch-exit check from computeMaxVF() into LoopVectorizationLegality::canFoldTailByMasking(), because the check in computeMaxVF() duplicated the logic that already runs when foldTailByMasking() returns false. It also introduces HasSingleLatchExit so that early-exit loops do not enter code paths that assume non-predicated loops.
1 parent 4cb41d1 commit 18ea570

File tree

2 files changed

+11
-18
lines changed

2 files changed

+11
-18
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1924,6 +1924,14 @@ bool LoopVectorizationLegality::canFoldTailByMasking() const {
19241924
}
19251925
}
19261926

1927+
// The only loops we can vectorize without a scalar epilogue, are loops with
1928+
// a bottom-test and a single exiting block. We'd have to handle the fact
1929+
// that not every instruction executes on the last iteration. This will
1930+
// require a lane mask which varies through the vector loop body. (TODO)
1931+
if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
1932+
LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking. Requires a singe latch exit\n");
1933+
return false;
1934+
}
19271935
LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
19281936

19291937
return true;

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 3 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3987,22 +3987,6 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
39873987
break;
39883988
}
39893989

3990-
// The only loops we can vectorize without a scalar epilogue, are loops with
3991-
// a bottom-test and a single exiting block. We'd have to handle the fact
3992-
// that not every instruction executes on the last iteration. This will
3993-
// require a lane mask which varies through the vector loop body. (TODO)
3994-
if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
3995-
// If there was a tail-folding hint/switch, but we can't fold the tail by
3996-
// masking, fallback to a vectorization with a scalar epilogue.
3997-
if (ScalarEpilogueStatus == CM_ScalarEpilogueNotNeededUsePredicate) {
3998-
LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking: vectorize with a "
3999-
"scalar epilogue instead.\n");
4000-
ScalarEpilogueStatus = CM_ScalarEpilogueAllowed;
4001-
return computeFeasibleMaxVF(MaxTC, UserVF, false);
4002-
}
4003-
return FixedScalableVFPair::getNone();
4004-
}
4005-
40063990
// Now try the tail folding
40073991

40083992
// Invalidate interleave groups that require an epilogue if we can't mask
@@ -4049,7 +4033,8 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
40494033
return Rem->isZero();
40504034
};
40514035

4052-
if (MaxPowerOf2RuntimeVF > 0u) {
4036+
bool HasSingleLatchExit = TheLoop->getExitingBlock() == TheLoop->getLoopLatch();
4037+
if (HasSingleLatchExit && MaxPowerOf2RuntimeVF > 0u) {
40534038
assert((UserVF.isNonZero() || isPowerOf2_32(*MaxPowerOf2RuntimeVF)) &&
40544039
"MaxFixedVF must be a power of 2");
40554040
if (NoScalarEpilogueNeeded(*MaxPowerOf2RuntimeVF)) {
@@ -4060,7 +4045,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
40604045
}
40614046

40624047
auto ExpectedTC = getSmallBestKnownTC(PSE, TheLoop);
4063-
if (ExpectedTC && ExpectedTC <= TTI.getMinTripCountTailFoldingThreshold()) {
4048+
if (HasSingleLatchExit && ExpectedTC && ExpectedTC <= TTI.getMinTripCountTailFoldingThreshold()) {
40644049
if (MaxPowerOf2RuntimeVF > 0u) {
40654050
// If we have a low-trip-count, and the fixed-width VF is known to divide
40664051
// the trip count but the scalable factor does not, use the fixed-width

0 commit comments

Comments
 (0)