-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[LoopVectorize] Enable more early exit vectorisation tests #117008
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -3016,6 +3016,22 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) { | |||||
PSE.getSE()->forgetLoop(OrigLoop); | ||||||
PSE.getSE()->forgetBlockAndLoopDispositions(); | ||||||
|
||||||
// When dealing with uncountable early exits we create middle.split blocks | ||||||
// between the vector loop region and the exit block. These blocks need to | ||||||
// be added to the outer loop, if there is one. | ||||||
VPRegionBlock *VectorRegion = State.Plan->getVectorLoopRegion(); | ||||||
Loop *OuterLoop = OrigLoop->getParentLoop(); | ||||||
if (Legal->hasUncountableEarlyExit() && OuterLoop) { | ||||||
VPBasicBlock *MiddleVPBB = State.Plan->getMiddleBlock(); | ||||||
VPBlockBase *PredVPBB = MiddleVPBB->getSinglePredecessor(); | ||||||
while (PredVPBB && PredVPBB != VectorRegion) { | ||||||
BasicBlock *MiddleSplitBB = | ||||||
State.CFG.VPBB2IRBB[cast<VPBasicBlock>(PredVPBB)]; | ||||||
OuterLoop->addBasicBlockToLoop(MiddleSplitBB, *LI); | ||||||
PredVPBB = PredVPBB->getSinglePredecessor(); | ||||||
} | ||||||
} | ||||||
|
||||||
// After vectorization, the exit blocks of the original loop will have | ||||||
// additional predecessors. Invalidate SCEVs for the exit phis in case SE | ||||||
// looked through single-entry phis. | ||||||
|
@@ -3046,7 +3062,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) { | |||||
for (Instruction *PI : PredicatedInstructions) | ||||||
sinkScalarOperands(&*PI); | ||||||
|
||||||
VPRegionBlock *VectorRegion = State.Plan->getVectorLoopRegion(); | ||||||
VPBasicBlock *HeaderVPBB = VectorRegion->getEntryBasicBlock(); | ||||||
BasicBlock *HeaderBB = State.CFG.VPBB2IRBB[HeaderVPBB]; | ||||||
|
||||||
|
@@ -4123,7 +4138,8 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) { | |||||
// a bottom-test and a single exiting block. We'd have to handle the fact | ||||||
// that not every instruction executes on the last iteration. This will | ||||||
// require a lane mask which varies through the vector loop body. (TODO) | ||||||
if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) { | ||||||
if (Legal->hasUncountableEarlyExit() || | ||||||
TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) { | ||||||
// If there was a tail-folding hint/switch, but we can't fold the tail by | ||||||
// masking, fallback to a vectorization with a scalar epilogue. | ||||||
if (ScalarEpilogueStatus == CM_ScalarEpilogueNotNeededUsePredicate) { | ||||||
|
@@ -4753,7 +4769,9 @@ bool LoopVectorizationPlanner::isCandidateForEpilogueVectorization( | |||||
// Epilogue vectorization code has not been audited to ensure it handles | ||||||
// non-latch exits properly. It may be fine, but it needs to be audited and | ||||||
// tested. | ||||||
if (OrigLoop->getExitingBlock() != OrigLoop->getLoopLatch()) | ||||||
// TODO: Add support for loops with an early exit. | ||||||
if (Legal->hasUncountableEarlyExit() || | ||||||
|
||||||
OrigLoop->getExitingBlock() != OrigLoop->getLoopLatch()) | ||||||
return false; | ||||||
|
||||||
return true; | ||||||
|
@@ -5001,6 +5019,10 @@ LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF, | |||||
if (!Legal->isSafeForAnyVectorWidth()) | ||||||
return 1; | ||||||
|
||||||
// We don't attempt to perform interleaving for early exit loops. | ||||||
if (Legal->hasUncountableEarlyExit()) | ||||||
return 1; | ||||||
|
||||||
|
||||||
auto BestKnownTC = getSmallBestKnownTC(PSE, TheLoop); | ||||||
const bool HasReductions = !Legal->getReductionVars().empty(); | ||||||
|
||||||
|
@@ -7813,11 +7835,14 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan( | |||||
// 2.5 When vectorizing the epilogue, fix reduction and induction resume | ||||||
// values from the additional bypass block. | ||||||
if (VectorizingEpilogue) { | ||||||
assert(!ILV.Legal->hasUncountableEarlyExit() && | ||||||
"Epilogue vectorisation not yet supported with early exits"); | ||||||
BasicBlock *BypassBlock = ILV.getAdditionalBypassBlock(); | ||||||
for (VPRecipeBase &R : *ExitVPBB) { | ||||||
fixReductionScalarResumeWhenVectorizingEpilog( | ||||||
&R, State, State.CFG.VPBB2IRBB[ExitVPBB], BypassBlock); | ||||||
} | ||||||
|
||||||
|
||||||
BasicBlock *PH = OrigLoop->getLoopPreheader(); | ||||||
for (const auto &[IVPhi, _] : Legal->getInductionVars()) { | ||||||
auto *Inc = cast<PHINode>(IVPhi->getIncomingValueForBlock(PH)); | ||||||
|
@@ -10177,13 +10202,33 @@ bool LoopVectorizePass::processLoop(Loop *L) { | |||||
return false; | ||||||
} | ||||||
|
||||||
if (LVL.hasUncountableEarlyExit() && !EnableEarlyExitVectorization) { | ||||||
reportVectorizationFailure("Auto-vectorization of loops with uncountable " | ||||||
"early exit is not enabled", | ||||||
"Auto-vectorization of loops with uncountable " | ||||||
"early exit is not enabled", | ||||||
"UncountableEarlyExitLoopsDisabled", ORE, L); | ||||||
return false; | ||||||
if (LVL.hasUncountableEarlyExit()) { | ||||||
if (!EnableEarlyExitVectorization) { | ||||||
reportVectorizationFailure("Auto-vectorization of loops with uncountable " | ||||||
"early exit is not enabled", | ||||||
"Auto-vectorization of loops with uncountable " | ||||||
"early exit is not enabled", | ||||||
"UncountableEarlyExitLoopsDisabled", ORE, L); | ||||||
return false; | ||||||
} | ||||||
|
||||||
// Needed to prevent InnerLoopVectorizer::fixupIVUsers from crashing. | ||||||
|
||||||
for (BasicBlock *BB : L->blocks()) { | ||||||
for (Instruction &I : *BB) { | ||||||
for (User *U : I.users()) { | ||||||
|
||||||
Instruction *UI = cast<Instruction>(U); | ||||||
if (!L->contains(UI)) { | ||||||
reportVectorizationFailure( | ||||||
"Auto-vectorization of loops with uncountable " | ||||||
"early exit and live-outs is not yet supported", | ||||||
"Auto-vectorization of loop with uncountable " | ||||||
"early exit and live-outs is not yet supported", | ||||||
|
||||||
"UncountableEarlyExitLoopLiveOutsUnsupported", ORE, L); | ||||||
return false; | ||||||
} | ||||||
} | ||||||
} | ||||||
} | ||||||
} | ||||||
|
||||||
// Entrance to the VPlan-native vectorization path. Outer loops are processed | ||||||
|
@@ -10208,6 +10253,20 @@ bool LoopVectorizePass::processLoop(Loop *L) { | |||||
if (UseInterleaved) | ||||||
IAI.analyzeInterleaving(useMaskedInterleavedAccesses(*TTI)); | ||||||
|
||||||
if (LVL.hasUncountableEarlyExit()) { | ||||||
BasicBlock *LoopLatch = L->getLoopLatch(); | ||||||
if (IAI.requiresScalarEpilogue() || | ||||||
llvm::any_of(LVL.getCountableExitingBlocks(), | ||||||
|
llvm::any_of(LVL.getCountableExitingBlocks(), | |
any_of(LVL.getCountableExitingBlocks(), |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this needed? If there's an uncountable early exit, there won't be a single exiting block, so the check below will already be true (as there must be a latch).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You're right, the latch should always be non-null.