Skip to content

Commit bf4486e

Browse files
committed
[LV] Move fixing reduction resumes for epilogue out of executePlan. (NFC)
Move the fix-up of reduction resume values out of the general LoopVectorizationPlanner::executePlan and perform it together with the update of induction resume values. This also allows moving the handling of the additional bypass block into EpilogueVectorizerEpilogueLoop.
1 parent 6980d3b commit bf4486e

File tree

1 file changed

+55
-53
lines changed

1 file changed

+55
-53
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 55 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -532,14 +532,6 @@ class InnerLoopVectorizer {
532532
/// count of the original loop for both main loop and epilogue vectorization.
533533
void setTripCount(Value *TC) { TripCount = TC; }
534534

535-
/// Return the additional bypass block which targets the scalar loop by
536-
/// skipping the epilogue loop after completing the main loop.
537-
BasicBlock *getAdditionalBypassBlock() const {
538-
assert(AdditionalBypassBlock &&
539-
"Trying to access AdditionalBypassBlock but it has not been set");
540-
return AdditionalBypassBlock;
541-
}
542-
543535
protected:
544536
friend class LoopVectorizationPlanner;
545537

@@ -602,11 +594,6 @@ class InnerLoopVectorizer {
602594
/// for cleaning the checks, if vectorization turns out unprofitable.
603595
GeneratedRTChecks &RTChecks;
604596

605-
/// The additional bypass block which conditionally skips over the epilogue
606-
/// loop after executing the main loop. Needed to resume inductions and
607-
/// reductions during epilogue vectorization.
608-
BasicBlock *AdditionalBypassBlock = nullptr;
609-
610597
VPlan &Plan;
611598

612599
/// The vector preheader block of \p Plan, used as target for check blocks
@@ -711,6 +698,11 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
711698
// vectorization of *epilogue* loops in the process of vectorizing loops and
712699
// their epilogues.
713700
class EpilogueVectorizerEpilogueLoop : public InnerLoopAndEpilogueVectorizer {
701+
/// The additional bypass block which conditionally skips over the epilogue
702+
/// loop after executing the main loop. Needed to resume inductions and
703+
/// reductions during epilogue vectorization.
704+
BasicBlock *AdditionalBypassBlock = nullptr;
705+
714706
public:
715707
EpilogueVectorizerEpilogueLoop(
716708
Loop *OrigLoop, PredicatedScalarEvolution &PSE, LoopInfo *LI,
@@ -727,6 +719,14 @@ class EpilogueVectorizerEpilogueLoop : public InnerLoopAndEpilogueVectorizer {
727719
/// *epilogue loop* strategy (i.e., the second pass of VPlan execution).
728720
BasicBlock *createVectorizedLoopSkeleton() final;
729721

722+
/// Return the additional bypass block which targets the scalar loop by
723+
/// skipping the epilogue loop after completing the main loop.
724+
BasicBlock *getAdditionalBypassBlock() const {
725+
assert(AdditionalBypassBlock &&
726+
"Trying to access AdditionalBypassBlock but it has not been set");
727+
return AdditionalBypassBlock;
728+
}
729+
730730
protected:
731731
/// Emits an iteration count bypass check after the main vector loop has
732732
/// finished to see if there are any iterations left to execute by either
@@ -7139,7 +7139,7 @@ static Value *getStartValueFromReductionResult(VPInstruction *RdxResult) {
71397139
// epilog loop, fix the reduction's scalar PHI node by adding the incoming value
71407140
// from the main vector loop.
71417141
static void fixReductionScalarResumeWhenVectorizingEpilog(
7142-
VPPhi *EpiResumePhiR, VPTransformState &State, BasicBlock *BypassBlock) {
7142+
VPPhi *EpiResumePhiR, PHINode &EpiResumePhi, BasicBlock *BypassBlock) {
71437143
// Get the VPInstruction computing the reduction result in the middle block.
71447144
// The first operand may not be from the middle block if it is not connected
71457145
// to the scalar preheader. In that case, there's nothing to fix.
@@ -7194,8 +7194,7 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
71947194
// When fixing reductions in the epilogue loop we should already have
71957195
// created a bc.merge.rdx Phi after the main vector body. Ensure that we carry
71967196
// over the incoming values correctly.
7197-
auto *EpiResumePhi = cast<PHINode>(State.get(EpiResumePhiR, true));
7198-
EpiResumePhi->setIncomingValueForBlock(
7197+
EpiResumePhi.setIncomingValueForBlock(
71997198
BypassBlock, MainResumePhi->getIncomingValueForBlock(BypassBlock));
72007199
}
72017200

@@ -7306,31 +7305,6 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
73067305

73077306
BestVPlan.execute(&State);
73087307

7309-
// 2.5 When vectorizing the epilogue, fix reduction resume values from the
7310-
// additional bypass block.
7311-
if (VectorizingEpilogue) {
7312-
assert(!BestVPlan.hasEarlyExit() &&
7313-
"Epilogue vectorisation not yet supported with early exits");
7314-
BasicBlock *PH = OrigLoop->getLoopPreheader();
7315-
BasicBlock *BypassBlock = ILV.getAdditionalBypassBlock();
7316-
for (auto *Pred : predecessors(PH)) {
7317-
for (PHINode &Phi : PH->phis()) {
7318-
if (Phi.getBasicBlockIndex(Pred) != -1)
7319-
continue;
7320-
Phi.addIncoming(Phi.getIncomingValueForBlock(BypassBlock), Pred);
7321-
}
7322-
}
7323-
VPBasicBlock *ScalarPH = BestVPlan.getScalarPreheader();
7324-
if (ScalarPH->hasPredecessors()) {
7325-
// If ScalarPH has predecessors, we may need to update its reduction
7326-
// resume values.
7327-
for (VPRecipeBase &R : ScalarPH->phis()) {
7328-
fixReductionScalarResumeWhenVectorizingEpilog(cast<VPPhi>(&R), State,
7329-
BypassBlock);
7330-
}
7331-
}
7332-
}
7333-
73347308
// 2.6. Maintain Loop Hints
73357309
// Keep all loop hints from the original loop on the vector loop (we'll
73367310
// replace the vectorizer-specific hints below).
@@ -9851,6 +9825,43 @@ static Value *createInductionAdditionalBypassValues(
98519825
return EndValueFromAdditionalBypass;
98529826
}
98539827

9828+
static void fixScalarResumeValuesFromBypass(BasicBlock *BypassBlock, Loop *L,
9829+
VPlan &BestEpiPlan,
9830+
LoopVectorizationLegality &LVL,
9831+
const SCEV2ValueTy &ExpandedSCEVs,
9832+
Value *MainVectorTripCount) {
9833+
// Fix reduction resume values from the additional bypass block.
9834+
BasicBlock *PH = L->getLoopPreheader();
9835+
for (auto *Pred : predecessors(PH)) {
9836+
for (PHINode &Phi : PH->phis()) {
9837+
if (Phi.getBasicBlockIndex(Pred) != -1)
9838+
continue;
9839+
Phi.addIncoming(Phi.getIncomingValueForBlock(BypassBlock), Pred);
9840+
}
9841+
}
9842+
auto *ScalarPH = cast<VPIRBasicBlock>(BestEpiPlan.getScalarPreheader());
9843+
if (ScalarPH->hasPredecessors()) {
9844+
// If ScalarPH has predecessors, we may need to update its reduction
9845+
// resume values.
9846+
for (const auto &[R, IRPhi] :
9847+
zip(ScalarPH->phis(), ScalarPH->getIRBasicBlock()->phis())) {
9848+
fixReductionScalarResumeWhenVectorizingEpilog(cast<VPPhi>(&R), IRPhi,
9849+
BypassBlock);
9850+
}
9851+
}
9852+
9853+
// Fix induction resume values from the additional bypass block.
9854+
IRBuilder<> BypassBuilder(BypassBlock, BypassBlock->getFirstInsertionPt());
9855+
for (const auto &[IVPhi, II] : LVL.getInductionVars()) {
9856+
auto *Inc = cast<PHINode>(IVPhi->getIncomingValueForBlock(PH));
9857+
Value *V = createInductionAdditionalBypassValues(
9858+
IVPhi, II, BypassBuilder, ExpandedSCEVs, MainVectorTripCount,
9859+
LVL.getPrimaryInduction());
9860+
// TODO: Directly add as extra operand to the VPResumePHI recipe.
9861+
Inc->setIncomingValueForBlock(BypassBlock, V);
9862+
}
9863+
}
9864+
98549865
bool LoopVectorizePass::processLoop(Loop *L) {
98559866
assert((EnableVPlanNativePath || L->isInnermost()) &&
98569867
"VPlan-native path is not enabled. Only process inner loops.");
@@ -10227,18 +10238,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1022710238
LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV, DT,
1022810239
true);
1022910240

10230-
// Fix induction resume values from the additional bypass block.
10231-
BasicBlock *BypassBlock = EpilogILV.getAdditionalBypassBlock();
10232-
IRBuilder<> BypassBuilder(BypassBlock, BypassBlock->getFirstInsertionPt());
10233-
BasicBlock *PH = L->getLoopPreheader();
10234-
for (const auto &[IVPhi, II] : LVL.getInductionVars()) {
10235-
auto *Inc = cast<PHINode>(IVPhi->getIncomingValueForBlock(PH));
10236-
Value *V = createInductionAdditionalBypassValues(
10237-
IVPhi, II, BypassBuilder, ExpandedSCEVs, EPI.VectorTripCount,
10238-
LVL.getPrimaryInduction());
10239-
// TODO: Directly add as extra operand to the VPResumePHI recipe.
10240-
Inc->setIncomingValueForBlock(BypassBlock, V);
10241-
}
10241+
fixScalarResumeValuesFromBypass(EpilogILV.getAdditionalBypassBlock(), L,
10242+
BestEpiPlan, LVL, ExpandedSCEVs,
10243+
EPI.VectorTripCount);
1024210244
++LoopsEpilogueVectorized;
1024310245

1024410246
if (!Checks.hasChecks())

0 commit comments

Comments
 (0)