Skip to content

Commit 615cec7

Browse files
fhahn authored and
pawosm-arm committed
[LV] Move IV bypass value creation out of ILV (NFC)
createInductionAdditionalBypassValues is only used for epilogue vectorization now. Move it out of ILV, which means we do not have to thread through ExpandedSCEVs and also don't have to track the bypass values in ILV. Instead, directly create them if needed after executing the epilogue plan. This moves more of the epilogue-specific logic out of the generic executePlan.
1 parent c5dbd00 commit 615cec7

File tree

3 files changed

+63
-103
lines changed

3 files changed

+63
-103
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -442,17 +442,15 @@ class LoopVectorizationPlanner {
442442
/// TODO: \p VectorizingEpilogue indicates if the executed VPlan is for the
443443
/// epilogue vector loop. It should be removed once the re-use issue has been
444444
/// fixed.
445-
/// \p ExpandedSCEVs is passed during execution of the plan for epilogue loop
446-
/// to re-use expansion results generated during main plan execution.
447445
///
448446
/// Returns a mapping of SCEVs to their expanded IR values.
449447
/// Note that this is a temporary workaround needed due to the current
450448
/// epilogue handling.
451-
DenseMap<const SCEV *, Value *>
452-
executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan,
453-
InnerLoopVectorizer &LB, DominatorTree *DT,
454-
bool VectorizingEpilogue,
455-
const DenseMap<const SCEV *, Value *> *ExpandedSCEVs = nullptr);
449+
DenseMap<const SCEV *, Value *> executePlan(ElementCount VF, unsigned UF,
450+
VPlan &BestPlan,
451+
InnerLoopVectorizer &LB,
452+
DominatorTree *DT,
453+
bool VectorizingEpilogue);
456454

457455
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
458456
void printPlans(raw_ostream &O);

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 57 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -494,11 +494,8 @@ class InnerLoopVectorizer {
494494
/// is generated around the vectorized (and scalar epilogue) loops consisting
495495
/// of various checks and bypasses. Return the pre-header block of the new
496496
/// loop. In the case of epilogue vectorization, this function is overriden to
497-
/// handle the more complex control flow around the loops. \p ExpandedSCEVs is
498-
/// used to look up SCEV expansions for expressions needed during skeleton
499-
/// creation.
500-
virtual BasicBlock *
501-
createVectorizedLoopSkeleton(const SCEV2ValueTy &ExpandedSCEVs);
497+
/// handle the more complex control flow around the loops.
498+
virtual BasicBlock *createVectorizedLoopSkeleton();
502499

503500
/// Fix the vectorized code, taking care of header phi's, and more.
504501
void fixVectorizedLoop(VPTransformState &State);
@@ -526,12 +523,6 @@ class InnerLoopVectorizer {
526523
/// count of the original loop for both main loop and epilogue vectorization.
527524
void setTripCount(Value *TC) { TripCount = TC; }
528525

529-
// Retrieve the additional bypass value associated with an original
530-
/// induction header phi.
531-
Value *getInductionAdditionalBypassValue(PHINode *OrigPhi) const {
532-
return Induction2AdditionalBypassValue.at(OrigPhi);
533-
}
534-
535526
/// Return the additional bypass block which targets the scalar loop by
536527
/// skipping the epilogue loop after completing the main loop.
537528
BasicBlock *getAdditionalBypassBlock() const {
@@ -568,11 +559,6 @@ class InnerLoopVectorizer {
568559
/// vector loop preheader, middle block and scalar preheader.
569560
void createVectorLoopSkeleton(StringRef Prefix);
570561

571-
/// Create and record the values for induction variables to resume coming from
572-
/// the additional bypass block.
573-
void createInductionAdditionalBypassValues(const SCEV2ValueTy &ExpandedSCEVs,
574-
Value *MainVectorTripCount);
575-
576562
/// Allow subclasses to override and print debug traces before/after vplan
577563
/// execution, when trace information is requested.
578564
virtual void printDebugTracesAtStart() {}
@@ -666,11 +652,6 @@ class InnerLoopVectorizer {
666652
/// for cleaning the checks, if vectorization turns out unprofitable.
667653
GeneratedRTChecks &RTChecks;
668654

669-
/// Mapping of induction phis to their additional bypass values. They
670-
/// need to be added as operands to phi nodes in the scalar loop preheader
671-
/// after the epilogue skeleton has been created.
672-
DenseMap<PHINode *, Value *> Induction2AdditionalBypassValue;
673-
674655
/// The additional bypass block which conditionally skips over the epilogue
675656
/// loop after executing the main loop. Needed to resume inductions and
676657
/// reductions during epilogue vectorization.
@@ -733,16 +714,14 @@ class InnerLoopAndEpilogueVectorizer : public InnerLoopVectorizer {
733714

734715
// Override this function to handle the more complex control flow around the
735716
// three loops.
736-
BasicBlock *
737-
createVectorizedLoopSkeleton(const SCEV2ValueTy &ExpandedSCEVs) final {
738-
return createEpilogueVectorizedLoopSkeleton(ExpandedSCEVs);
717+
BasicBlock *createVectorizedLoopSkeleton() final {
718+
return createEpilogueVectorizedLoopSkeleton();
739719
}
740720

741721
/// The interface for creating a vectorized skeleton using one of two
742722
/// different strategies, each corresponding to one execution of the vplan
743723
/// as described above.
744-
virtual BasicBlock *
745-
createEpilogueVectorizedLoopSkeleton(const SCEV2ValueTy &ExpandedSCEVs) = 0;
724+
virtual BasicBlock *createEpilogueVectorizedLoopSkeleton() = 0;
746725

747726
/// Holds and updates state information required to vectorize the main loop
748727
/// and its epilogue in two separate passes. This setup helps us avoid
@@ -770,8 +749,7 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
770749
EPI, LVL, CM, BFI, PSI, Check, Plan) {}
771750
/// Implements the interface for creating a vectorized skeleton using the
772751
/// *main loop* strategy (ie the first pass of vplan execution).
773-
BasicBlock *
774-
createEpilogueVectorizedLoopSkeleton(const SCEV2ValueTy &ExpandedSCEVs) final;
752+
BasicBlock *createEpilogueVectorizedLoopSkeleton() final;
775753

776754
protected:
777755
/// Emits an iteration count bypass check once for the main loop (when \p
@@ -801,8 +779,7 @@ class EpilogueVectorizerEpilogueLoop : public InnerLoopAndEpilogueVectorizer {
801779
}
802780
/// Implements the interface for creating a vectorized skeleton using the
803781
/// *epilogue loop* strategy (ie the second pass of vplan execution).
804-
BasicBlock *
805-
createEpilogueVectorizedLoopSkeleton(const SCEV2ValueTy &ExpandedSCEVs) final;
782+
BasicBlock *createEpilogueVectorizedLoopSkeleton() final;
806783

807784
protected:
808785
/// Emits an iteration count bypass check after the main vector loop has
@@ -2679,44 +2656,7 @@ static void addFullyUnrolledInstructionsToIgnore(
26792656
}
26802657
}
26812658

2682-
void InnerLoopVectorizer::createInductionAdditionalBypassValues(
2683-
const SCEV2ValueTy &ExpandedSCEVs, Value *MainVectorTripCount) {
2684-
assert(MainVectorTripCount && "Must have bypass information");
2685-
2686-
Instruction *OldInduction = Legal->getPrimaryInduction();
2687-
IRBuilder<> BypassBuilder(getAdditionalBypassBlock(),
2688-
getAdditionalBypassBlock()->getFirstInsertionPt());
2689-
for (const auto &InductionEntry : Legal->getInductionVars()) {
2690-
PHINode *OrigPhi = InductionEntry.first;
2691-
const InductionDescriptor &II = InductionEntry.second;
2692-
Value *Step = getExpandedStep(II, ExpandedSCEVs);
2693-
// For the primary induction the additional bypass end value is known.
2694-
// Otherwise it is computed.
2695-
Value *EndValueFromAdditionalBypass = MainVectorTripCount;
2696-
if (OrigPhi != OldInduction) {
2697-
auto *BinOp = II.getInductionBinOp();
2698-
// Fast-math-flags propagate from the original induction instruction.
2699-
if (isa_and_nonnull<FPMathOperator>(BinOp))
2700-
BypassBuilder.setFastMathFlags(BinOp->getFastMathFlags());
2701-
2702-
// Compute the end value for the additional bypass.
2703-
EndValueFromAdditionalBypass =
2704-
emitTransformedIndex(BypassBuilder, MainVectorTripCount,
2705-
II.getStartValue(), Step, II.getKind(), BinOp);
2706-
EndValueFromAdditionalBypass->setName("ind.end");
2707-
}
2708-
2709-
// Store the bypass value here, as it needs to be added as operand to its
2710-
// scalar preheader phi node after the epilogue skeleton has been created.
2711-
// TODO: Directly add as extra operand to the VPResumePHI recipe.
2712-
assert(!Induction2AdditionalBypassValue.contains(OrigPhi) &&
2713-
"entry for OrigPhi already exits");
2714-
Induction2AdditionalBypassValue[OrigPhi] = EndValueFromAdditionalBypass;
2715-
}
2716-
}
2717-
2718-
BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton(
2719-
const SCEV2ValueTy &ExpandedSCEVs) {
2659+
BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
27202660
/*
27212661
In this function we generate a new loop. The new loop will contain
27222662
the vectorized instructions while the old loop will continue to run the
@@ -7628,17 +7568,11 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
76287568

76297569
DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
76307570
ElementCount BestVF, unsigned BestUF, VPlan &BestVPlan,
7631-
InnerLoopVectorizer &ILV, DominatorTree *DT, bool VectorizingEpilogue,
7632-
const DenseMap<const SCEV *, Value *> *ExpandedSCEVs) {
7571+
InnerLoopVectorizer &ILV, DominatorTree *DT, bool VectorizingEpilogue) {
76337572
assert(BestVPlan.hasVF(BestVF) &&
76347573
"Trying to execute plan with unsupported VF");
76357574
assert(BestVPlan.hasUF(BestUF) &&
76367575
"Trying to execute plan with unsupported UF");
7637-
assert(
7638-
((VectorizingEpilogue && ExpandedSCEVs) ||
7639-
(!VectorizingEpilogue && !ExpandedSCEVs)) &&
7640-
"expanded SCEVs to reuse can only be used during epilogue vectorization");
7641-
76427576
// TODO: Move to VPlan transform stage once the transition to the VPlan-based
76437577
// cost model is complete for better cost estimates.
76447578
VPlanTransforms::unrollByUF(BestVPlan, BestUF,
@@ -7670,8 +7604,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
76707604
// middle block. The vector loop is created during VPlan execution.
76717605
VPBasicBlock *VectorPH =
76727606
cast<VPBasicBlock>(BestVPlan.getEntry()->getSingleSuccessor());
7673-
State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton(
7674-
ExpandedSCEVs ? *ExpandedSCEVs : State.ExpandedSCEVs);
7607+
7608+
State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton();
76757609
if (VectorizingEpilogue)
76767610
VPlanTransforms::removeDeadRecipes(BestVPlan);
76777611

@@ -7712,8 +7646,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
77127646
BestVPlan.execute(&State);
77137647

77147648
auto *MiddleVPBB = BestVPlan.getMiddleBlock();
7715-
// 2.5 When vectorizing the epilogue, fix reduction and induction resume
7716-
// values from the additional bypass block.
7649+
// 2.5 When vectorizing the epilogue, fix reduction resume values from the
7650+
// additional bypass block.
77177651
if (VectorizingEpilogue) {
77187652
assert(!ILV.Legal->hasUncountableEarlyExit() &&
77197653
"Epilogue vectorisation not yet supported with early exits");
@@ -7722,12 +7656,6 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
77227656
fixReductionScalarResumeWhenVectorizingEpilog(
77237657
&R, State, State.CFG.VPBB2IRBB[MiddleVPBB], BypassBlock);
77247658
}
7725-
BasicBlock *PH = OrigLoop->getLoopPreheader();
7726-
for (const auto &[IVPhi, _] : Legal->getInductionVars()) {
7727-
auto *Inc = cast<PHINode>(IVPhi->getIncomingValueForBlock(PH));
7728-
Value *V = ILV.getInductionAdditionalBypassValue(IVPhi);
7729-
Inc->setIncomingValueForBlock(BypassBlock, V);
7730-
}
77317659
}
77327660

77337661
// 2.6. Maintain Loop Hints
@@ -7789,8 +7717,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
77897717

77907718
/// This function is partially responsible for generating the control flow
77917719
/// depicted in https://llvm.org/docs/Vectorizers.html#epilogue-vectorization.
7792-
BasicBlock *EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton(
7793-
const SCEV2ValueTy &ExpandedSCEVs) {
7720+
BasicBlock *EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton() {
77947721
createVectorLoopSkeleton("");
77957722

77967723
// Generate the code to check the minimum iteration count of the vector
@@ -7900,8 +7827,7 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
79007827
/// This function is partially responsible for generating the control flow
79017828
/// depicted in https://llvm.org/docs/Vectorizers.html#epilogue-vectorization.
79027829
BasicBlock *
7903-
EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
7904-
const SCEV2ValueTy &ExpandedSCEVs) {
7830+
EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton() {
79057831
createVectorLoopSkeleton("vec.epilog.");
79067832

79077833
// Now, compare the remaining count and if there aren't enough iterations to
@@ -7969,11 +7895,6 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
79697895
Phi->removeIncomingValue(EPI.MemSafetyCheck);
79707896
}
79717897

7972-
// Generate bypass values from the additional bypass block. Note that when the
7973-
// vectorized epilogue is skipped due to iteration count check, then the
7974-
// resume value for the induction variable comes from the trip count of the
7975-
// main vector loop, passed as the second argument.
7976-
createInductionAdditionalBypassValues(ExpandedSCEVs, EPI.VectorTripCount);
79777898
return LoopVectorPreHeader;
79787899
}
79797900

@@ -10314,6 +10235,33 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
1031410235
}
1031510236
}
1031610237

10238+
// Generate bypass values from the additional bypass block. Note that when the
10239+
// vectorized epilogue is skipped due to iteration count check, then the
10240+
// resume value for the induction variable comes from the trip count of the
10241+
// main vector loop, passed as the second argument.
10242+
static Value *createInductionAdditionalBypassValues(
10243+
PHINode *OrigPhi, const InductionDescriptor &II, IRBuilder<> &BypassBuilder,
10244+
const SCEV2ValueTy &ExpandedSCEVs, Value *MainVectorTripCount,
10245+
Instruction *OldInduction) {
10246+
Value *Step = getExpandedStep(II, ExpandedSCEVs);
10247+
// For the primary induction the additional bypass end value is known.
10248+
// Otherwise it is computed.
10249+
Value *EndValueFromAdditionalBypass = MainVectorTripCount;
10250+
if (OrigPhi != OldInduction) {
10251+
auto *BinOp = II.getInductionBinOp();
10252+
// Fast-math-flags propagate from the original induction instruction.
10253+
if (isa_and_nonnull<FPMathOperator>(BinOp))
10254+
BypassBuilder.setFastMathFlags(BinOp->getFastMathFlags());
10255+
10256+
// Compute the end value for the additional bypass.
10257+
EndValueFromAdditionalBypass =
10258+
emitTransformedIndex(BypassBuilder, MainVectorTripCount,
10259+
II.getStartValue(), Step, II.getKind(), BinOp);
10260+
EndValueFromAdditionalBypass->setName("ind.end");
10261+
}
10262+
return EndValueFromAdditionalBypass;
10263+
}
10264+
1031710265
bool LoopVectorizePass::processLoop(Loop *L) {
1031810266
assert((EnableVPlanNativePath || L->isInnermost()) &&
1031910267
"VPlan-native path is not enabled. Only process inner loops.");
@@ -10699,7 +10647,21 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1069910647
preparePlanForEpilogueVectorLoop(BestEpiPlan, L, ExpandedSCEVs, EPI);
1070010648

1070110649
LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV,
10702-
DT, true, &ExpandedSCEVs);
10650+
DT, true);
10651+
10652+
// Fix induction resume values from the additional bypass block.
10653+
BasicBlock *BypassBlock = EpilogILV.getAdditionalBypassBlock();
10654+
IRBuilder<> BypassBuilder(BypassBlock,
10655+
BypassBlock->getFirstInsertionPt());
10656+
BasicBlock *PH = L->getLoopPreheader();
10657+
for (const auto &[IVPhi, II] : LVL.getInductionVars()) {
10658+
auto *Inc = cast<PHINode>(IVPhi->getIncomingValueForBlock(PH));
10659+
Value *V = createInductionAdditionalBypassValues(
10660+
IVPhi, II, BypassBuilder, ExpandedSCEVs, EPI.VectorTripCount,
10661+
LVL.getPrimaryInduction());
10662+
// TODO: Directly add as extra operand to the VPResumePHI recipe.
10663+
Inc->setIncomingValueForBlock(BypassBlock, V);
10664+
}
1070310665
++LoopsEpilogueVectorized;
1070410666

1070510667
if (!MainILV.areSafetyChecksAdded())

llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ define void @_Z3fn1v() #0 {
183183
; CHECK-NEXT: [[DOTSPLAT68:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT67]], <8 x i64> poison, <8 x i32> zeroinitializer
184184
; CHECK-NEXT: [[INDUCTION69:%.*]] = add <8 x i64> [[DOTSPLAT68]], <i64 0, i64 2, i64 4, i64 6, i64 8, i64 10, i64 12, i64 14>
185185
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY50:%.*]]
186-
; CHECK: vec.epilog.vector.body52:
186+
; CHECK: vec.epilog.vector.body50:
187187
; CHECK-NEXT: [[INDEX61:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL51]], [[VEC_EPILOG_PH42]] ], [ [[INDEX_NEXT74:%.*]], [[VEC_EPILOG_VECTOR_BODY50]] ]
188188
; CHECK-NEXT: [[VEC_IND65:%.*]] = phi <8 x i64> [ [[INDUCTION64]], [[VEC_EPILOG_PH42]] ], [ [[VEC_IND_NEXT66:%.*]], [[VEC_EPILOG_VECTOR_BODY50]] ]
189189
; CHECK-NEXT: [[VEC_IND70:%.*]] = phi <8 x i64> [ [[INDUCTION69]], [[VEC_EPILOG_PH42]] ], [ [[VEC_IND_NEXT71:%.*]], [[VEC_EPILOG_VECTOR_BODY50]] ]

0 commit comments

Comments
 (0)