Skip to content

Commit 42497c0

Browse files
committed
[LV] Move two fns into VPlanTransforms (NFC)
1 parent 148a835 commit 42497c0

File tree

3 files changed

+218
-208
lines changed

3 files changed

+218
-208
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 6 additions & 208 deletions
Original file line numberDiff line numberDiff line change
@@ -8247,211 +8247,6 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
82478247
}
82488248
}
82498249

8250-
/// Create and return a ResumePhi for \p WideIV, unless it is truncated. If the
8251-
/// induction recipe is not canonical, creates a VPDerivedIVRecipe to compute
8252-
/// the end value of the induction.
///
/// \param WideIV           Wide induction recipe to create the resume phi for.
/// \param VectorPHBuilder  Builder used to emit the end-value computation
///                         (the derived IV and, if needed, a truncating cast).
/// \param ScalarPHBuilder  Builder used to emit the resume phi itself.
/// \param TypeInfo         Used to infer the scalar types of \p WideIV and of
///                         the computed end value.
/// \param VectorTC         Vector trip count; used directly as the end value
///                         when \p WideIV is a canonical int/FP induction.
/// \returns The new resume phi, named "bc.resume.val" and created from the
///          operand list {end value, start value}, or nullptr when \p WideIV
///          is an int/FP induction with a truncate instruction.
8253-
static VPInstruction *addResumePhiRecipeForInduction(
8254-
VPWidenInductionRecipe *WideIV, VPBuilder &VectorPHBuilder,
8255-
VPBuilder &ScalarPHBuilder, VPTypeAnalysis &TypeInfo, VPValue *VectorTC) {
8256-
// Null when WideIV is some other kind of wide induction recipe.
auto *WideIntOrFp = dyn_cast<VPWidenIntOrFpInductionRecipe>(WideIV);
8257-
// Truncated wide inductions resume from the last lane of their vector value
8258-
// in the last vector iteration which is handled elsewhere.
8259-
if (WideIntOrFp && WideIntOrFp->getTruncInst())
8260-
return nullptr;
8261-
8262-
VPValue *Start = WideIV->getStartValue();
8263-
VPValue *Step = WideIV->getStepValue();
8264-
const InductionDescriptor &ID = WideIV->getInductionDescriptor();
8265-
// Default to the vector trip count as end value. Any induction that is not a
// canonical int/FP induction instead derives its end value from its start,
// its step and the vector trip count.
VPValue *EndValue = VectorTC;
8266-
if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
8267-
EndValue = VectorPHBuilder.createDerivedIV(
8268-
ID.getKind(), dyn_cast_or_null<FPMathOperator>(ID.getInductionBinOp()),
8269-
Start, VectorTC, Step);
8270-
}
8271-
8272-
// EndValue is derived from the vector trip count (which has the same type as
8273-
// the widest induction) and thus may be wider than the induction here.
// Truncate it to the induction's own scalar type when the types differ.
8274-
Type *ScalarTypeOfWideIV = TypeInfo.inferScalarType(WideIV);
8275-
if (ScalarTypeOfWideIV != TypeInfo.inferScalarType(EndValue)) {
8276-
EndValue = VectorPHBuilder.createScalarCast(Instruction::Trunc, EndValue,
8277-
ScalarTypeOfWideIV,
8278-
WideIV->getDebugLoc());
8279-
}
8280-
8281-
// The end value is listed first and the start value second; callers that
// read the end value back from the phi rely on this operand order.
auto *ResumePhiRecipe = ScalarPHBuilder.createScalarPhi(
8282-
{EndValue, Start}, WideIV->getDebugLoc(), "bc.resume.val");
8283-
return ResumePhiRecipe;
8284-
}
8285-
8286-
/// Create resume phis in the scalar preheader for first-order recurrences,
8287-
/// reductions and inductions, and update the VPIRInstructions wrapping the
8288-
/// original phis in the scalar header. End values for inductions are added to
8289-
/// \p IVEndValues.
///
/// \param Builder      Recipe builder used to look up the header phi recipe
///                     that corresponds to each phi in the scalar header.
/// \param Plan         The VPlan whose scalar header phis are updated.
/// \param IVEndValues  Output map; for every wide induction that received a
///                     resume phi, maps the induction recipe to operand 0 of
///                     that resume phi.
8290-
static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
8291-
DenseMap<VPValue *, VPValue *> &IVEndValues) {
8292-
VPTypeAnalysis TypeInfo(Plan);
8293-
auto *ScalarPH = Plan.getScalarPreheader();
8294-
// The first predecessor of the scalar preheader is used as the middle block.
auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getPredecessors()[0]);
8295-
VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion();
8296-
// Three builders: one for the single predecessor block of the vector loop
// region, one inserting at the first non-phi position of the middle block,
// and one for the scalar preheader.
VPBuilder VectorPHBuilder(
8297-
cast<VPBasicBlock>(VectorRegion->getSinglePredecessor()));
8298-
VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
8299-
VPBuilder ScalarPHBuilder(ScalarPH);
8300-
for (VPRecipeBase &ScalarPhiR : Plan.getScalarHeader()->phis()) {
8301-
auto *ScalarPhiIRI = cast<VPIRPhi>(&ScalarPhiR);
8302-
8303-
// TODO: Extract final value from induction recipe initially, optimize to
8304-
// pre-computed end value together in optimizeInductionExitUsers.
8305-
// Find the header phi recipe created for the wrapped IR phi.
auto *VectorPhiR =
8306-
cast<VPHeaderPHIRecipe>(Builder.getRecipe(&ScalarPhiIRI->getIRPhi()));
8307-
if (auto *WideIVR = dyn_cast<VPWidenInductionRecipe>(VectorPhiR)) {
8308-
if (VPInstruction *ResumePhi = addResumePhiRecipeForInduction(
8309-
WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
8310-
&Plan.getVectorTripCount())) {
8311-
assert(isa<VPPhi>(ResumePhi) && "Expected a phi");
8312-
// Operand 0 of the resume phi is the induction's end value; record it
// and feed the resume phi into the scalar header phi.
IVEndValues[WideIVR] = ResumePhi->getOperand(0);
8313-
ScalarPhiIRI->addOperand(ResumePhi);
8314-
continue;
8315-
}
8316-
// TODO: Also handle truncated inductions here. Computing end-values
8317-
// separately should be done as VPlan-to-VPlan optimization, after
8318-
// legalizing all resume values to use the last lane from the loop.
8319-
assert(cast<VPWidenIntOrFpInductionRecipe>(VectorPhiR)->getTruncInst() &&
8320-
"should only skip truncated wide inductions");
8321-
continue;
8322-
}
8323-
8324-
// The backedge value provides the value to resume coming out of a loop,
8325-
// which for FORs is a vector whose last element needs to be extracted. The
8326-
// start value provides the value if the loop is bypassed.
8327-
bool IsFOR = isa<VPFirstOrderRecurrencePHIRecipe>(VectorPhiR);
8328-
auto *ResumeFromVectorLoop = VectorPhiR->getBackedgeValue();
8329-
assert(VectorRegion->getSingleSuccessor() == Plan.getMiddleBlock() &&
8330-
"Cannot handle loops with uncountable early exits");
8331-
if (IsFOR)
8332-
ResumeFromVectorLoop = MiddleBuilder.createNaryOp(
8333-
VPInstruction::ExtractLastElement, {ResumeFromVectorLoop}, {},
8334-
"vector.recur.extract");
8335-
// Header phis reaching this point that are not first-order recurrences get
// the reduction-style name.
StringRef Name = IsFOR ? "scalar.recur.init" : "bc.merge.rdx";
8336-
auto *ResumePhiR = ScalarPHBuilder.createScalarPhi(
8337-
{ResumeFromVectorLoop, VectorPhiR->getStartValue()}, {}, Name);
8338-
ScalarPhiIRI->addOperand(ResumePhiR);
8339-
}
8340-
}
8341-
8342-
/// Handle users of first-order recurrences in the original exit block. The
8343-
/// penultimate value of each recurrence is fed to its LCSSA phi users in the
8344-
/// original exit block, replacing the existing extract of the last element
8345-
/// that feeds the VPIRInstruction wrapping the LCSSA phi.
///
/// \param Plan   The VPlan whose first-order recurrence header phis are
///               processed.
/// \param Range  VF range passed to getDecisionAndClampRange to test for
///               VF = vscale x 1; if that test succeeds the function returns
///               without rewriting any further users.
8346-
static void addExitUsersForFirstOrderRecurrences(VPlan &Plan, VFRange &Range) {
8347-
VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion();
8348-
auto *ScalarPHVPBB = Plan.getScalarPreheader();
8349-
auto *MiddleVPBB = Plan.getMiddleBlock();
8350-
VPBuilder ScalarPHBuilder(ScalarPHVPBB);
8351-
// New extracts are inserted at the first non-phi position of the middle
// block.
VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
8352-
8353-
// Predicate that holds only for the scalable VF vscale x 1.
auto IsScalableOne = [](ElementCount VF) -> bool {
8354-
return VF == ElementCount::getScalable(1);
8355-
};
8356-
8357-
for (auto &HeaderPhi : VectorRegion->getEntryBasicBlock()->phis()) {
8358-
auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&HeaderPhi);
8359-
if (!FOR)
8360-
continue;
8361-
8362-
assert(VectorRegion->getSingleSuccessor() == Plan.getMiddleBlock() &&
8363-
"Cannot handle loops with uncountable early exits");
8364-
8365-
// This is the second phase of vectorizing first-order recurrences, creating
8366-
// extract for users outside the loop. An overview of the transformation is
8367-
// described below. Suppose we have the following loop with some use after
8368-
// the loop of the last a[i-1],
8369-
//
8370-
// for (int i = 0; i < n; ++i) {
8371-
// t = a[i - 1];
8372-
// b[i] = a[i] - t;
8373-
// }
8374-
// use t;
8375-
//
8376-
// There is a first-order recurrence on "a". For this loop, the shorthand
8377-
// scalar IR looks like:
8378-
//
8379-
// scalar.ph:
8380-
// s.init = a[-1]
8381-
// br scalar.body
8382-
//
8383-
// scalar.body:
8384-
// i = phi [0, scalar.ph], [i+1, scalar.body]
8385-
// s1 = phi [s.init, scalar.ph], [s2, scalar.body]
8386-
// s2 = a[i]
8387-
// b[i] = s2 - s1
8388-
// br cond, scalar.body, exit.block
8389-
//
8390-
// exit.block:
8391-
// use = lcssa.phi [s1, scalar.body]
8392-
//
8393-
// In this example, s1 is a recurrence because its value depends on the
8394-
// previous iteration. In the first phase of vectorization, we created a
8395-
// VPFirstOrderRecurrencePHIRecipe v1 for s1. Now we create the extracts
8396-
// for users in the scalar preheader and exit block.
8397-
//
8398-
// vector.ph:
8399-
// v_init = vector(..., ..., ..., a[-1])
8400-
// br vector.body
8401-
//
8402-
// vector.body
8403-
// i = phi [0, vector.ph], [i+4, vector.body]
8404-
// v1 = phi [v_init, vector.ph], [v2, vector.body]
8405-
// v2 = a[i, i+1, i+2, i+3]
8406-
// b[i] = v2 - v1
8407-
// // Next, third phase will introduce v1' = splice(v1(3), v2(0, 1, 2))
8408-
// b[i, i+1, i+2, i+3] = v2 - v1
8409-
// br cond, vector.body, middle.block
8410-
//
8411-
// middle.block:
8412-
// vector.recur.extract.for.phi = v2(2)
8413-
// vector.recur.extract = v2(3)
8414-
// br cond, scalar.ph, exit.block
8415-
//
8416-
// scalar.ph:
8417-
// scalar.recur.init = phi [vector.recur.extract, middle.block],
8418-
// [s.init, otherwise]
8419-
// br scalar.body
8420-
//
8421-
// scalar.body:
8422-
// i = phi [0, scalar.ph], [i+1, scalar.body]
8423-
// s1 = phi [scalar.recur.init, scalar.ph], [s2, scalar.body]
8424-
// s2 = a[i]
8425-
// b[i] = s2 - s1
8426-
// br cond, scalar.body, exit.block
8427-
//
8428-
// exit.block:
8429-
// lo = lcssa.phi [s1, scalar.body],
8430-
// [vector.recur.extract.for.phi, middle.block]
8431-
//
8432-
// Now update VPIRInstructions modeling LCSSA phis in the exit block.
8433-
// Extract the penultimate value of the recurrence and use it as operand for
8434-
// the VPIRInstruction modeling the phi.
8435-
for (VPUser *U : FOR->users()) {
8436-
using namespace llvm::VPlanPatternMatch;
8437-
// Only users that extract the last element of the recurrence phi are
// rewritten.
if (!match(U, m_ExtractLastElement(m_Specific(FOR))))
8438-
continue;
8439-
// For VF vscale x 1, if vscale = 1, we are unable to extract the
8440-
// penultimate value of the recurrence. Instead we rely on the existing
8441-
// extract of the last element from the result of
8442-
// VPInstruction::FirstOrderRecurrenceSplice.
8443-
// TODO: Consider vscale_range info and UF.
8444-
if (LoopVectorizationPlanner::getDecisionAndClampRange(IsScalableOne,
8445-
Range))
8446-
// Note: this leaves the function entirely, so no further users or
// recurrences are processed for this plan.
return;
8447-
VPValue *PenultimateElement = MiddleBuilder.createNaryOp(
8448-
VPInstruction::ExtractPenultimateElement, {FOR->getBackedgeValue()},
8449-
{}, "vector.recur.extract.for.phi");
8450-
// Redirect every user of the last-element extract to the new
// penultimate-element extract.
cast<VPInstruction>(U)->replaceAllUsesWith(PenultimateElement);
8451-
}
8452-
}
8453-
}
8454-
84558250
VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
84568251
VPlanPtr Plan, VFRange &Range, LoopVersioning *LVer) {
84578252

@@ -8644,9 +8439,11 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
86448439
R->setOperand(1, WideIV->getStepValue());
86458440
}
86468441

8647-
addExitUsersForFirstOrderRecurrences(*Plan, Range);
8442+
VPlanTransforms::runPass(
8443+
VPlanTransforms::addExitUsersForFirstOrderRecurrences, *Plan, Range);
86488444
DenseMap<VPValue *, VPValue *> IVEndValues;
8649-
addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues);
8445+
VPlanTransforms::runPass(VPlanTransforms::addScalarResumePhis, *Plan,
8446+
RecipeBuilder, IVEndValues);
86508447

86518448
// ---------------------------------------------------------------------------
86528449
// Transform initial VPlan: Apply previously taken decisions, in order, to
@@ -8757,7 +8554,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
87578554
DenseMap<VPValue *, VPValue *> IVEndValues;
87588555
// TODO: IVEndValues are not used yet in the native path, to optimize exit
87598556
// values.
8760-
addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues);
8557+
VPlanTransforms::runPass(VPlanTransforms::addScalarResumePhis, *Plan,
8558+
RecipeBuilder, IVEndValues);
87618559

87628560
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
87638561
return Plan;

0 commit comments

Comments
 (0)