Skip to content

Commit 93073af

Browse files
authored
[LV] Move 3 functions into VPlanTransforms (NFC) (#158644)
Two of them are actually transforms, and the third is a dependent static.
1 parent 4ab2597 commit 93073af

File tree

3 files changed

+218
-208
lines changed

3 files changed

+218
-208
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 6 additions & 208 deletions
Original file line numberDiff line numberDiff line change
@@ -8201,211 +8201,6 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
82018201
}
82028202
}
82038203

8204-
/// Create and return a ResumePhi for \p WideIV, unless it is truncated. If the
8205-
/// induction recipe is not canonical, creates a VPDerivedIVRecipe to compute
8206-
/// the end value of the induction.
/// Returns nullptr when \p WideIV is a VPWidenIntOrFpInductionRecipe carrying a
/// truncate instruction; otherwise returns the phi created through
/// \p ScalarPHBuilder. Any derived-IV or cast recipes needed for the end value
/// are created through \p VectorPHBuilder.
8207-
static VPInstruction *addResumePhiRecipeForInduction(
8208-
VPWidenInductionRecipe *WideIV, VPBuilder &VectorPHBuilder,
8209-
VPBuilder &ScalarPHBuilder, VPTypeAnalysis &TypeInfo, VPValue *VectorTC) {
8210-
auto *WideIntOrFp = dyn_cast<VPWidenIntOrFpInductionRecipe>(WideIV);
8211-
// Truncated wide inductions resume from the last lane of their vector value
8212-
// in the last vector iteration which is handled elsewhere.
8213-
if (WideIntOrFp && WideIntOrFp->getTruncInst())
8214-
return nullptr;
8215-
8216-
VPValue *Start = WideIV->getStartValue();
8217-
VPValue *Step = WideIV->getStepValue();
8218-
const InductionDescriptor &ID = WideIV->getInductionDescriptor();
8219-
// The vector trip count is used directly as the end value only when WideIV is
// a canonical int/FP induction; in every other case the end value is derived
// from the start value, the vector trip count and the step.
VPValue *EndValue = VectorTC;
8220-
if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
8221-
EndValue = VectorPHBuilder.createDerivedIV(
8222-
ID.getKind(), dyn_cast_or_null<FPMathOperator>(ID.getInductionBinOp()),
8223-
Start, VectorTC, Step);
8224-
}
8225-
8226-
// EndValue is derived from the vector trip count (which has the same type as
8227-
// the widest induction) and thus may be wider than the induction here.
8228-
Type *ScalarTypeOfWideIV = TypeInfo.inferScalarType(WideIV);
8229-
if (ScalarTypeOfWideIV != TypeInfo.inferScalarType(EndValue)) {
8230-
EndValue = VectorPHBuilder.createScalarCast(Instruction::Trunc, EndValue,
8231-
ScalarTypeOfWideIV,
8232-
WideIV->getDebugLoc());
8233-
}
8234-
8235-
// Operand order of the resume phi: the end value first, then the induction's
// start value. Callers that read the end value back rely on it being
// operand 0.
auto *ResumePhiRecipe = ScalarPHBuilder.createScalarPhi(
8236-
{EndValue, Start}, WideIV->getDebugLoc(), "bc.resume.val");
8237-
return ResumePhiRecipe;
8238-
}
8239-
8240-
/// Create resume phis in the scalar preheader for first-order recurrences,
8241-
/// reductions and inductions, and update the VPIRInstructions wrapping the
8242-
/// original phis in the scalar header. End values for inductions are added to
8243-
/// \p IVEndValues.
/// \p Builder is used to look up the header phi recipe that corresponds to each
/// IR phi of the scalar header. Truncated wide inductions are skipped: no
/// resume phi is created and no operand is added for them.
8244-
static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
8245-
DenseMap<VPValue *, VPValue *> &IVEndValues) {
8246-
VPTypeAnalysis TypeInfo(Plan);
8247-
auto *ScalarPH = Plan.getScalarPreheader();
8248-
// The first predecessor of the scalar preheader is used as the middle block.
auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getPredecessors()[0]);
8249-
VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion();
8250-
// Three builders, one per block that receives new recipes: the single
// predecessor of the vector loop region, the middle block (at its first
// non-phi recipe) and the scalar preheader.
VPBuilder VectorPHBuilder(
8251-
cast<VPBasicBlock>(VectorRegion->getSinglePredecessor()));
8252-
VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
8253-
VPBuilder ScalarPHBuilder(ScalarPH);
8254-
// Visit every phi of the scalar header; apart from truncated inductions, each
// one gets a resume phi from the scalar preheader appended as an operand.
for (VPRecipeBase &ScalarPhiR : Plan.getScalarHeader()->phis()) {
8255-
auto *ScalarPhiIRI = cast<VPIRPhi>(&ScalarPhiR);
8256-
8257-
// TODO: Extract final value from induction recipe initially, optimize to
8258-
// pre-computed end value together in optimizeInductionExitUsers.
8259-
auto *VectorPhiR =
8260-
cast<VPHeaderPHIRecipe>(Builder.getRecipe(&ScalarPhiIRI->getIRPhi()));
8261-
if (auto *WideIVR = dyn_cast<VPWidenInductionRecipe>(VectorPhiR)) {
8262-
if (VPInstruction *ResumePhi = addResumePhiRecipeForInduction(
8263-
WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
8264-
&Plan.getVectorTripCount())) {
8265-
assert(isa<VPPhi>(ResumePhi) && "Expected a phi");
8266-
// Operand 0 of the resume phi is the end value that
// addResumePhiRecipeForInduction placed first in the operand list.
IVEndValues[WideIVR] = ResumePhi->getOperand(0);
8267-
ScalarPhiIRI->addOperand(ResumePhi);
8268-
continue;
8269-
}
8270-
// TODO: Also handle truncated inductions here. Computing end-values
8271-
// separately should be done as VPlan-to-VPlan optimization, after
8272-
// legalizing all resume values to use the last lane from the loop.
8273-
assert(cast<VPWidenIntOrFpInductionRecipe>(VectorPhiR)->getTruncInst() &&
8274-
"should only skip truncated wide inductions");
8275-
continue;
8276-
}
8277-
8278-
// The backedge value provides the value to resume coming out of a loop,
8279-
// which for FORs is a vector whose last element needs to be extracted. The
8280-
// start value provides the value if the loop is bypassed.
8281-
bool IsFOR = isa<VPFirstOrderRecurrencePHIRecipe>(VectorPhiR);
8282-
auto *ResumeFromVectorLoop = VectorPhiR->getBackedgeValue();
8283-
assert(VectorRegion->getSingleSuccessor() == Plan.getMiddleBlock() &&
8284-
"Cannot handle loops with uncountable early exits");
8285-
if (IsFOR)
8286-
ResumeFromVectorLoop = MiddleBuilder.createNaryOp(
8287-
VPInstruction::ExtractLastElement, {ResumeFromVectorLoop}, {},
8288-
"vector.recur.extract");
8289-
// Any header phi that is not a first-order recurrence gets the reduction
// name.
StringRef Name = IsFOR ? "scalar.recur.init" : "bc.merge.rdx";
8290-
// Operand order of the resume phi: the value coming out of the vector loop
// first, then the header phi's start value.
auto *ResumePhiR = ScalarPHBuilder.createScalarPhi(
8291-
{ResumeFromVectorLoop, VectorPhiR->getStartValue()}, {}, Name);
8292-
ScalarPhiIRI->addOperand(ResumePhiR);
8293-
}
8294-
}
8295-
8296-
/// Handle users in the exit block for first-order recurrences in the original
8297-
/// exit block. The penultimate value of recurrences is fed to their LCSSA phi
8298-
/// users in the original exit block using the VPIRInstruction wrapping the
8299-
/// LCSSA phi.
/// Processing stops entirely (for all remaining recurrences) as soon as the
/// scalable-VF check on \p Range inside the user loop succeeds.
8300-
static void addExitUsersForFirstOrderRecurrences(VPlan &Plan, VFRange &Range) {
8301-
VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion();
8302-
auto *ScalarPHVPBB = Plan.getScalarPreheader();
8303-
auto *MiddleVPBB = Plan.getMiddleBlock();
8304-
VPBuilder ScalarPHBuilder(ScalarPHVPBB);
8305-
VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
8306-
8307-
// Predicate that holds only for the scalable VF with a known minimum of one
// element (vscale x 1).
auto IsScalableOne = [](ElementCount VF) -> bool {
8308-
return VF == ElementCount::getScalable(1);
8309-
};
8310-
8311-
for (auto &HeaderPhi : VectorRegion->getEntryBasicBlock()->phis()) {
8312-
auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&HeaderPhi);
8313-
if (!FOR)
8314-
continue;
8315-
8316-
assert(VectorRegion->getSingleSuccessor() == Plan.getMiddleBlock() &&
8317-
"Cannot handle loops with uncountable early exits");
8318-
8319-
// This is the second phase of vectorizing first-order recurrences, creating
8320-
// extract for users outside the loop. An overview of the transformation is
8321-
// described below. Suppose we have the following loop with some use after
8322-
// the loop of the last a[i-1],
8323-
//
8324-
// for (int i = 0; i < n; ++i) {
8325-
// t = a[i - 1];
8326-
// b[i] = a[i] - t;
8327-
// }
8328-
// use t;
8329-
//
8330-
// There is a first-order recurrence on "a". For this loop, the shorthand
8331-
// scalar IR looks like:
8332-
//
8333-
// scalar.ph:
8334-
// s.init = a[-1]
8335-
// br scalar.body
8336-
//
8337-
// scalar.body:
8338-
// i = phi [0, scalar.ph], [i+1, scalar.body]
8339-
// s1 = phi [s.init, scalar.ph], [s2, scalar.body]
8340-
// s2 = a[i]
8341-
// b[i] = s2 - s1
8342-
// br cond, scalar.body, exit.block
8343-
//
8344-
// exit.block:
8345-
// use = lcssa.phi [s1, scalar.body]
8346-
//
8347-
// In this example, s1 is a recurrence because its value depends on the
8348-
// previous iteration. In the first phase of vectorization, we created a
8349-
// VPFirstOrderRecurrencePHIRecipe v1 for s1. Now we create the extracts
8350-
// for users in the scalar preheader and exit block.
8351-
//
8352-
// vector.ph:
8353-
// v_init = vector(..., ..., ..., a[-1])
8354-
// br vector.body
8355-
//
8356-
// vector.body:
8357-
// i = phi [0, vector.ph], [i+4, vector.body]
8358-
// v1 = phi [v_init, vector.ph], [v2, vector.body]
8359-
// v2 = a[i, i+1, i+2, i+3]
8360-
// b[i] = v2 - v1
8361-
// // Next, third phase will introduce v1' = splice(v1(3), v2(0, 1, 2))
8362-
// b[i, i+1, i+2, i+3] = v2 - v1
8363-
// br cond, vector.body, middle.block
8364-
//
8365-
// middle.block:
8366-
// vector.recur.extract.for.phi = v2(2)
8367-
// vector.recur.extract = v2(3)
8368-
// br cond, scalar.ph, exit.block
8369-
//
8370-
// scalar.ph:
8371-
// scalar.recur.init = phi [vector.recur.extract, middle.block],
8372-
// [s.init, otherwise]
8373-
// br scalar.body
8374-
//
8375-
// scalar.body:
8376-
// i = phi [0, scalar.ph], [i+1, scalar.body]
8377-
// s1 = phi [scalar.recur.init, scalar.ph], [s2, scalar.body]
8378-
// s2 = a[i]
8379-
// b[i] = s2 - s1
8380-
// br cond, scalar.body, exit.block
8381-
//
8382-
// exit.block:
8383-
// lo = lcssa.phi [s1, scalar.body],
8384-
// [vector.recur.extract.for.phi, middle.block]
8385-
//
8386-
// Now update VPIRInstructions modeling LCSSA phis in the exit block.
8387-
// Extract the penultimate value of the recurrence and use it as operand for
8388-
// the VPIRInstruction modeling the phi.
8389-
for (VPUser *U : FOR->users()) {
8390-
using namespace llvm::VPlanPatternMatch;
8391-
// Only users that extract the last element of the recurrence phi itself
// are rewritten.
if (!match(U, m_ExtractLastElement(m_Specific(FOR))))
8392-
continue;
8393-
// For VF vscale x 1, if vscale = 1, we are unable to extract the
8394-
// penultimate value of the recurrence. Instead we rely on the existing
8395-
// extract of the last element from the result of
8396-
// VPInstruction::FirstOrderRecurrenceSplice.
8397-
// TODO: Consider vscale_range info and UF.
8398-
// NOTE: the return below leaves the whole function, so any recurrences
// and users not yet visited are left untouched as well.
if (LoopVectorizationPlanner::getDecisionAndClampRange(IsScalableOne,
8399-
Range))
8400-
return;
8401-
// The penultimate element is extracted from the recurrence's backedge
// value in the middle block and replaces every use of the matched
// extract.
VPValue *PenultimateElement = MiddleBuilder.createNaryOp(
8402-
VPInstruction::ExtractPenultimateElement, {FOR->getBackedgeValue()},
8403-
{}, "vector.recur.extract.for.phi");
8404-
cast<VPInstruction>(U)->replaceAllUsesWith(PenultimateElement);
8405-
}
8406-
}
8407-
}
8408-
84098204
VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
84108205
VPlanPtr Plan, VFRange &Range, LoopVersioning *LVer) {
84118206

@@ -8598,9 +8393,11 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
85988393
R->setOperand(1, WideIV->getStepValue());
85998394
}
86008395

8601-
addExitUsersForFirstOrderRecurrences(*Plan, Range);
8396+
VPlanTransforms::runPass(
8397+
VPlanTransforms::addExitUsersForFirstOrderRecurrences, *Plan, Range);
86028398
DenseMap<VPValue *, VPValue *> IVEndValues;
8603-
addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues);
8399+
VPlanTransforms::runPass(VPlanTransforms::addScalarResumePhis, *Plan,
8400+
RecipeBuilder, IVEndValues);
86048401

86058402
// ---------------------------------------------------------------------------
86068403
// Transform initial VPlan: Apply previously taken decisions, in order, to
@@ -8711,7 +8508,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
87118508
DenseMap<VPValue *, VPValue *> IVEndValues;
87128509
// TODO: IVEndValues are not used yet in the native path, to optimize exit
87138510
// values.
8714-
addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues);
8511+
VPlanTransforms::runPass(VPlanTransforms::addScalarResumePhis, *Plan,
8512+
RecipeBuilder, IVEndValues);
87158513

87168514
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
87178515
return Plan;

0 commit comments

Comments
 (0)