@@ -8201,211 +8201,6 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
82018201 }
82028202}
82038203
8204- /// Create and return a resume phi for \p WideIV using \p ScalarPHBuilder, or return nullptr if \p WideIV is a truncated int/FP induction.
8205- /// The phi's operands are the induction's end value followed by its start value. The end value is \p VectorTC for a canonical int/FP induction; otherwise a
8206- /// derived IV is created via \p VectorPHBuilder from the start value, \p VectorTC and the step. In both cases it is truncated to the scalar type of \p WideIV (per \p TypeInfo) if the types differ.
8207- static VPInstruction *addResumePhiRecipeForInduction (
8208- VPWidenInductionRecipe *WideIV, VPBuilder &VectorPHBuilder,
8209- VPBuilder &ScalarPHBuilder, VPTypeAnalysis &TypeInfo, VPValue *VectorTC) {
8210- auto *WideIntOrFp = dyn_cast<VPWidenIntOrFpInductionRecipe>(WideIV); // Null when WideIV is some other kind of widened induction.
8211- // Truncated wide inductions resume from the last lane of their vector value
8212- // in the last vector iteration, which is handled elsewhere.
8213- if (WideIntOrFp && WideIntOrFp->getTruncInst ())
8214- return nullptr ;
8215-
8216- VPValue *Start = WideIV->getStartValue ();
8217- VPValue *Step = WideIV->getStepValue ();
8218- const InductionDescriptor &ID = WideIV->getInductionDescriptor ();
8219- VPValue *EndValue = VectorTC; // Default: a canonical induction ends at the vector trip count itself.
8220- if (!WideIntOrFp || !WideIntOrFp->isCanonical ()) { // Any non-int/FP or non-canonical induction needs its end value derived.
8221- EndValue = VectorPHBuilder.createDerivedIV (
8222- ID.getKind (), dyn_cast_or_null<FPMathOperator>(ID.getInductionBinOp ()),
8223- Start, VectorTC, Step);
8224- }
8225-
8226- // EndValue is derived from the vector trip count (which has the same type as
8227- // the widest induction) and thus may be wider than the induction here.
8228- Type *ScalarTypeOfWideIV = TypeInfo.inferScalarType (WideIV);
8229- if (ScalarTypeOfWideIV != TypeInfo.inferScalarType (EndValue)) {
8230- EndValue = VectorPHBuilder.createScalarCast (Instruction::Trunc, EndValue,
8231- ScalarTypeOfWideIV,
8232- WideIV->getDebugLoc ());
8233- }
8234-
8235- auto *ResumePhiRecipe = ScalarPHBuilder.createScalarPhi (
8236- {EndValue, Start}, WideIV->getDebugLoc (), " bc.resume.val" ); // Operand 0 is the end value, operand 1 the start value.
8237- return ResumePhiRecipe;
8238- }
8239-
8240- /// Create resume phis in the scalar preheader of \p Plan for first-order recurrences,
8241- /// reductions and inductions, and add each one as an operand to the VPIRPhi wrapping the
8242- /// corresponding original phi in the scalar header. For every induction that gets a resume phi, its end value
8243- /// (operand 0 of that phi) is recorded in \p IVEndValues, keyed by the wide induction recipe. Truncated wide inductions are skipped. \p Builder maps each IR phi to its header phi recipe.
8244- static void addScalarResumePhis (VPRecipeBuilder &Builder, VPlan &Plan,
8245- DenseMap<VPValue *, VPValue *> &IVEndValues) {
8246- VPTypeAnalysis TypeInfo (Plan);
8247- auto *ScalarPH = Plan.getScalarPreheader ();
8248- auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getPredecessors ()[0 ]); // NOTE(review): assumes predecessor 0 of the scalar preheader is the middle block - confirm.
8249- VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion ();
8250- VPBuilder VectorPHBuilder (
8251- cast<VPBasicBlock>(VectorRegion->getSinglePredecessor ())); // Builds in the block that is the single predecessor of the vector loop region.
8252- VPBuilder MiddleBuilder (MiddleVPBB, MiddleVPBB->getFirstNonPhi ()); // Inserts at the first non-phi position of the middle block.
8253- VPBuilder ScalarPHBuilder (ScalarPH);
8254- for (VPRecipeBase &ScalarPhiR : Plan.getScalarHeader ()->phis ()) {
8255- auto *ScalarPhiIRI = cast<VPIRPhi>(&ScalarPhiR);
8256-
8257- // TODO: Extract final value from induction recipe initially, optimize to
8258- // pre-computed end value together in optimizeInductionExitUsers.
8259- auto *VectorPhiR =
8260- cast<VPHeaderPHIRecipe>(Builder.getRecipe (&ScalarPhiIRI->getIRPhi ())); // Header phi recipe created for this original IR phi.
8261- if (auto *WideIVR = dyn_cast<VPWidenInductionRecipe>(VectorPhiR)) {
8262- if (VPInstruction *ResumePhi = addResumePhiRecipeForInduction (
8263- WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
8264- &Plan.getVectorTripCount ())) {
8265- assert (isa<VPPhi>(ResumePhi) && " Expected a phi" );
8266- IVEndValues[WideIVR] = ResumePhi->getOperand (0 ); // Operand 0 of the resume phi is the induction's end value.
8267- ScalarPhiIRI->addOperand (ResumePhi);
8268- continue ;
8269- }
8270- // TODO: Also handle truncated inductions here. Computing end-values
8271- // separately should be done as VPlan-to-VPlan optimization, after
8272- // legalizing all resume values to use the last lane from the loop.
8273- assert (cast<VPWidenIntOrFpInductionRecipe>(VectorPhiR)->getTruncInst () &&
8274- " should only skip truncated wide inductions" );
8275- continue ;
8276- }
8277-
8278- // The backedge value provides the value to resume from when coming out of the loop,
8279- // which for FORs is a vector whose last element needs to be extracted. The
8280- // start value provides the value if the loop is bypassed.
8281- bool IsFOR = isa<VPFirstOrderRecurrencePHIRecipe>(VectorPhiR);
8282- auto *ResumeFromVectorLoop = VectorPhiR->getBackedgeValue ();
8283- assert (VectorRegion->getSingleSuccessor () == Plan.getMiddleBlock () &&
8284- " Cannot handle loops with uncountable early exits" );
8285- if (IsFOR)
8286- ResumeFromVectorLoop = MiddleBuilder.createNaryOp (
8287- VPInstruction::ExtractLastElement, {ResumeFromVectorLoop}, {},
8288- " vector.recur.extract" );
8289- StringRef Name = IsFOR ? " scalar.recur.init" : " bc.merge.rdx" ; // Header phis that are neither inductions nor FORs get the reduction name.
8290- auto *ResumePhiR = ScalarPHBuilder.createScalarPhi (
8291- {ResumeFromVectorLoop, VectorPhiR->getStartValue ()}, {}, Name); // Operand 0: value from the vector loop; operand 1: start value.
8292- ScalarPhiIRI->addOperand (ResumePhiR);
8293- }
8294- }
8295-
8296- /// Handle users of first-order recurrences in the original
8297- /// exit block. For each VPFirstOrderRecurrencePHIRecipe in the entry block of the vector loop region of \p Plan, every user that extracts the last element of the recurrence phi
8298- /// is replaced by a new ExtractPenultimateElement of the recurrence's backedge value, created in the middle block. This feeds the penultimate value to the VPIRInstruction wrapping the
8299- /// LCSSA phi. Processing stops as soon as getDecisionAndClampRange returns true for the VF == vscale x 1 predicate on \p Range.
8300- static void addExitUsersForFirstOrderRecurrences (VPlan &Plan, VFRange &Range) {
8301- VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion ();
8302- auto *ScalarPHVPBB = Plan.getScalarPreheader ();
8303- auto *MiddleVPBB = Plan.getMiddleBlock ();
8304- VPBuilder ScalarPHBuilder (ScalarPHVPBB); // NOTE(review): ScalarPHBuilder is not referenced again in this function.
8305- VPBuilder MiddleBuilder (MiddleVPBB, MiddleVPBB->getFirstNonPhi ()); // Inserts at the first non-phi position of the middle block.
8306-
8307- auto IsScalableOne = [](ElementCount VF) -> bool {
8308- return VF == ElementCount::getScalable (1 );
8309- };
8310-
8311- for (auto &HeaderPhi : VectorRegion->getEntryBasicBlock ()->phis ()) {
8312- auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&HeaderPhi);
8313- if (!FOR)
8314- continue ;
8315-
8316- assert (VectorRegion->getSingleSuccessor () == Plan.getMiddleBlock () &&
8317- " Cannot handle loops with uncountable early exits" );
8318-
8319- // This is the second phase of vectorizing first-order recurrences, creating
8320- // extracts for users outside the loop. An overview of the transformation is
8321- // described below. Suppose we have the following loop with some use after
8322- // the loop of the last a[i-1],
8323- //
8324- //   for (int i = 0; i < n; ++i) {
8325- //     t = a[i - 1];
8326- //     b[i] = a[i] - t;
8327- //   }
8328- //   use t;
8329- //
8330- // There is a first-order recurrence on "a". For this loop, the shorthand
8331- // scalar IR looks like:
8332- //
8333- //   scalar.ph:
8334- //     s.init = a[-1]
8335- //     br scalar.body
8336- //
8337- //   scalar.body:
8338- //     i = phi [0, scalar.ph], [i+1, scalar.body]
8339- //     s1 = phi [s.init, scalar.ph], [s2, scalar.body]
8340- //     s2 = a[i]
8341- //     b[i] = s2 - s1
8342- //     br cond, scalar.body, exit.block
8343- //
8344- //   exit.block:
8345- //     use = lcssa.phi [s1, scalar.body]
8346- //
8347- // In this example, s1 is a recurrence because its value depends on the
8348- // previous iteration. In the first phase of vectorization, we created a
8349- // VPFirstOrderRecurrencePHIRecipe v1 for s1. Now we create the extracts
8350- // for users in the scalar preheader and exit block.
8351- //
8352- //   vector.ph:
8353- //     v_init = vector(..., ..., ..., a[-1])
8354- //     br vector.body
8355- //
8356- //   vector.body:
8357- //     i = phi [0, vector.ph], [i+4, vector.body]
8358- //     v1 = phi [v_init, vector.ph], [v2, vector.body]
8359- //     v2 = a[i, i+1, i+2, i+3]
8360- //     b[i] = v2 - v1
8361- //     // Next, third phase will introduce v1' = splice(v1(3), v2(0, 1, 2))
8362- //     b[i, i+1, i+2, i+3] = v2 - v1
8363- //     br cond, vector.body, middle.block
8364- //
8365- //   middle.block:
8366- //     vector.recur.extract.for.phi = v2(2)
8367- //     vector.recur.extract = v2(3)
8368- //     br cond, scalar.ph, exit.block
8369- //
8370- //   scalar.ph:
8371- //     scalar.recur.init = phi [vector.recur.extract, middle.block],
8372- //                             [s.init, otherwise]
8373- //     br scalar.body
8374- //
8375- //   scalar.body:
8376- //     i = phi [0, scalar.ph], [i+1, scalar.body]
8377- //     s1 = phi [scalar.recur.init, scalar.ph], [s2, scalar.body]
8378- //     s2 = a[i]
8379- //     b[i] = s2 - s1
8380- //     br cond, scalar.body, exit.block
8381- //
8382- //   exit.block:
8383- //     lo = lcssa.phi [s1, scalar.body],
8384- //                    [vector.recur.extract.for.phi, middle.block]
8385- //
8386- // Now update VPIRInstructions modeling LCSSA phis in the exit block.
8387- // Extract the penultimate value of the recurrence and use it as operand for
8388- // the VPIRInstruction modeling the phi.
8389- for (VPUser *U : FOR->users ()) {
8390- using namespace llvm ::VPlanPatternMatch;
8391- if (!match (U, m_ExtractLastElement (m_Specific (FOR)))) // Only users that extract the last element of the recurrence phi are rewritten.
8392- continue ;
8393- // For VF vscale x 1, if vscale = 1, we are unable to extract the
8394- // penultimate value of the recurrence. Instead we rely on the existing
8395- // extract of the last element from the result of
8396- // VPInstruction::FirstOrderRecurrenceSplice.
8397- // TODO: Consider vscale_range info and UF.
8398- if (LoopVectorizationPlanner::getDecisionAndClampRange (IsScalableOne,
8399- Range))
8400- return ; // Exits the whole function, so remaining users and header phis are skipped as well.
8401- VPValue *PenultimateElement = MiddleBuilder.createNaryOp (
8402- VPInstruction::ExtractPenultimateElement, {FOR->getBackedgeValue ()},
8403- {}, " vector.recur.extract.for.phi" );
8404- cast<VPInstruction>(U)->replaceAllUsesWith (PenultimateElement); // Users of the old extract now read the penultimate element instead.
8405- }
8406- }
8407- }
8408-
84098204VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes (
84108205 VPlanPtr Plan, VFRange &Range, LoopVersioning *LVer) {
84118206
@@ -8598,9 +8393,11 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
85988393 R->setOperand (1 , WideIV->getStepValue ());
85998394 }
86008395
8601- addExitUsersForFirstOrderRecurrences (*Plan, Range);
8396+ VPlanTransforms::runPass (
8397+ VPlanTransforms::addExitUsersForFirstOrderRecurrences, *Plan, Range);
86028398 DenseMap<VPValue *, VPValue *> IVEndValues;
8603- addScalarResumePhis (RecipeBuilder, *Plan, IVEndValues);
8399+ VPlanTransforms::runPass (VPlanTransforms::addScalarResumePhis, *Plan,
8400+ RecipeBuilder, IVEndValues);
86048401
86058402 // ---------------------------------------------------------------------------
86068403 // Transform initial VPlan: Apply previously taken decisions, in order, to
@@ -8711,7 +8508,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
87118508 DenseMap<VPValue *, VPValue *> IVEndValues;
87128509 // TODO: IVEndValues are not used yet in the native path, to optimize exit
87138510 // values.
8714- addScalarResumePhis (RecipeBuilder, *Plan, IVEndValues);
8511+ VPlanTransforms::runPass (VPlanTransforms::addScalarResumePhis, *Plan,
8512+ RecipeBuilder, IVEndValues);
87158513
87168514 assert (verifyVPlanIsValid (*Plan) && " VPlan is invalid" );
87178515 return Plan;
0 commit comments