@@ -8201,211 +8201,6 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
8201
8201
}
8202
8202
}
8203
8203
8204
- /// Create and return a ResumePhi for \p WideIV, unless it is truncated. If the
8205
- /// induction recipe is not canonical, creates a VPDerivedIVRecipe to compute
8206
- /// the end value of the induction.
8207
- static VPInstruction *addResumePhiRecipeForInduction (
8208
- VPWidenInductionRecipe *WideIV, VPBuilder &VectorPHBuilder,
8209
- VPBuilder &ScalarPHBuilder, VPTypeAnalysis &TypeInfo, VPValue *VectorTC) {
8210
- auto *WideIntOrFp = dyn_cast<VPWidenIntOrFpInductionRecipe>(WideIV);
8211
- // Truncated wide inductions resume from the last lane of their vector value
8212
- // in the last vector iteration which is handled elsewhere.
8213
- if (WideIntOrFp && WideIntOrFp->getTruncInst ())
8214
- return nullptr ;
8215
-
8216
- VPValue *Start = WideIV->getStartValue ();
8217
- VPValue *Step = WideIV->getStepValue ();
8218
- const InductionDescriptor &ID = WideIV->getInductionDescriptor ();
8219
- VPValue *EndValue = VectorTC;
8220
- if (!WideIntOrFp || !WideIntOrFp->isCanonical ()) {
8221
- EndValue = VectorPHBuilder.createDerivedIV (
8222
- ID.getKind (), dyn_cast_or_null<FPMathOperator>(ID.getInductionBinOp ()),
8223
- Start, VectorTC, Step);
8224
- }
8225
-
8226
- // EndValue is derived from the vector trip count (which has the same type as
8227
- // the widest induction) and thus may be wider than the induction here.
8228
- Type *ScalarTypeOfWideIV = TypeInfo.inferScalarType (WideIV);
8229
- if (ScalarTypeOfWideIV != TypeInfo.inferScalarType (EndValue)) {
8230
- EndValue = VectorPHBuilder.createScalarCast (Instruction::Trunc, EndValue,
8231
- ScalarTypeOfWideIV,
8232
- WideIV->getDebugLoc ());
8233
- }
8234
-
8235
- auto *ResumePhiRecipe = ScalarPHBuilder.createScalarPhi (
8236
- {EndValue, Start}, WideIV->getDebugLoc (), " bc.resume.val" );
8237
- return ResumePhiRecipe;
8238
- }
8239
-
8240
- /// Create resume phis in the scalar preheader for first-order recurrences,
8241
- /// reductions and inductions, and update the VPIRInstructions wrapping the
8242
- /// original phis in the scalar header. End values for inductions are added to
8243
- /// \p IVEndValues.
8244
- static void addScalarResumePhis (VPRecipeBuilder &Builder, VPlan &Plan,
8245
- DenseMap<VPValue *, VPValue *> &IVEndValues) {
8246
- VPTypeAnalysis TypeInfo (Plan);
8247
- auto *ScalarPH = Plan.getScalarPreheader ();
8248
- auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getPredecessors ()[0 ]);
8249
- VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion ();
8250
- VPBuilder VectorPHBuilder (
8251
- cast<VPBasicBlock>(VectorRegion->getSinglePredecessor ()));
8252
- VPBuilder MiddleBuilder (MiddleVPBB, MiddleVPBB->getFirstNonPhi ());
8253
- VPBuilder ScalarPHBuilder (ScalarPH);
8254
- for (VPRecipeBase &ScalarPhiR : Plan.getScalarHeader ()->phis ()) {
8255
- auto *ScalarPhiIRI = cast<VPIRPhi>(&ScalarPhiR);
8256
-
8257
- // TODO: Extract final value from induction recipe initially, optimize to
8258
- // pre-computed end value together in optimizeInductionExitUsers.
8259
- auto *VectorPhiR =
8260
- cast<VPHeaderPHIRecipe>(Builder.getRecipe (&ScalarPhiIRI->getIRPhi ()));
8261
- if (auto *WideIVR = dyn_cast<VPWidenInductionRecipe>(VectorPhiR)) {
8262
- if (VPInstruction *ResumePhi = addResumePhiRecipeForInduction (
8263
- WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
8264
- &Plan.getVectorTripCount ())) {
8265
- assert (isa<VPPhi>(ResumePhi) && " Expected a phi" );
8266
- IVEndValues[WideIVR] = ResumePhi->getOperand (0 );
8267
- ScalarPhiIRI->addOperand (ResumePhi);
8268
- continue ;
8269
- }
8270
- // TODO: Also handle truncated inductions here. Computing end-values
8271
- // separately should be done as VPlan-to-VPlan optimization, after
8272
- // legalizing all resume values to use the last lane from the loop.
8273
- assert (cast<VPWidenIntOrFpInductionRecipe>(VectorPhiR)->getTruncInst () &&
8274
- " should only skip truncated wide inductions" );
8275
- continue ;
8276
- }
8277
-
8278
- // The backedge value provides the value to resume coming out of a loop,
8279
- // which for FORs is a vector whose last element needs to be extracted. The
8280
- // start value provides the value if the loop is bypassed.
8281
- bool IsFOR = isa<VPFirstOrderRecurrencePHIRecipe>(VectorPhiR);
8282
- auto *ResumeFromVectorLoop = VectorPhiR->getBackedgeValue ();
8283
- assert (VectorRegion->getSingleSuccessor () == Plan.getMiddleBlock () &&
8284
- " Cannot handle loops with uncountable early exits" );
8285
- if (IsFOR)
8286
- ResumeFromVectorLoop = MiddleBuilder.createNaryOp (
8287
- VPInstruction::ExtractLastElement, {ResumeFromVectorLoop}, {},
8288
- " vector.recur.extract" );
8289
- StringRef Name = IsFOR ? " scalar.recur.init" : " bc.merge.rdx" ;
8290
- auto *ResumePhiR = ScalarPHBuilder.createScalarPhi (
8291
- {ResumeFromVectorLoop, VectorPhiR->getStartValue ()}, {}, Name);
8292
- ScalarPhiIRI->addOperand (ResumePhiR);
8293
- }
8294
- }
8295
-
8296
- /// Handle users of first-order recurrences in the original exit block. The
8297
- /// penultimate value of recurrences is fed to their LCSSA phi users in the
8298
- /// original exit block using the VPIRInstruction wrapping the LCSSA
8299
- /// phi.
8300
- static void addExitUsersForFirstOrderRecurrences (VPlan &Plan, VFRange &Range) {
8301
- VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion ();
8302
- auto *ScalarPHVPBB = Plan.getScalarPreheader ();
8303
- auto *MiddleVPBB = Plan.getMiddleBlock ();
8304
- VPBuilder ScalarPHBuilder (ScalarPHVPBB);
8305
- VPBuilder MiddleBuilder (MiddleVPBB, MiddleVPBB->getFirstNonPhi ());
8306
-
8307
- auto IsScalableOne = [](ElementCount VF) -> bool {
8308
- return VF == ElementCount::getScalable (1 );
8309
- };
8310
-
8311
- for (auto &HeaderPhi : VectorRegion->getEntryBasicBlock ()->phis ()) {
8312
- auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&HeaderPhi);
8313
- if (!FOR)
8314
- continue ;
8315
-
8316
- assert (VectorRegion->getSingleSuccessor () == Plan.getMiddleBlock () &&
8317
- " Cannot handle loops with uncountable early exits" );
8318
-
8319
- // This is the second phase of vectorizing first-order recurrences, creating
8320
- // extract for users outside the loop. An overview of the transformation is
8321
- // described below. Suppose we have the following loop with some use after
8322
- // the loop of the last a[i-1],
8323
- //
8324
- // for (int i = 0; i < n; ++i) {
8325
- // t = a[i - 1];
8326
- // b[i] = a[i] - t;
8327
- // }
8328
- // use t;
8329
- //
8330
- // There is a first-order recurrence on "a". For this loop, the shorthand
8331
- // scalar IR looks like:
8332
- //
8333
- // scalar.ph:
8334
- // s.init = a[-1]
8335
- // br scalar.body
8336
- //
8337
- // scalar.body:
8338
- // i = phi [0, scalar.ph], [i+1, scalar.body]
8339
- // s1 = phi [s.init, scalar.ph], [s2, scalar.body]
8340
- // s2 = a[i]
8341
- // b[i] = s2 - s1
8342
- // br cond, scalar.body, exit.block
8343
- //
8344
- // exit.block:
8345
- // use = lcssa.phi [s1, scalar.body]
8346
- //
8347
- // In this example, s1 is a recurrence because its value depends on the
8348
- // previous iteration. In the first phase of vectorization, we created a
8349
- // VPFirstOrderRecurrencePHIRecipe v1 for s1. Now we create the extracts
8350
- // for users in the scalar preheader and exit block.
8351
- //
8352
- // vector.ph:
8353
- // v_init = vector(..., ..., ..., a[-1])
8354
- // br vector.body
8355
- //
8356
- // vector.body
8357
- // i = phi [0, vector.ph], [i+4, vector.body]
8358
- // v1 = phi [v_init, vector.ph], [v2, vector.body]
8359
- // v2 = a[i, i+1, i+2, i+3]
8360
- // b[i] = v2 - v1
8361
- // // Next, third phase will introduce v1' = splice(v1(3), v2(0, 1, 2))
8362
- // b[i, i+1, i+2, i+3] = v2 - v1
8363
- // br cond, vector.body, middle.block
8364
- //
8365
- // middle.block:
8366
- // vector.recur.extract.for.phi = v2(2)
8367
- // vector.recur.extract = v2(3)
8368
- // br cond, scalar.ph, exit.block
8369
- //
8370
- // scalar.ph:
8371
- // scalar.recur.init = phi [vector.recur.extract, middle.block],
8372
- // [s.init, otherwise]
8373
- // br scalar.body
8374
- //
8375
- // scalar.body:
8376
- // i = phi [0, scalar.ph], [i+1, scalar.body]
8377
- // s1 = phi [scalar.recur.init, scalar.ph], [s2, scalar.body]
8378
- // s2 = a[i]
8379
- // b[i] = s2 - s1
8380
- // br cond, scalar.body, exit.block
8381
- //
8382
- // exit.block:
8383
- // lo = lcssa.phi [s1, scalar.body],
8384
- // [vector.recur.extract.for.phi, middle.block]
8385
- //
8386
- // Now update VPIRInstructions modeling LCSSA phis in the exit block.
8387
- // Extract the penultimate value of the recurrence and use it as operand for
8388
- // the VPIRInstruction modeling the phi.
8389
- for (VPUser *U : FOR->users ()) {
8390
- using namespace llvm ::VPlanPatternMatch;
8391
- if (!match (U, m_ExtractLastElement (m_Specific (FOR))))
8392
- continue ;
8393
- // For VF vscale x 1, if vscale = 1, we are unable to extract the
8394
- // penultimate value of the recurrence. Instead we rely on the existing
8395
- // extract of the last element from the result of
8396
- // VPInstruction::FirstOrderRecurrenceSplice.
8397
- // TODO: Consider vscale_range info and UF.
8398
- if (LoopVectorizationPlanner::getDecisionAndClampRange (IsScalableOne,
8399
- Range))
8400
- return ;
8401
- VPValue *PenultimateElement = MiddleBuilder.createNaryOp (
8402
- VPInstruction::ExtractPenultimateElement, {FOR->getBackedgeValue ()},
8403
- {}, " vector.recur.extract.for.phi" );
8404
- cast<VPInstruction>(U)->replaceAllUsesWith (PenultimateElement);
8405
- }
8406
- }
8407
- }
8408
-
8409
8204
VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes (
8410
8205
VPlanPtr Plan, VFRange &Range, LoopVersioning *LVer) {
8411
8206
@@ -8598,9 +8393,11 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
8598
8393
R->setOperand (1 , WideIV->getStepValue ());
8599
8394
}
8600
8395
8601
- addExitUsersForFirstOrderRecurrences (*Plan, Range);
8396
+ VPlanTransforms::runPass (
8397
+ VPlanTransforms::addExitUsersForFirstOrderRecurrences, *Plan, Range);
8602
8398
DenseMap<VPValue *, VPValue *> IVEndValues;
8603
- addScalarResumePhis (RecipeBuilder, *Plan, IVEndValues);
8399
+ VPlanTransforms::runPass (VPlanTransforms::addScalarResumePhis, *Plan,
8400
+ RecipeBuilder, IVEndValues);
8604
8401
8605
8402
// ---------------------------------------------------------------------------
8606
8403
// Transform initial VPlan: Apply previously taken decisions, in order, to
@@ -8711,7 +8508,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
8711
8508
DenseMap<VPValue *, VPValue *> IVEndValues;
8712
8509
// TODO: IVEndValues are not used yet in the native path, to optimize exit
8713
8510
// values.
8714
- addScalarResumePhis (RecipeBuilder, *Plan, IVEndValues);
8511
+ VPlanTransforms::runPass (VPlanTransforms::addScalarResumePhis, *Plan,
8512
+ RecipeBuilder, IVEndValues);
8715
8513
8716
8514
assert (verifyVPlanIsValid (*Plan) && " VPlan is invalid" );
8717
8515
return Plan;
0 commit comments