@@ -8247,211 +8247,6 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
8247
8247
}
8248
8248
}
8249
8249
8250
- /// Create and return a ResumePhi for \p WideIV, unless it is truncated. If the
8251
- /// induction recipe is not canonical, creates a VPDerivedIVRecipe to compute
8252
- /// the end value of the induction.
8253
- static VPInstruction *addResumePhiRecipeForInduction (
8254
- VPWidenInductionRecipe *WideIV, VPBuilder &VectorPHBuilder,
8255
- VPBuilder &ScalarPHBuilder, VPTypeAnalysis &TypeInfo, VPValue *VectorTC) {
8256
- auto *WideIntOrFp = dyn_cast<VPWidenIntOrFpInductionRecipe>(WideIV);
8257
- // Truncated wide inductions resume from the last lane of their vector value
8258
- // in the last vector iteration which is handled elsewhere.
8259
- if (WideIntOrFp && WideIntOrFp->getTruncInst ())
8260
- return nullptr ;
8261
-
8262
- VPValue *Start = WideIV->getStartValue ();
8263
- VPValue *Step = WideIV->getStepValue ();
8264
- const InductionDescriptor &ID = WideIV->getInductionDescriptor ();
8265
- VPValue *EndValue = VectorTC;
8266
- if (!WideIntOrFp || !WideIntOrFp->isCanonical ()) {
8267
- EndValue = VectorPHBuilder.createDerivedIV (
8268
- ID.getKind (), dyn_cast_or_null<FPMathOperator>(ID.getInductionBinOp ()),
8269
- Start, VectorTC, Step);
8270
- }
8271
-
8272
- // EndValue is derived from the vector trip count (which has the same type as
8273
- // the widest induction) and thus may be wider than the induction here.
8274
- Type *ScalarTypeOfWideIV = TypeInfo.inferScalarType (WideIV);
8275
- if (ScalarTypeOfWideIV != TypeInfo.inferScalarType (EndValue)) {
8276
- EndValue = VectorPHBuilder.createScalarCast (Instruction::Trunc, EndValue,
8277
- ScalarTypeOfWideIV,
8278
- WideIV->getDebugLoc ());
8279
- }
8280
-
8281
- auto *ResumePhiRecipe = ScalarPHBuilder.createScalarPhi (
8282
- {EndValue, Start}, WideIV->getDebugLoc (), " bc.resume.val" );
8283
- return ResumePhiRecipe;
8284
- }
8285
-
8286
- /// Create resume phis in the scalar preheader for first-order recurrences,
8287
- /// reductions and inductions, and update the VPIRInstructions wrapping the
8288
- /// original phis in the scalar header. End values for inductions are added to
8289
- /// \p IVEndValues.
8290
- static void addScalarResumePhis (VPRecipeBuilder &Builder, VPlan &Plan,
8291
- DenseMap<VPValue *, VPValue *> &IVEndValues) {
8292
- VPTypeAnalysis TypeInfo (Plan);
8293
- auto *ScalarPH = Plan.getScalarPreheader ();
8294
- auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getPredecessors ()[0 ]);
8295
- VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion ();
8296
- VPBuilder VectorPHBuilder (
8297
- cast<VPBasicBlock>(VectorRegion->getSinglePredecessor ()));
8298
- VPBuilder MiddleBuilder (MiddleVPBB, MiddleVPBB->getFirstNonPhi ());
8299
- VPBuilder ScalarPHBuilder (ScalarPH);
8300
- for (VPRecipeBase &ScalarPhiR : Plan.getScalarHeader ()->phis ()) {
8301
- auto *ScalarPhiIRI = cast<VPIRPhi>(&ScalarPhiR);
8302
-
8303
- // TODO: Extract final value from induction recipe initially, optimize to
8304
- // pre-computed end value together in optimizeInductionExitUsers.
8305
- auto *VectorPhiR =
8306
- cast<VPHeaderPHIRecipe>(Builder.getRecipe (&ScalarPhiIRI->getIRPhi ()));
8307
- if (auto *WideIVR = dyn_cast<VPWidenInductionRecipe>(VectorPhiR)) {
8308
- if (VPInstruction *ResumePhi = addResumePhiRecipeForInduction (
8309
- WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
8310
- &Plan.getVectorTripCount ())) {
8311
- assert (isa<VPPhi>(ResumePhi) && " Expected a phi" );
8312
- IVEndValues[WideIVR] = ResumePhi->getOperand (0 );
8313
- ScalarPhiIRI->addOperand (ResumePhi);
8314
- continue ;
8315
- }
8316
- // TODO: Also handle truncated inductions here. Computing end-values
8317
- // separately should be done as VPlan-to-VPlan optimization, after
8318
- // legalizing all resume values to use the last lane from the loop.
8319
- assert (cast<VPWidenIntOrFpInductionRecipe>(VectorPhiR)->getTruncInst () &&
8320
- " should only skip truncated wide inductions" );
8321
- continue ;
8322
- }
8323
-
8324
- // The backedge value provides the value to resume coming out of a loop,
8325
- // which for FORs is a vector whose last element needs to be extracted. The
8326
- // start value provides the value if the loop is bypassed.
8327
- bool IsFOR = isa<VPFirstOrderRecurrencePHIRecipe>(VectorPhiR);
8328
- auto *ResumeFromVectorLoop = VectorPhiR->getBackedgeValue ();
8329
- assert (VectorRegion->getSingleSuccessor () == Plan.getMiddleBlock () &&
8330
- " Cannot handle loops with uncountable early exits" );
8331
- if (IsFOR)
8332
- ResumeFromVectorLoop = MiddleBuilder.createNaryOp (
8333
- VPInstruction::ExtractLastElement, {ResumeFromVectorLoop}, {},
8334
- " vector.recur.extract" );
8335
- StringRef Name = IsFOR ? " scalar.recur.init" : " bc.merge.rdx" ;
8336
- auto *ResumePhiR = ScalarPHBuilder.createScalarPhi (
8337
- {ResumeFromVectorLoop, VectorPhiR->getStartValue ()}, {}, Name);
8338
- ScalarPhiIRI->addOperand (ResumePhiR);
8339
- }
8340
- }
8341
-
8342
- /// Handle users in the exit block for first-order recurrences in the original
8343
- /// exit block. The penultimate value of recurrences is fed to their LCSSA phi
8344
- /// users in the original exit block using the VPIRInstruction wrapping the
8345
- /// LCSSA phi.
8346
- static void addExitUsersForFirstOrderRecurrences (VPlan &Plan, VFRange &Range) {
8347
- VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion ();
8348
- auto *ScalarPHVPBB = Plan.getScalarPreheader ();
8349
- auto *MiddleVPBB = Plan.getMiddleBlock ();
8350
- VPBuilder ScalarPHBuilder (ScalarPHVPBB);
8351
- VPBuilder MiddleBuilder (MiddleVPBB, MiddleVPBB->getFirstNonPhi ());
8352
-
8353
- auto IsScalableOne = [](ElementCount VF) -> bool {
8354
- return VF == ElementCount::getScalable (1 );
8355
- };
8356
-
8357
- for (auto &HeaderPhi : VectorRegion->getEntryBasicBlock ()->phis ()) {
8358
- auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&HeaderPhi);
8359
- if (!FOR)
8360
- continue ;
8361
-
8362
- assert (VectorRegion->getSingleSuccessor () == Plan.getMiddleBlock () &&
8363
- " Cannot handle loops with uncountable early exits" );
8364
-
8365
- // This is the second phase of vectorizing first-order recurrences, creating
8366
- // extract for users outside the loop. An overview of the transformation is
8367
- // described below. Suppose we have the following loop with some use after
8368
- // the loop of the last a[i-1],
8369
- //
8370
- // for (int i = 0; i < n; ++i) {
8371
- // t = a[i - 1];
8372
- // b[i] = a[i] - t;
8373
- // }
8374
- // use t;
8375
- //
8376
- // There is a first-order recurrence on "a". For this loop, the shorthand
8377
- // scalar IR looks like:
8378
- //
8379
- // scalar.ph:
8380
- // s.init = a[-1]
8381
- // br scalar.body
8382
- //
8383
- // scalar.body:
8384
- // i = phi [0, scalar.ph], [i+1, scalar.body]
8385
- // s1 = phi [s.init, scalar.ph], [s2, scalar.body]
8386
- // s2 = a[i]
8387
- // b[i] = s2 - s1
8388
- // br cond, scalar.body, exit.block
8389
- //
8390
- // exit.block:
8391
- // use = lcssa.phi [s1, scalar.body]
8392
- //
8393
- // In this example, s1 is a recurrence because its value depends on the
8394
- // previous iteration. In the first phase of vectorization, we created a
8395
- // VPFirstOrderRecurrencePHIRecipe v1 for s1. Now we create the extracts
8396
- // for users in the scalar preheader and exit block.
8397
- //
8398
- // vector.ph:
8399
- // v_init = vector(..., ..., ..., a[-1])
8400
- // br vector.body
8401
- //
8402
- // vector.body
8403
- // i = phi [0, vector.ph], [i+4, vector.body]
8404
- // v1 = phi [v_init, vector.ph], [v2, vector.body]
8405
- // v2 = a[i, i+1, i+2, i+3]
8406
- // b[i] = v2 - v1
8407
- // // Next, third phase will introduce v1' = splice(v1(3), v2(0, 1, 2))
8408
- // b[i, i+1, i+2, i+3] = v2 - v1
8409
- // br cond, vector.body, middle.block
8410
- //
8411
- // middle.block:
8412
- // vector.recur.extract.for.phi = v2(2)
8413
- // vector.recur.extract = v2(3)
8414
- // br cond, scalar.ph, exit.block
8415
- //
8416
- // scalar.ph:
8417
- // scalar.recur.init = phi [vector.recur.extract, middle.block],
8418
- // [s.init, otherwise]
8419
- // br scalar.body
8420
- //
8421
- // scalar.body:
8422
- // i = phi [0, scalar.ph], [i+1, scalar.body]
8423
- // s1 = phi [scalar.recur.init, scalar.ph], [s2, scalar.body]
8424
- // s2 = a[i]
8425
- // b[i] = s2 - s1
8426
- // br cond, scalar.body, exit.block
8427
- //
8428
- // exit.block:
8429
- // lo = lcssa.phi [s1, scalar.body],
8430
- // [vector.recur.extract.for.phi, middle.block]
8431
- //
8432
- // Now update VPIRInstructions modeling LCSSA phis in the exit block.
8433
- // Extract the penultimate value of the recurrence and use it as operand for
8434
- // the VPIRInstruction modeling the phi.
8435
- for (VPUser *U : FOR->users ()) {
8436
- using namespace llvm ::VPlanPatternMatch;
8437
- if (!match (U, m_ExtractLastElement (m_Specific (FOR))))
8438
- continue ;
8439
- // For VF vscale x 1, if vscale = 1, we are unable to extract the
8440
- // penultimate value of the recurrence. Instead we rely on the existing
8441
- // extract of the last element from the result of
8442
- // VPInstruction::FirstOrderRecurrenceSplice.
8443
- // TODO: Consider vscale_range info and UF.
8444
- if (LoopVectorizationPlanner::getDecisionAndClampRange (IsScalableOne,
8445
- Range))
8446
- return ;
8447
- VPValue *PenultimateElement = MiddleBuilder.createNaryOp (
8448
- VPInstruction::ExtractPenultimateElement, {FOR->getBackedgeValue ()},
8449
- {}, " vector.recur.extract.for.phi" );
8450
- cast<VPInstruction>(U)->replaceAllUsesWith (PenultimateElement);
8451
- }
8452
- }
8453
- }
8454
-
8455
8250
VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes (
8456
8251
VPlanPtr Plan, VFRange &Range, LoopVersioning *LVer) {
8457
8252
@@ -8644,9 +8439,11 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
8644
8439
R->setOperand (1 , WideIV->getStepValue ());
8645
8440
}
8646
8441
8647
- addExitUsersForFirstOrderRecurrences (*Plan, Range);
8442
+ VPlanTransforms::runPass (
8443
+ VPlanTransforms::addExitUsersForFirstOrderRecurrences, *Plan, Range);
8648
8444
DenseMap<VPValue *, VPValue *> IVEndValues;
8649
- addScalarResumePhis (RecipeBuilder, *Plan, IVEndValues);
8445
+ VPlanTransforms::runPass (VPlanTransforms::addScalarResumePhis, *Plan,
8446
+ RecipeBuilder, IVEndValues);
8650
8447
8651
8448
// ---------------------------------------------------------------------------
8652
8449
// Transform initial VPlan: Apply previously taken decisions, in order, to
@@ -8757,7 +8554,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
8757
8554
DenseMap<VPValue *, VPValue *> IVEndValues;
8758
8555
// TODO: IVEndValues are not used yet in the native path, to optimize exit
8759
8556
// values.
8760
- addScalarResumePhis (RecipeBuilder, *Plan, IVEndValues);
8557
+ VPlanTransforms::runPass (VPlanTransforms::addScalarResumePhis, *Plan,
8558
+ RecipeBuilder, IVEndValues);
8761
8559
8762
8560
assert (verifyVPlanIsValid (*Plan) && " VPlan is invalid" );
8763
8561
return Plan;
0 commit comments