@@ -1547,6 +1547,126 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
15471547 }
15481548}
15491549
1550+ // / This function adds (0 * Step, 1 * Step, 2 * Step, ...) to StartValue of
1551+ // / an induction variable at the preheader.
1552+ static VPSingleDefRecipe *createStepVector (VPValue *StartValue, VPValue *Step,
1553+ Type *InductionTy,
1554+ const InductionDescriptor &ID,
1555+ VPBasicBlock *VectorPHVPBB,
1556+ DebugLoc DL) {
1557+ Type *IntTy = InductionTy->isIntegerTy ()
1558+ ? InductionTy
1559+ : IntegerType::get (InductionTy->getContext (),
1560+ InductionTy->getScalarSizeInBits ());
1561+ // Create a vector of consecutive numbers from zero to VF.
1562+ VPSingleDefRecipe *InitVec =
1563+ new VPWidenIntrinsicRecipe (Intrinsic::stepvector, {}, IntTy, DL);
1564+ VectorPHVPBB->appendRecipe (InitVec);
1565+
1566+ if (InductionTy->isIntegerTy ()) {
1567+ auto *Mul = new VPInstruction (Instruction::Mul, {InitVec, Step}, DL);
1568+ VectorPHVPBB->appendRecipe (Mul);
1569+ auto *SteppedStart =
1570+ new VPInstruction (Instruction::Add, {StartValue, Mul}, {}, " induction" );
1571+ VectorPHVPBB->appendRecipe (SteppedStart);
1572+ return SteppedStart;
1573+ } else {
1574+ FastMathFlags FMF = ID.getInductionBinOp ()->getFastMathFlags ();
1575+ InitVec = new VPWidenCastRecipe (Instruction::UIToFP, InitVec, InductionTy);
1576+ VectorPHVPBB->appendRecipe (InitVec);
1577+ auto *Mul = new VPInstruction (Instruction::FMul, {InitVec, Step}, FMF, DL);
1578+ VectorPHVPBB->appendRecipe (Mul);
1579+ Instruction::BinaryOps BinOp = ID.getInductionOpcode ();
1580+ auto *SteppedStart =
1581+ new VPInstruction (BinOp, {StartValue, Mul}, FMF, DL, " induction" );
1582+ VectorPHVPBB->appendRecipe (SteppedStart);
1583+ return SteppedStart;
1584+ }
1585+ }
1586+
1587+ // / Lower widen iv recipes into recipes with EVL.
1588+ static void
1589+ transformWidenIVRecipestoEVLRecipes (VPWidenIntOrFpInductionRecipe *WidenIV,
1590+ VPlan &Plan, VPValue *EVL) {
1591+ DebugLoc DL = WidenIV->getDebugLoc ();
1592+ const InductionDescriptor &ID = WidenIV->getInductionDescriptor ();
1593+ auto *CanonicalIVIncrement =
1594+ cast<VPInstruction>(Plan.getCanonicalIV ()->getBackedgeValue ());
1595+ VPBasicBlock *VectorPHVPBB = Plan.getVectorLoopRegion ()->getPreheaderVPBB ();
1596+ VPBasicBlock *ExitingVPBB =
1597+ Plan.getVectorLoopRegion ()->getExitingBasicBlock ();
1598+ VPTypeAnalysis TypeInfo (Plan.getCanonicalIV ()->getScalarType ());
1599+ VPValue *StartValue = WidenIV->getStartValue ();
1600+ VPValue *Step = WidenIV->getStepValue ();
1601+ if (TruncInst *I = WidenIV->getTruncInst ()) {
1602+ Type *TruncTy = I->getType ();
1603+ auto *R = new VPScalarCastRecipe (Instruction::Trunc, StartValue, TruncTy);
1604+ VectorPHVPBB->appendRecipe (R);
1605+ StartValue = R;
1606+ R = new VPScalarCastRecipe (Instruction::Trunc, Step, TruncTy);
1607+ VectorPHVPBB->appendRecipe (R);
1608+ Step = R;
1609+ }
1610+ Type *InductionTy = TypeInfo.inferScalarType (StartValue);
1611+ LLVMContext &Ctx = InductionTy->getContext ();
1612+ VPValue *TrueMask = Plan.getOrAddLiveIn (ConstantInt::getTrue (Ctx));
1613+
1614+ // Construct the initial value of the vector IV in the vector loop preheader
1615+ VPSingleDefRecipe *SteppedStart =
1616+ createStepVector (StartValue, Step, InductionTy, ID, VectorPHVPBB, DL);
1617+
1618+ // Create the vector phi node for both int. and fp. induction variables
1619+ // and determine the kind of arithmetic we will perform
1620+ auto *VecInd = new VPWidenPHIRecipe (WidenIV->getPHINode ());
1621+ VecInd->insertBefore (WidenIV);
1622+ WidenIV->replaceAllUsesWith (VecInd);
1623+ Intrinsic::ID VPArithOp;
1624+ Instruction::BinaryOps MulOp;
1625+ if (InductionTy->isIntegerTy ()) {
1626+ VPArithOp = Intrinsic::vp_add;
1627+ MulOp = Instruction::Mul;
1628+ } else {
1629+ VPArithOp = ID.getInductionOpcode () == Instruction::FAdd
1630+ ? Intrinsic::vp_fadd
1631+ : Intrinsic::vp_fsub;
1632+ MulOp = Instruction::FMul;
1633+ }
1634+
1635+ // Multiply the runtime VF by the step
1636+ VPSingleDefRecipe *ScalarMul;
1637+ if (InductionTy->isFloatingPointTy ()) {
1638+ FastMathFlags FMF = ID.getInductionBinOp ()->getFastMathFlags ();
1639+ auto *CastEVL =
1640+ new VPScalarCastRecipe (Instruction::UIToFP, EVL, InductionTy);
1641+ CastEVL->insertBefore (CanonicalIVIncrement);
1642+ ScalarMul = new VPInstruction (MulOp, {Step, CastEVL}, FMF, DL);
1643+ } else {
1644+ unsigned InductionSz = InductionTy->getScalarSizeInBits ();
1645+ unsigned EVLSz = TypeInfo.inferScalarType (EVL)->getScalarSizeInBits ();
1646+ VPValue *CastEVL = EVL;
1647+ if (InductionSz != EVLSz) {
1648+ auto *R = new VPScalarCastRecipe (EVLSz > InductionSz ? Instruction::Trunc
1649+ : Instruction::ZExt,
1650+ EVL, InductionTy);
1651+ R->insertBefore (CanonicalIVIncrement);
1652+ CastEVL = R;
1653+ }
1654+ ScalarMul = new VPInstruction (MulOp, {Step, CastEVL}, DL);
1655+ }
1656+ ScalarMul->insertBefore (CanonicalIVIncrement);
1657+ // Create a vector splat to use in the induction update.
1658+ auto *SplatVF =
1659+ new VPWidenIntrinsicRecipe (Intrinsic::experimental_vp_splat,
1660+ {ScalarMul, TrueMask, EVL}, InductionTy, DL);
1661+ SplatVF->insertBefore (CanonicalIVIncrement);
1662+ // TODO: We may need to add the step a number of times if UF > 1
1663+ auto *LastInduction = new VPWidenIntrinsicRecipe (
1664+ VPArithOp, {VecInd, SplatVF, TrueMask, EVL}, InductionTy, DL);
1665+ LastInduction->insertBefore (CanonicalIVIncrement);
1666+ VecInd->addIncoming (SteppedStart, VectorPHVPBB);
1667+ VecInd->addIncoming (LastInduction, ExitingVPBB);
1668+ }
1669+
15501670// / Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and
15511671// / replaces all uses except the canonical IV increment of
15521672// / VPCanonicalIVPHIRecipe with a VPEVLBasedIVPHIRecipe. VPCanonicalIVPHIRecipe
@@ -1592,9 +1712,8 @@ bool VPlanTransforms::tryAddExplicitVectorLength(
15921712 // The transform updates all users of inductions to work based on EVL, instead
15931713 // of the VF directly. At the moment, widened pointer inductions cannot be
15941714 // updated, so bail out if the plan contains any.
1595- bool ContainsWidenInductions = any_of (
1596- Header->phis (),
1597- IsaPred<VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe>);
1715+ bool ContainsWidenInductions =
1716+ any_of (Header->phis (), IsaPred<VPWidenPointerInductionRecipe>);
15981717 if (ContainsWidenInductions)
15991718 return false ;
16001719
@@ -1638,6 +1757,16 @@ bool VPlanTransforms::tryAddExplicitVectorLength(
16381757
16391758 transformRecipestoEVLRecipes (Plan, *VPEVL);
16401759
1760+ VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion ()->getEntryBasicBlock ();
1761+ SmallVector<VPRecipeBase *> ToRemove;
1762+ for (VPRecipeBase &Phi : HeaderVPBB->phis ())
1763+ if (auto *WidenIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi)) {
1764+ transformWidenIVRecipestoEVLRecipes (WidenIV, Plan, VPEVL);
1765+ ToRemove.push_back (WidenIV);
1766+ }
1767+ for (VPRecipeBase *R : ToRemove)
1768+ R->eraseFromParent ();
1769+
16411770 // Replace all uses of VPCanonicalIVPHIRecipe by
16421771 // VPEVLBasedIVPHIRecipe except for the canonical IV increment.
16431772 CanonicalIVPHI->replaceAllUsesWith (EVLPhi);
0 commit comments