@@ -3956,6 +3956,9 @@ void VPlanTransforms::materializeVFAndVFxUF(VPlan &Plan, VPBasicBlock *VectorPH,
39563956 // used.
39573957 // TODO: Assert that they aren't used.
39583958
3959+ VPValue *UF = Plan.getOrAddLiveIn (ConstantInt::get (TCTy, Plan.getUF ()));
3960+ Plan.getSymbolicUF ().replaceAllUsesWith (UF);
3961+
39593962 // If there are no users of the runtime VF, compute VFxUF by constant folding
39603963 // the multiplication of VF and UF.
39613964 if (VF.getNumUsers () == 0 ) {
@@ -3975,7 +3978,6 @@ void VPlanTransforms::materializeVFAndVFxUF(VPlan &Plan, VPBasicBlock *VectorPH,
39753978 }
39763979 VF.replaceAllUsesWith (RuntimeVF);
39773980
3978- VPValue *UF = Plan.getOrAddLiveIn (ConstantInt::get (TCTy, Plan.getUF ()));
39793981 VPValue *MulByUF = Builder.createNaryOp (Instruction::Mul, {RuntimeVF, UF});
39803982 VFxUF.replaceAllUsesWith (MulByUF);
39813983}
@@ -4043,14 +4045,14 @@ static bool canNarrowLoad(VPWidenRecipe *WideMember0, unsigned OpIdx,
40434045 return false ;
40444046}
40454047
4046- // / Returns true if \p IR is a full interleave group with factor and number of
4047- // / members both equal to \p VF. The interleave group must also access the full
4048- // / vector width \p VectorRegWidth .
4049- static bool isConsecutiveInterleaveGroup (VPInterleaveRecipe *InterleaveR,
4050- unsigned VF, VPTypeAnalysis &TypeInfo ,
4051- unsigned VectorRegWidth ) {
4049+ // / Returns VF from \p VFs if \p InterleaveR is a full interleave group with factor and
4049+ // / number of members both equal to VF. The interleave group must also access
4050+ // / the full vector width .
4051+ static std::optional<ElementCount> isConsecutiveInterleaveGroup (
4052+ VPInterleaveRecipe *InterleaveR, ArrayRef<ElementCount> VFs ,
4053+ VPTypeAnalysis &TypeInfo, const TargetTransformInfo &TTI ) {
40524054 if (!InterleaveR)
4053- return false ;
4055+ return std:: nullopt ;
40544056
40554057 Type *GroupElementTy = nullptr ;
40564058 if (InterleaveR->getStoredValues ().empty ()) {
@@ -4059,21 +4061,35 @@ static bool isConsecutiveInterleaveGroup(VPInterleaveRecipe *InterleaveR,
40594061 [&TypeInfo, GroupElementTy](VPValue *Op) {
40604062 return TypeInfo.inferScalarType (Op) == GroupElementTy;
40614063 }))
4062- return false ;
4064+ return std:: nullopt ;
40634065 } else {
40644066 GroupElementTy =
40654067 TypeInfo.inferScalarType (InterleaveR->getStoredValues ()[0 ]);
40664068 if (!all_of (InterleaveR->getStoredValues (),
40674069 [&TypeInfo, GroupElementTy](VPValue *Op) {
40684070 return TypeInfo.inferScalarType (Op) == GroupElementTy;
40694071 }))
4070- return false ;
4072+ return std:: nullopt ;
40714073 }
40724074
4073- unsigned GroupSize = GroupElementTy->getScalarSizeInBits () * VF;
4074- auto IG = InterleaveR->getInterleaveGroup ();
4075- return IG->getFactor () == VF && IG->getNumMembers () == VF &&
4076- GroupSize == VectorRegWidth;
4075+ auto GetVectorWidthForVF = [&TTI](ElementCount VF) {
4076+ TypeSize Size = TTI.getRegisterBitWidth (
4077+ VF.isFixed () ? TargetTransformInfo::RGK_FixedWidthVector
4078+ : TargetTransformInfo::RGK_ScalableVector);
4079+ assert (Size.isScalable () == VF.isScalable () &&
4081+ " if Size is scalable, VF must be too and vice versa" );
4081+ return Size.getKnownMinValue ();
4082+ };
4083+
4084+ for (ElementCount VF : VFs) {
4085+ unsigned MinVal = VF.getKnownMinValue ();
4086+ unsigned GroupSize = GroupElementTy->getScalarSizeInBits () * MinVal;
4087+ auto IG = InterleaveR->getInterleaveGroup ();
4088+ if (IG->getFactor () == MinVal && IG->getNumMembers () == MinVal &&
4089+ GroupSize == GetVectorWidthForVF (VF))
4090+ return {VF};
4091+ }
4092+ return std::nullopt ;
40774093}
40784094
40794095// / Returns true if \p VPValue is a narrow VPValue.
@@ -4084,16 +4100,18 @@ static bool isAlreadyNarrow(VPValue *VPV) {
40844100 return RepR && RepR->isSingleScalar ();
40854101}
40864102
4087- void VPlanTransforms::narrowInterleaveGroups (VPlan &Plan, ElementCount VF,
4088- unsigned VectorRegWidth) {
4103+ std::unique_ptr<VPlan>
4104+ VPlanTransforms::narrowInterleaveGroups (VPlan &Plan,
4105+ const TargetTransformInfo &TTI) {
4106+ using namespace llvm ::VPlanPatternMatch;
40894107 VPRegionBlock *VectorLoop = Plan.getVectorLoopRegion ();
4108+
40904109 if (!VectorLoop)
4091- return ;
4110+ return nullptr ;
40924111
40934112 VPTypeAnalysis TypeInfo (Plan);
4094-
4095- unsigned VFMinVal = VF.getKnownMinValue ();
40964113 SmallVector<VPInterleaveRecipe *> StoreGroups;
4114+ std::optional<ElementCount> VFToOptimize;
40974115 for (auto &R : *VectorLoop->getEntryBasicBlock ()) {
40984116 if (isa<VPCanonicalIVPHIRecipe>(&R) || match (&R, m_BranchOnCount ()))
40994117 continue ;
@@ -4107,30 +4125,33 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
41074125 // * recipes writing to memory except interleave groups
41084126 // Only support plans with a canonical induction phi.
41094127 if (R.isPhi ())
4110- return ;
4128+ return nullptr ;
41114129
41124130 auto *InterleaveR = dyn_cast<VPInterleaveRecipe>(&R);
41134131 if (R.mayWriteToMemory () && !InterleaveR)
4114- return ;
4115-
4116- // Do not narrow interleave groups if there are VectorPointer recipes and
4117- // the plan was unrolled. The recipe implicitly uses VF from
4118- // VPTransformState.
4119- // TODO: Remove restriction once the VF for the VectorPointer offset is
4120- // modeled explicitly as operand.
4121- if (isa<VPVectorPointerRecipe>(&R) && Plan.getUF () > 1 )
4122- return ;
4132+ return nullptr ;
41234133
41244134 // All other ops are allowed, but we reject uses that cannot be converted
41254135 // when checking all allowed consumers (store interleave groups) below.
41264136 if (!InterleaveR)
41274137 continue ;
41284138
4129- // Bail out on non-consecutive interleave groups.
4130- if (!isConsecutiveInterleaveGroup (InterleaveR, VFMinVal, TypeInfo,
4131- VectorRegWidth))
4132- return ;
4133-
4139+ // Try to find a single VF, where all interleave groups are consecutive and
4140+ // saturate the full vector width. If we already have a candidate VF, check
4141+ // if it is applicable for the current InterleaveR, otherwise look for a
4143+ // suitable VF across the Plan's VFs.
4143+ //
4144+ if (VFToOptimize) {
4145+ if (!isConsecutiveInterleaveGroup (InterleaveR, {*VFToOptimize}, TypeInfo,
4146+ TTI))
4147+ return nullptr ;
4148+ } else {
4149+ if (auto VF = isConsecutiveInterleaveGroup (
4150+ InterleaveR, to_vector (Plan.vectorFactors ()), TypeInfo, TTI))
4151+ VFToOptimize = *VF;
4152+ else
4153+ return nullptr ;
4154+ }
41344155 // Skip read interleave groups.
41354156 if (InterleaveR->getStoredValues ().empty ())
41364157 continue ;
@@ -4164,24 +4185,34 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
41644185 auto *WideMember0 = dyn_cast_or_null<VPWidenRecipe>(
41654186 InterleaveR->getStoredValues ()[0 ]->getDefiningRecipe ());
41664187 if (!WideMember0)
4167- return ;
4188+ return nullptr ;
41684189 for (const auto &[I, V] : enumerate(InterleaveR->getStoredValues ())) {
41694190 auto *R = dyn_cast_or_null<VPWidenRecipe>(V->getDefiningRecipe ());
41704191 if (!R || R->getOpcode () != WideMember0->getOpcode () ||
41714192 R->getNumOperands () > 2 )
4172- return ;
4193+ return nullptr ;
41734194 if (any_of (enumerate(R->operands ()),
41744195 [WideMember0, Idx = I](const auto &P) {
41754196 const auto &[OpIdx, OpV] = P;
41764197 return !canNarrowLoad (WideMember0, OpIdx, OpV, Idx);
41774198 }))
4178- return ;
4199+ return nullptr ;
41794200 }
41804201 StoreGroups.push_back (InterleaveR);
41814202 }
41824203
41834204 if (StoreGroups.empty ())
4184- return ;
4205+ return nullptr ;
4206+
4207+ // All interleave groups in Plan can be narrowed for VFToOptimize. Split the
4208+ // original Plan into 2: a) a new clone which contains all VFs of Plan, except
4209+ // VFToOptimize, and b) the original Plan with VFToOptimize as single VF.
4210+ std::unique_ptr<VPlan> NewPlan;
4211+ if (size (Plan.vectorFactors ()) != 1 ) {
4212+ NewPlan = std::unique_ptr<VPlan>(Plan.duplicate ());
4213+ Plan.setVF (*VFToOptimize);
4214+ NewPlan->removeVF (*VFToOptimize);
4215+ }
41854216
41864217 // Convert InterleaveGroup \p R to a single VPWidenLoadRecipe.
41874218 SmallPtrSet<VPValue *, 4 > NarrowedOps;
@@ -4252,9 +4283,8 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
42524283 auto *Inc = cast<VPInstruction>(CanIV->getBackedgeValue ());
42534284 VPBuilder PHBuilder (Plan.getVectorPreheader ());
42544285
4255- VPValue *UF = Plan.getOrAddLiveIn (
4256- ConstantInt::get (CanIV->getScalarType (), 1 * Plan.getUF ()));
4257- if (VF.isScalable ()) {
4286+ VPValue *UF = &Plan.getSymbolicUF ();
4287+ if (VFToOptimize->isScalable ()) {
42584288 VPValue *VScale = PHBuilder.createElementCount (
42594289 CanIV->getScalarType (), ElementCount::getScalable (1 ));
42604290 VPValue *VScaleUF = PHBuilder.createNaryOp (Instruction::Mul, {VScale, UF});
@@ -4266,6 +4296,10 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
42664296 Plan.getOrAddLiveIn (ConstantInt::get (CanIV->getScalarType (), 1 )));
42674297 }
42684298 removeDeadRecipes (Plan);
4299+ assert (none_of (*VectorLoop->getEntryBasicBlock (),
4300+ IsaPred<VPVectorPointerRecipe>) &&
4301+ " All VPVectorPointerRecipes should have been removed" );
4302+ return NewPlan;
42694303}
42704304
42714305// / Add branch weight metadata, if the \p Plan's middle block is terminated by a
0 commit comments