@@ -3956,9 +3956,6 @@ void VPlanTransforms::materializeVFAndVFxUF(VPlan &Plan, VPBasicBlock *VectorPH,
39563956 // used.
39573957 // TODO: Assert that they aren't used.
39583958
3959- VPValue *UF = Plan.getOrAddLiveIn (ConstantInt::get (TCTy, Plan.getUF ()));
3960- Plan.getSymbolicUF ().replaceAllUsesWith (UF);
3961-
39623959 // If there are no users of the runtime VF, compute VFxUF by constant folding
39633960 // the multiplication of VF and UF.
39643961 if (VF.getNumUsers () == 0 ) {
@@ -3978,6 +3975,7 @@ void VPlanTransforms::materializeVFAndVFxUF(VPlan &Plan, VPBasicBlock *VectorPH,
39783975 }
39793976 VF.replaceAllUsesWith (RuntimeVF);
39803977
3978+ VPValue *UF = Plan.getOrAddLiveIn (ConstantInt::get (TCTy, Plan.getUF ()));
39813979 VPValue *MulByUF = Builder.createNaryOp (Instruction::Mul, {RuntimeVF, UF});
39823980 VFxUF.replaceAllUsesWith (MulByUF);
39833981}
@@ -4045,14 +4043,14 @@ static bool canNarrowLoad(VPWidenRecipe *WideMember0, unsigned OpIdx,
40454043 return false ;
40464044}
40474045
4048- // / Returns VF from \p VFs if \p IR is a full interleave group with factor and
4049- // / number of members both equal to VF. The interleave group must also access
4050- // / the full vector width.
4051- static std::optional<ElementCount> isConsecutiveInterleaveGroup (
4052- VPInterleaveRecipe *InterleaveR, ArrayRef<ElementCount> VFs ,
4053- VPTypeAnalysis &TypeInfo, const TargetTransformInfo &TTI ) {
4046+ // / Returns true if \p InterleaveR is a full interleave group with factor and
4047+ // / number of members both equal to \p VF. The interleave group must also
4048+ // / access the full vector width \p VectorRegWidth.
4049+ static bool isConsecutiveInterleaveGroup (VPInterleaveRecipe *InterleaveR,
4050+ unsigned VF, VPTypeAnalysis &TypeInfo ,
4051+ unsigned VectorRegWidth ) {
40544052 if (!InterleaveR || InterleaveR->getMask ())
4055- return std:: nullopt ;
4053+ return false ;
40564054
40574055 Type *GroupElementTy = nullptr ;
40584056 if (InterleaveR->getStoredValues ().empty ()) {
@@ -4061,35 +4059,21 @@ static std::optional<ElementCount> isConsecutiveInterleaveGroup(
40614059 [&TypeInfo, GroupElementTy](VPValue *Op) {
40624060 return TypeInfo.inferScalarType (Op) == GroupElementTy;
40634061 }))
4064- return std:: nullopt ;
4062+ return false ;
40654063 } else {
40664064 GroupElementTy =
40674065 TypeInfo.inferScalarType (InterleaveR->getStoredValues ()[0 ]);
40684066 if (!all_of (InterleaveR->getStoredValues (),
40694067 [&TypeInfo, GroupElementTy](VPValue *Op) {
40704068 return TypeInfo.inferScalarType (Op) == GroupElementTy;
40714069 }))
4072- return std:: nullopt ;
4070+ return false ;
40734071 }
40744072
4075- auto GetVectorWidthForVF = [&TTI](ElementCount VF) {
4076- TypeSize Size = TTI.getRegisterBitWidth (
4077- VF.isFixed () ? TargetTransformInfo::RGK_FixedWidthVector
4078- : TargetTransformInfo::RGK_ScalableVector);
4079- assert (Size.isScalable () == VF.isScalable () &&
4080- " if Size is scalable, VF must to and vice versa" );
4081- return Size.getKnownMinValue ();
4082- };
4083-
4084- for (ElementCount VF : VFs) {
4085- unsigned MinVal = VF.getKnownMinValue ();
4086- unsigned GroupSize = GroupElementTy->getScalarSizeInBits () * MinVal;
4087- auto IG = InterleaveR->getInterleaveGroup ();
4088- if (IG->getFactor () == MinVal && IG->getNumMembers () == MinVal &&
4089- GroupSize == GetVectorWidthForVF (VF))
4090- return {VF};
4091- }
4092- return std::nullopt ;
4073+ unsigned GroupSize = GroupElementTy->getScalarSizeInBits () * VF;
4074+ auto IG = InterleaveR->getInterleaveGroup ();
4075+ return IG->getFactor () == VF && IG->getNumMembers () == VF &&
4076+ GroupSize == VectorRegWidth;
40934077}
40944078
40954079// / Returns true if \p VPV is a narrow VPValue.
@@ -4100,18 +4084,16 @@ static bool isAlreadyNarrow(VPValue *VPV) {
41004084 return RepR && RepR->isSingleScalar ();
41014085}
41024086
4103- std::unique_ptr<VPlan>
4104- VPlanTransforms::narrowInterleaveGroups (VPlan &Plan,
4105- const TargetTransformInfo &TTI) {
4106- using namespace llvm ::VPlanPatternMatch;
4087+ void VPlanTransforms::narrowInterleaveGroups (VPlan &Plan, ElementCount VF,
4088+ unsigned VectorRegWidth) {
41074089 VPRegionBlock *VectorLoop = Plan.getVectorLoopRegion ();
4108-
41094090 if (!VectorLoop)
4110- return nullptr ;
4091+ return ;
41114092
41124093 VPTypeAnalysis TypeInfo (Plan);
4094+
4095+ unsigned VFMinVal = VF.getKnownMinValue ();
41134096 SmallVector<VPInterleaveRecipe *> StoreGroups;
4114- std::optional<ElementCount> VFToOptimize;
41154097 for (auto &R : *VectorLoop->getEntryBasicBlock ()) {
41164098 if (isa<VPCanonicalIVPHIRecipe>(&R) || match (&R, m_BranchOnCount ()))
41174099 continue ;
@@ -4125,33 +4107,30 @@ VPlanTransforms::narrowInterleaveGroups(VPlan &Plan,
41254107 // * recipes writing to memory except interleave groups
41264108 // Only support plans with a canonical induction phi.
41274109 if (R.isPhi ())
4128- return nullptr ;
4110+ return ;
41294111
41304112 auto *InterleaveR = dyn_cast<VPInterleaveRecipe>(&R);
41314113 if (R.mayWriteToMemory () && !InterleaveR)
4132- return nullptr ;
4114+ return ;
4115+
4116+ // Do not narrow interleave groups if there are VectorPointer recipes and
4117+ // the plan was unrolled. The recipe implicitly uses VF from
4118+ // VPTransformState.
4119+ // TODO: Remove restriction once the VF for the VectorPointer offset is
4120+ // modeled explicitly as an operand.
4121+ if (isa<VPVectorPointerRecipe>(&R) && Plan.getUF () > 1 )
4122+ return ;
41334123
41344124 // All other ops are allowed, but we reject uses that cannot be converted
41354125 // when checking all allowed consumers (store interleave groups) below.
41364126 if (!InterleaveR)
41374127 continue ;
41384128
4139- // Try to find a single VF, where all interleave groups are consecutive and
4140- // saturate the full vector width. If we already have a candidate VF, check
4141- // if it is applicable for the current InterleaveR, otherwise look for a
4142- // suitable VF across the Plans VFs.
4143- //
4144- if (VFToOptimize) {
4145- if (!isConsecutiveInterleaveGroup (InterleaveR, {*VFToOptimize}, TypeInfo,
4146- TTI))
4147- return nullptr ;
4148- } else {
4149- if (auto VF = isConsecutiveInterleaveGroup (
4150- InterleaveR, to_vector (Plan.vectorFactors ()), TypeInfo, TTI))
4151- VFToOptimize = *VF;
4152- else
4153- return nullptr ;
4154- }
4129+ // Bail out on non-consecutive interleave groups.
4130+ if (!isConsecutiveInterleaveGroup (InterleaveR, VFMinVal, TypeInfo,
4131+ VectorRegWidth))
4132+ return ;
4133+
41554134 // Skip read interleave groups.
41564135 if (InterleaveR->getStoredValues ().empty ())
41574136 continue ;
@@ -4185,34 +4164,24 @@ VPlanTransforms::narrowInterleaveGroups(VPlan &Plan,
41854164 auto *WideMember0 = dyn_cast_or_null<VPWidenRecipe>(
41864165 InterleaveR->getStoredValues ()[0 ]->getDefiningRecipe ());
41874166 if (!WideMember0)
4188- return nullptr ;
4167+ return ;
41894168 for (const auto &[I, V] : enumerate(InterleaveR->getStoredValues ())) {
41904169 auto *R = dyn_cast_or_null<VPWidenRecipe>(V->getDefiningRecipe ());
41914170 if (!R || R->getOpcode () != WideMember0->getOpcode () ||
41924171 R->getNumOperands () > 2 )
4193- return nullptr ;
4172+ return ;
41944173 if (any_of (enumerate(R->operands ()),
41954174 [WideMember0, Idx = I](const auto &P) {
41964175 const auto &[OpIdx, OpV] = P;
41974176 return !canNarrowLoad (WideMember0, OpIdx, OpV, Idx);
41984177 }))
4199- return nullptr ;
4178+ return ;
42004179 }
42014180 StoreGroups.push_back (InterleaveR);
42024181 }
42034182
42044183 if (StoreGroups.empty ())
4205- return nullptr ;
4206-
4207- // All interleave groups in Plan can be narrowed for VFToOptimize. Split the
4208- // original Plan into 2: a) a new clone which contains all VFs of Plan, except
4209- // VFToOptimize, and b) the original Plan with VFToOptimize as single VF.
4210- std::unique_ptr<VPlan> NewPlan;
4211- if (size (Plan.vectorFactors ()) != 1 ) {
4212- NewPlan = std::unique_ptr<VPlan>(Plan.duplicate ());
4213- Plan.setVF (*VFToOptimize);
4214- NewPlan->removeVF (*VFToOptimize);
4215- }
4184+ return ;
42164185
42174186 // Convert InterleaveGroup \p R to a single VPWidenLoadRecipe.
42184187 SmallPtrSet<VPValue *, 4 > NarrowedOps;
@@ -4283,8 +4252,9 @@ VPlanTransforms::narrowInterleaveGroups(VPlan &Plan,
42834252 auto *Inc = cast<VPInstruction>(CanIV->getBackedgeValue ());
42844253 VPBuilder PHBuilder (Plan.getVectorPreheader ());
42854254
4286- VPValue *UF = &Plan.getSymbolicUF ();
4287- if (VFToOptimize->isScalable ()) {
4255+ VPValue *UF = Plan.getOrAddLiveIn (
4256+ ConstantInt::get (CanIV->getScalarType (), 1 * Plan.getUF ()));
4257+ if (VF.isScalable ()) {
42884258 VPValue *VScale = PHBuilder.createElementCount (
42894259 CanIV->getScalarType (), ElementCount::getScalable (1 ));
42904260 VPValue *VScaleUF = PHBuilder.createNaryOp (Instruction::Mul, {VScale, UF});
@@ -4296,10 +4266,6 @@ VPlanTransforms::narrowInterleaveGroups(VPlan &Plan,
42964266 Plan.getOrAddLiveIn (ConstantInt::get (CanIV->getScalarType (), 1 )));
42974267 }
42984268 removeDeadRecipes (Plan);
4299- assert (none_of (*VectorLoop->getEntryBasicBlock (),
4300- IsaPred<VPVectorPointerRecipe>) &&
4301- " All VPVectorPointerRecipes should have been removed" );
4302- return NewPlan;
43034269}
43044270
43054271// / Add branch weight metadata, if the \p Plan's middle block is terminated by a
0 commit comments