@@ -956,10 +956,6 @@ class LoopVectorizationCostModel {
956956 return expectedCost (UserVF).isValid ();
957957 }
958958
959- /// \return True if maximizing vector bandwidth is enabled by the target or
960- /// user options.
961- bool useMaxBandwidth (TargetTransformInfo::RegisterKind RegKind);
962-
963959 /// \return The size (in bits) of the smallest and widest types in the code
964960 /// that needs to be vectorized. We ignore values that remain scalar such as
965961 /// 64 bit loop indices.
@@ -3922,14 +3918,6 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
39223918 return FixedScalableVFPair::getNone ();
39233919}
39243920
3925- bool LoopVectorizationCostModel::useMaxBandwidth (
3926- TargetTransformInfo::RegisterKind RegKind) {
3927- return MaximizeBandwidth || (MaximizeBandwidth.getNumOccurrences () == 0 &&
3928- (TTI.shouldMaximizeVectorBandwidth (RegKind) ||
3929- (UseWiderVFIfCallVariantsPresent &&
3930- Legal->hasVectorCallVariants ())));
3931- }
3932-
39333921ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget (
39343922 unsigned MaxTripCount, unsigned SmallestType, unsigned WidestType,
39353923 ElementCount MaxSafeVF, bool FoldTailByMasking) {
@@ -3995,7 +3983,10 @@ ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
39953983 ComputeScalableMaxVF ? TargetTransformInfo::RGK_ScalableVector
39963984 : TargetTransformInfo::RGK_FixedWidthVector;
39973985 ElementCount MaxVF = MaxVectorElementCount;
3998- if (useMaxBandwidth (RegKind)) {
3986+ if (MaximizeBandwidth ||
3987+ (MaximizeBandwidth.getNumOccurrences () == 0 &&
3988+ (TTI.shouldMaximizeVectorBandwidth (RegKind) ||
3989+ (UseWiderVFIfCallVariantsPresent && Legal->hasVectorCallVariants ())))) {
39993990 auto MaxVectorElementCountMaxBW = ElementCount::get (
40003991 llvm::bit_floor (WidestRegister.getKnownMinValue () / SmallestType),
40013992 ComputeScalableMaxVF);
@@ -4350,23 +4341,15 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
43504341 for (auto &P : VPlans) {
43514342 ArrayRef<ElementCount> VFs (P->vectorFactors ().begin (),
43524343 P->vectorFactors ().end ());
4353- SmallVector<VPRegisterUsage, 8 > RUs;
4354- if (CM.useMaxBandwidth (TargetTransformInfo::RGK_ScalableVector) ||
4355- CM.useMaxBandwidth (TargetTransformInfo::RGK_FixedWidthVector))
4356- RUs = calculateRegisterUsageForPlan (*P, VFs, TTI, CM.ValuesToIgnore );
4357-
4358- for (unsigned I = 0 ; I < VFs.size (); I++) {
4359- ElementCount VF = VFs[I];
4344+ auto RUs = calculateRegisterUsageForPlan (*P, VFs, TTI, CM.ValuesToIgnore );
4345+ for (auto [VF, RU] : zip_equal (VFs, RUs)) {
43604346 // The cost for scalar VF=1 is already calculated, so ignore it.
43614347 if (VF.isScalar ())
43624348 continue ;
43634349
43644350 /// Don't consider the VF if it exceeds the number of registers for the
43654351 /// target.
4366- if (CM.useMaxBandwidth (VF.isScalable ()
4367- ? TargetTransformInfo::RGK_ScalableVector
4368- : TargetTransformInfo::RGK_FixedWidthVector) &&
4369- RUs[I].exceedsMaxNumRegs (TTI))
4352+ if (RU.exceedsMaxNumRegs (TTI))
43704353 continue ;
43714354
43724355 InstructionCost C = CM.expectedCost (VF);
@@ -7113,14 +7096,8 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
71137096 for (auto &P : VPlans) {
71147097 ArrayRef<ElementCount> VFs (P->vectorFactors ().begin (),
71157098 P->vectorFactors ().end ());
7116-
7117- SmallVector<VPRegisterUsage, 8 > RUs;
7118- if (CM.useMaxBandwidth (TargetTransformInfo::RGK_ScalableVector) ||
7119- CM.useMaxBandwidth (TargetTransformInfo::RGK_FixedWidthVector))
7120- RUs = calculateRegisterUsageForPlan (*P, VFs, TTI, CM.ValuesToIgnore );
7121-
7122- for (unsigned I = 0 ; I < VFs.size (); I++) {
7123- ElementCount VF = VFs[I];
7099+ auto RUs = calculateRegisterUsageForPlan (*P, VFs, TTI, CM.ValuesToIgnore );
7100+ for (auto [VF, RU] : zip_equal (VFs, RUs)) {
71247101 if (VF.isScalar ())
71257102 continue ;
71267103 if (!ForceVectorization && !willGenerateVectors (*P, VF, TTI)) {
@@ -7142,10 +7119,7 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
71427119 InstructionCost Cost = cost (*P, VF);
71437120 VectorizationFactor CurrentFactor (VF, Cost, ScalarCost);
71447121
7145- if (CM.useMaxBandwidth (VF.isScalable ()
7146- ? TargetTransformInfo::RGK_ScalableVector
7147- : TargetTransformInfo::RGK_FixedWidthVector) &&
7148- RUs[I].exceedsMaxNumRegs (TTI)) {
7122+ if (RU.exceedsMaxNumRegs (TTI)) {
71497123 LLVM_DEBUG (dbgs () << " LV(REG): Not considering vector loop of width "
71507124 << VF << " because it uses too many registers\n " );
71517125 continue ;
0 commit comments