@@ -956,6 +956,10 @@ class LoopVectorizationCostModel {
956956 return expectedCost (UserVF).isValid ();
957957 }
958958
959+ /// \return True if maximizing vector bandwidth is enabled, for registers of
960+ /// kind \p RegKind, either by the target or by user options.
961+ bool useMaxBandwidth (TargetTransformInfo::RegisterKind RegKind);
962+
959963 /// \return The size (in bits) of the smallest and widest types in the code
960964 /// that needs to be vectorized. We ignore values that remain scalar such as
961965 /// 64 bit loop indices.
@@ -3918,6 +3922,14 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
39183922 return FixedScalableVFPair::getNone ();
39193923}
39203924
/// Decide whether maximizing vector bandwidth is enabled for registers of kind
/// \p RegKind. This is the shared form of the condition that
/// getMaximizedVFForTarget previously spelled out inline.
///
/// \param RegKind register kind passed through to the target query
///        TTI.shouldMaximizeVectorBandwidth.
/// \return true if MaximizeBandwidth is set. Otherwise, and only when
///         MaximizeBandwidth reports zero occurrences (presumably: the option
///         was not given explicitly by the user -- confirm against the option's
///         definition), true if the target requests it for \p RegKind, or if
///         UseWiderVFIfCallVariantsPresent is set and Legal reports vector
///         call variants. False in every other case.
3925+ bool LoopVectorizationCostModel::useMaxBandwidth (
3926+ TargetTransformInfo::RegisterKind RegKind) {
// Precedence: a true MaximizeBandwidth short-circuits to true; if the option
// occurred but evaluates to false, the target and call-variant fallbacks
// below are not consulted at all.
3927+ return MaximizeBandwidth || (MaximizeBandwidth.getNumOccurrences () == 0 &&
3928+ (TTI.shouldMaximizeVectorBandwidth (RegKind) ||
3929+ (UseWiderVFIfCallVariantsPresent &&
3930+ Legal->hasVectorCallVariants ())));
3931+ }
3932+
39213933ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget (
39223934 unsigned MaxTripCount, unsigned SmallestType, unsigned WidestType,
39233935 ElementCount MaxSafeVF, bool FoldTailByMasking) {
@@ -3983,10 +3995,7 @@ ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
39833995 ComputeScalableMaxVF ? TargetTransformInfo::RGK_ScalableVector
39843996 : TargetTransformInfo::RGK_FixedWidthVector;
39853997 ElementCount MaxVF = MaxVectorElementCount;
3986- if (MaximizeBandwidth ||
3987- (MaximizeBandwidth.getNumOccurrences () == 0 &&
3988- (TTI.shouldMaximizeVectorBandwidth (RegKind) ||
3989- (UseWiderVFIfCallVariantsPresent && Legal->hasVectorCallVariants ())))) {
3998+ if (useMaxBandwidth (RegKind)) {
39903999 auto MaxVectorElementCountMaxBW = ElementCount::get (
39914000 llvm::bit_floor (WidestRegister.getKnownMinValue () / SmallestType),
39924001 ComputeScalableMaxVF);
@@ -4341,15 +4350,24 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
43414350 for (auto &P : VPlans) {
43424351 ArrayRef<ElementCount> VFs (P->vectorFactors ().begin (),
43434352 P->vectorFactors ().end ());
4344- auto RUs = calculateRegisterUsageForPlan (*P, VFs, TTI, CM.ValuesToIgnore );
4345- for (auto [VF, RU] : zip_equal (VFs, RUs)) {
4353+
4354+ SmallVector<VPRegisterUsage, 8 > RUs;
4355+ if (CM.useMaxBandwidth (TargetTransformInfo::RGK_ScalableVector) ||
4356+ CM.useMaxBandwidth (TargetTransformInfo::RGK_FixedWidthVector))
4357+ RUs = calculateRegisterUsageForPlan (*P, VFs, TTI, CM.ValuesToIgnore );
4358+
4359+ for (unsigned I = 0 ; I < VFs.size (); I++) {
4360+ ElementCount VF = VFs[I];
43464361 // The cost for scalar VF=1 is already calculated, so ignore it.
43474362 if (VF.isScalar ())
43484363 continue ;
43494364
43504365 /// Don't consider the VF if it exceeds the number of registers for the
43514366 /// target.
4352- if (RU.exceedsMaxNumRegs (TTI))
4367+ if (CM.useMaxBandwidth (VF.isScalable ()
4368+ ? TargetTransformInfo::RGK_ScalableVector
4369+ : TargetTransformInfo::RGK_FixedWidthVector) &&
4370+ RUs[I].exceedsMaxNumRegs (TTI))
43534371 continue ;
43544372
43554373 InstructionCost C = CM.expectedCost (VF);
@@ -7096,8 +7114,14 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
70967114 for (auto &P : VPlans) {
70977115 ArrayRef<ElementCount> VFs (P->vectorFactors ().begin (),
70987116 P->vectorFactors ().end ());
7099- auto RUs = calculateRegisterUsageForPlan (*P, VFs, TTI, CM.ValuesToIgnore );
7100- for (auto [VF, RU] : zip_equal (VFs, RUs)) {
7117+
7118+ SmallVector<VPRegisterUsage, 8 > RUs;
7119+ if (CM.useMaxBandwidth (TargetTransformInfo::RGK_ScalableVector) ||
7120+ CM.useMaxBandwidth (TargetTransformInfo::RGK_FixedWidthVector))
7121+ RUs = calculateRegisterUsageForPlan (*P, VFs, TTI, CM.ValuesToIgnore );
7122+
7123+ for (unsigned I = 0 ; I < VFs.size (); I++) {
7124+ ElementCount VF = VFs[I];
71017125 if (VF.isScalar ())
71027126 continue ;
71037127 if (!ForceVectorization && !willGenerateVectors (*P, VF, TTI)) {
@@ -7119,7 +7143,10 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
71197143 InstructionCost Cost = cost (*P, VF);
71207144 VectorizationFactor CurrentFactor (VF, Cost, ScalarCost);
71217145
7122- if (RU.exceedsMaxNumRegs (TTI)) {
7146+ if (CM.useMaxBandwidth (VF.isScalable ()
7147+ ? TargetTransformInfo::RGK_ScalableVector
7148+ : TargetTransformInfo::RGK_FixedWidthVector) &&
7149+ RUs[I].exceedsMaxNumRegs (TTI)) {
71237150 LLVM_DEBUG (dbgs () << " LV(REG): Not considering vector loop of width "
71247151 << VF << " because it uses too many registers\n " );
71257152 continue ;
0 commit comments