@@ -956,6 +956,10 @@ class LoopVectorizationCostModel {
956956 return expectedCost (UserVF).isValid ();
957957 }
958958
959+ /// \return True if maximizing vector bandwidth is enabled by the target or
960+ /// by user options.
961+ bool useMaxBandwidth (TargetTransformInfo::RegisterKind RegKind);
962+
959963 /// \return The size (in bits) of the smallest and widest types in the code
960964 /// that needs to be vectorized. We ignore values that remain scalar, such as
961965 /// 64-bit loop indices.
@@ -3918,6 +3922,14 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
39183922 return FixedScalableVFPair::getNone ();
39193923}
39203924
/// Decide whether vector bandwidth should be maximized for register kind
/// \p RegKind.
///
/// \return true if MaximizeBandwidth evaluates to true. Otherwise, only when
/// MaximizeBandwidth reports zero occurrences (presumably meaning the option
/// was not given explicitly -- confirm against its declaration), return true
/// if either TTI.shouldMaximizeVectorBandwidth(RegKind) holds, or
/// UseWiderVFIfCallVariantsPresent is set and Legal->hasVectorCallVariants()
/// holds. In all other cases return false.
3925+ bool LoopVectorizationCostModel::useMaxBandwidth (
3926+ TargetTransformInfo::RegisterKind RegKind) {
3927+ return MaximizeBandwidth || (MaximizeBandwidth.getNumOccurrences () == 0 &&
3928+ (TTI.shouldMaximizeVectorBandwidth (RegKind) ||
3929+ (UseWiderVFIfCallVariantsPresent &&
3930+ Legal->hasVectorCallVariants ())));
3931+ }
3932+
39213933ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget (
39223934 unsigned MaxTripCount, unsigned SmallestType, unsigned WidestType,
39233935 ElementCount MaxSafeVF, bool FoldTailByMasking) {
@@ -3983,10 +3995,7 @@ ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
39833995 ComputeScalableMaxVF ? TargetTransformInfo::RGK_ScalableVector
39843996 : TargetTransformInfo::RGK_FixedWidthVector;
39853997 ElementCount MaxVF = MaxVectorElementCount;
3986- if (MaximizeBandwidth ||
3987- (MaximizeBandwidth.getNumOccurrences () == 0 &&
3988- (TTI.shouldMaximizeVectorBandwidth (RegKind) ||
3989- (UseWiderVFIfCallVariantsPresent && Legal->hasVectorCallVariants ())))) {
3998+ if (useMaxBandwidth (RegKind)) {
39903999 auto MaxVectorElementCountMaxBW = ElementCount::get (
39914000 llvm::bit_floor (WidestRegister.getKnownMinValue () / SmallestType),
39924001 ComputeScalableMaxVF);
@@ -4349,7 +4358,10 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
43494358
43504359 /// When maximizing vector bandwidth, don't consider the VF if it exceeds
43514360 /// the number of registers for the target.
4352- if (RU.exceedsMaxNumRegs (TTI))
4361+ if (CM.useMaxBandwidth (VF.isScalable ()
4362+ ? TargetTransformInfo::RGK_ScalableVector
4363+ : TargetTransformInfo::RGK_FixedWidthVector) &&
4364+ RU.exceedsMaxNumRegs (TTI))
43534365 continue ;
43544366
43554367 InstructionCost C = CM.expectedCost (VF);
@@ -7119,7 +7131,10 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
71197131 InstructionCost Cost = cost (*P, VF);
71207132 VectorizationFactor CurrentFactor (VF, Cost, ScalarCost);
71217133
7122- if (RU.exceedsMaxNumRegs (TTI)) {
7134+ if (CM.useMaxBandwidth (VF.isScalable ()
7135+ ? TargetTransformInfo::RGK_ScalableVector
7136+ : TargetTransformInfo::RGK_FixedWidthVector) &&
7137+ RU.exceedsMaxNumRegs (TTI)) {
71237138 LLVM_DEBUG (dbgs () << " LV(REG): Not considering vector loop of width "
71247139 << VF << " because it uses too many registers\n " );
71257140 continue ;
0 commit comments