@@ -947,9 +947,8 @@ class LoopVectorizationCostModel {
947947 // / user options, for the given register kind.
948948 bool useMaxBandwidth (TargetTransformInfo::RegisterKind RegKind);
949949
950- // / \return True if maximizing vector bandwidth is enabled by the target or
951- // / user options, for the given vector factor.
952- bool useMaxBandwidth (ElementCount VF);
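950+ // / \return True if register pressure should be calculated for the given VF, i.e. MaxBandwidth is in use for the VF's register kind and the VF is known to be greater than the maximum VF recorded without MaxBandwidth.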
951+ bool shouldCalculateRegPressureForVF (ElementCount VF);
953952
954953 // / \return The size (in bits) of the smallest and widest types in the code
955954 // / that needs to be vectorized. We ignore values that remain scalar such as
@@ -1736,6 +1735,9 @@ class LoopVectorizationCostModel {
17361735 // / Whether this loop should be optimized for size based on function attribute
17371736 // / or profile information.
17381737 bool OptForSize;
1738+
1739+ // / The highest VF possible for this loop without using MaxBandwidth, recorded separately for fixed and scalable VFs in getMaximizedVFForTarget.
1740+ FixedScalableVFPair MaxPermissibleVFWithoutMaxBW;
17391741};
17401742} // end namespace llvm
17411743
@@ -3832,10 +3834,16 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
38323834 return FixedScalableVFPair::getNone ();
38333835}
38343836
3835- bool LoopVectorizationCostModel::useMaxBandwidth (ElementCount VF) {
3836- return useMaxBandwidth (VF.isScalable ()
3837- ? TargetTransformInfo::RGK_ScalableVector
3838- : TargetTransformInfo::RGK_FixedWidthVector);
3837+ bool LoopVectorizationCostModel::shouldCalculateRegPressureForVF (
3838+ ElementCount VF) { // Returns true only when VF needs a register-pressure check; see the two conditions below.
3839+ if (!useMaxBandwidth (VF.isScalable ()
3840+ ? TargetTransformInfo::RGK_ScalableVector
3841+ : TargetTransformInfo::RGK_FixedWidthVector))
3842+ return false ; // MaxBandwidth is not in use for this VF's register kind (scalable vs. fixed-width).
3843+ // Only calculate register pressure for VFs enabled by MaxBandwidth, i.e. VFs known to be greater than MaxPermissibleVFWithoutMaxBW for the same kind (scalable or fixed).
3844+ return ElementCount::isKnownGT (
3845+ VF, VF.isScalable () ? MaxPermissibleVFWithoutMaxBW.ScalableVF
3846+ : MaxPermissibleVFWithoutMaxBW.FixedVF );
38393847}
38403848
38413849bool LoopVectorizationCostModel::useMaxBandwidth (
@@ -3911,6 +3919,12 @@ ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
39113919 ComputeScalableMaxVF ? TargetTransformInfo::RGK_ScalableVector
39123920 : TargetTransformInfo::RGK_FixedWidthVector;
39133921 ElementCount MaxVF = MaxVectorElementCount;
3922+
3923+ if (MaxVF.isScalable ())
3924+ MaxPermissibleVFWithoutMaxBW.ScalableVF = MaxVF;
3925+ else
3926+ MaxPermissibleVFWithoutMaxBW.FixedVF = MaxVF;
3927+
39143928 if (useMaxBandwidth (RegKind)) {
39153929 auto MaxVectorElementCountMaxBW = ElementCount::get (
39163930 llvm::bit_floor (WidestRegister.getKnownMinValue () / SmallestType),
@@ -4264,9 +4278,10 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
42644278 if (VF.isScalar ())
42654279 continue ;
42664280
4267- // / Don't consider the VF if it exceeds the number of registers for the
4268- // / target.
4269- if (CM.useMaxBandwidth (VF) && RUs[I].exceedsMaxNumRegs (TTI))
4281+ // / If the VF was proposed due to MaxBandwidth, don't consider the VF if
4282+ // / it exceeds the number of registers for the target.
4283+ if (CM.shouldCalculateRegPressureForVF (VF) &&
4284+ RUs[I].exceedsMaxNumRegs (TTI, ForceTargetNumVectorRegs))
42704285 continue ;
42714286
42724287 InstructionCost C = CM.expectedCost (VF);
@@ -7044,7 +7059,8 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
70447059 InstructionCost Cost = cost (*P, VF);
70457060 VectorizationFactor CurrentFactor (VF, Cost, ScalarCost);
70467061
7047- if (CM.useMaxBandwidth (VF) && RUs[I].exceedsMaxNumRegs (TTI)) {
7062+ if (CM.shouldCalculateRegPressureForVF (VF) &&
7063+ RUs[I].exceedsMaxNumRegs (TTI, ForceTargetNumVectorRegs)) {
70487064 LLVM_DEBUG (dbgs () << " LV(REG): Not considering vector loop of width "
70497065 << VF << " because it uses too many registers\n " );
70507066 continue ;
0 commit comments