@@ -1535,10 +1535,7 @@ class LoopVectorizationCostModel {
15351535 // / Returns true if epilogue vectorization is considered profitable, and
15361536 // / false otherwise.
15371537 // / \p VF is the vectorization factor chosen for the original loop.
1538- // / \p Multiplier is an aditional scaling factor applied to VF before
1539- // / comparing to EpilogueVectorizationMinVF.
1540- bool isEpilogueVectorizationProfitable (const ElementCount VF,
1541- const unsigned IC) const ;
1538+ bool isEpilogueVectorizationProfitable (const ElementCount VF) const ;
15421539
15431540 // / Returns the execution time cost of an instruction for a given vector
15441541 // / width. Vector width of one means scalar.
@@ -4262,11 +4259,12 @@ static unsigned getEstimatedRuntimeVF(const Loop *L,
42624259}
42634260
42644261bool LoopVectorizationPlanner::isMoreProfitable (
4265- const VectorizationFactor &A, const VectorizationFactor &B,
4266- const unsigned MaxTripCount) const {
4262+ const VectorizationFactor &A, const VectorizationFactor &B) const {
42674263 InstructionCost CostA = A.Cost ;
42684264 InstructionCost CostB = B.Cost ;
42694265
4266+ unsigned MaxTripCount = PSE.getSmallConstantMaxTripCount ();
4267+
42704268 // Improve estimate for the vector width if it is scalable.
42714269 unsigned EstimatedWidthA = A.Width .getKnownMinValue ();
42724270 unsigned EstimatedWidthB = B.Width .getKnownMinValue ();
@@ -4315,12 +4313,6 @@ bool LoopVectorizationPlanner::isMoreProfitable(
43154313 return CmpFn (RTCostA, RTCostB);
43164314}
43174315
4318- bool LoopVectorizationPlanner::isMoreProfitable (
4319- const VectorizationFactor &A, const VectorizationFactor &B) const {
4320- const unsigned MaxTripCount = PSE.getSmallConstantMaxTripCount ();
4321- return LoopVectorizationPlanner::isMoreProfitable (A, B, MaxTripCount);
4322- }
4323-
43244316void LoopVectorizationPlanner::emitInvalidCostRemarks (
43254317 OptimizationRemarkEmitter *ORE) {
43264318 using RecipeVFPair = std::pair<VPRecipeBase *, ElementCount>;
@@ -4635,7 +4627,7 @@ bool LoopVectorizationPlanner::isCandidateForEpilogueVectorization(
46354627}
46364628
46374629bool LoopVectorizationCostModel::isEpilogueVectorizationProfitable (
4638- const ElementCount VF, const unsigned IC ) const {
4630+ const ElementCount VF) const {
46394631 // FIXME: We need a much better cost-model to take different parameters such
46404632 // as register pressure, code size increase and cost of extra branches into
46414633 // account. For now we apply a very crude heuristic and only consider loops
@@ -4650,15 +4642,12 @@ bool LoopVectorizationCostModel::isEpilogueVectorizationProfitable(
46504642 if (TTI.getMaxInterleaveFactor (VF) <= 1 )
46514643 return false ;
46524644
4653- // TODO: PR #108190 introduced a discrepancy between fixed-width and scalable
4654- // VFs when deciding profitability.
4655- // See related "TODO: extend to support scalable VFs." in
4656- // selectEpilogueVectorizationFactor.
4657- unsigned Multiplier = VF.isFixed () ? IC : 1 ;
4658- unsigned MinVFThreshold = EpilogueVectorizationMinVF.getNumOccurrences () > 0
4659- ? EpilogueVectorizationMinVF
4660- : TTI.getEpilogueVectorizationMinVF ();
4661- return getEstimatedRuntimeVF (TheLoop, TTI, VF * Multiplier) >= MinVFThreshold;
4645+ unsigned Multiplier = 1 ;
4646+ if (VF.isScalable ())
4647+ Multiplier = getVScaleForTuning (TheLoop, TTI).value_or (1 );
4648+ if ((Multiplier * VF.getKnownMinValue ()) >= EpilogueVectorizationMinVF)
4649+ return true ;
4650+ return false ;
46624651}
46634652
46644653VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor (
@@ -4701,7 +4690,7 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
47014690 return Result;
47024691 }
47034692
4704- if (!CM.isEpilogueVectorizationProfitable (MainLoopVF, IC )) {
4693+ if (!CM.isEpilogueVectorizationProfitable (MainLoopVF)) {
47054694 LLVM_DEBUG (dbgs () << " LEV: Epilogue vectorization is not profitable for "
47064695 " this loop\n " );
47074696 return Result;
@@ -4716,20 +4705,16 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
47164705 ScalarEvolution &SE = *PSE.getSE ();
47174706 Type *TCType = Legal->getWidestInductionType ();
47184707 const SCEV *RemainingIterations = nullptr ;
4719- unsigned MaxTripCount = 0 ;
47204708 for (auto &NextVF : ProfitableVFs) {
47214709 // Skip candidate VFs without a corresponding VPlan.
47224710 if (!hasPlanWithVF (NextVF.Width ))
47234711 continue ;
47244712
4725- // Skip candidate VFs with widths >= the (estimated) runtime VF (scalable
4726- // vectors) or > the VF of the main loop (fixed vectors).
4713+ // Skip candidate VFs with widths >= the estimated runtime VF (scalable
4714+ // vectors) or the VF of the main loop (fixed vectors).
47274715 if ((!NextVF.Width .isScalable () && MainLoopVF.isScalable () &&
47284716 ElementCount::isKnownGE (NextVF.Width , EstimatedRuntimeVF)) ||
4729- (NextVF.Width .isScalable () &&
4730- ElementCount::isKnownGE (NextVF.Width , MainLoopVF)) ||
4731- (!NextVF.Width .isScalable () && !MainLoopVF.isScalable () &&
4732- ElementCount::isKnownGT (NextVF.Width , MainLoopVF)))
4717+ ElementCount::isKnownGE (NextVF.Width , MainLoopVF))
47334718 continue ;
47344719
47354720 // If NextVF is greater than the number of remaining iterations, the
@@ -4743,14 +4728,6 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
47434728 " Trip count SCEV must be computable" );
47444729 RemainingIterations = SE.getURemExpr (
47454730 TC, SE.getConstant (TCType, MainLoopVF.getKnownMinValue () * IC));
4746- MaxTripCount = MainLoopVF.getKnownMinValue () * IC - 1 ;
4747- if (SE.isKnownPredicate (CmpInst::ICMP_ULT, RemainingIterations,
4748- SE.getConstant (TCType, MaxTripCount))) {
4749- MaxTripCount =
4750- SE.getUnsignedRangeMax (RemainingIterations).getZExtValue ();
4751- }
4752- LLVM_DEBUG (dbgs () << " LEV: Maximum Trip Count for Epilogue: "
4753- << MaxTripCount << " \n " );
47544731 }
47554732 if (SE.isKnownPredicate (
47564733 CmpInst::ICMP_UGT,
@@ -4759,8 +4736,7 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
47594736 continue ;
47604737 }
47614738
4762- if (Result.Width .isScalar () ||
4763- isMoreProfitable (NextVF, Result, MaxTripCount))
4739+ if (Result.Width .isScalar () || isMoreProfitable (NextVF, Result))
47644740 Result = NextVF;
47654741 }
47664742
0 commit comments