@@ -4256,9 +4256,10 @@ static unsigned getEstimatedRuntimeVF(ElementCount VF,
42564256 return EstimatedVF;
42574257}
42584258
4259- bool LoopVectorizationPlanner::isMoreProfitable (
4260- const VectorizationFactor &A, const VectorizationFactor &B,
4261- const unsigned MaxTripCount) const {
4259+ bool LoopVectorizationPlanner::isMoreProfitable (const VectorizationFactor &A,
4260+ const VectorizationFactor &B,
4261+ const unsigned MaxTripCount,
4262+ bool HasTail) const {
42624263 InstructionCost CostA = A.Cost ;
42634264 InstructionCost CostB = B.Cost ;
42644265
@@ -4296,9 +4297,9 @@ bool LoopVectorizationPlanner::isMoreProfitable(
42964297 if (!MaxTripCount)
42974298 return CmpFn (CostA * EstimatedWidthB, CostB * EstimatedWidthA);
42984299
4299- auto GetCostForTC = [MaxTripCount, this ](unsigned VF,
4300- InstructionCost VectorCost,
4301- InstructionCost ScalarCost) {
4300+ auto GetCostForTC = [MaxTripCount, HasTail ](unsigned VF,
4301+ InstructionCost VectorCost,
4302+ InstructionCost ScalarCost) {
43024303 // If the trip count is a known (possibly small) constant, the trip count
43034304 // will be rounded up to an integer number of iterations under
43044305 // FoldTailByMasking. The total cost in that case will be
@@ -4307,20 +4308,23 @@ bool LoopVectorizationPlanner::isMoreProfitable(
43074308 // some extra overheads, but for the purpose of comparing the costs of
43084309 // different VFs we can use this to compare the total loop-body cost
43094310 // expected after vectorization.
4310- if (CM.foldTailByMasking ())
4311- return VectorCost * divideCeil (MaxTripCount, VF);
4312- return VectorCost * (MaxTripCount / VF) + ScalarCost * (MaxTripCount % VF);
4311+ if (HasTail)
4312+ return VectorCost * (MaxTripCount / VF) +
4313+ ScalarCost * (MaxTripCount % VF);
4314+ return VectorCost * divideCeil (MaxTripCount, VF);
43134315 };
43144316
43154317 auto RTCostA = GetCostForTC (EstimatedWidthA, CostA, A.ScalarCost );
43164318 auto RTCostB = GetCostForTC (EstimatedWidthB, CostB, B.ScalarCost );
43174319 return CmpFn (RTCostA, RTCostB);
43184320}
43194321
4320- bool LoopVectorizationPlanner::isMoreProfitable (
4321- const VectorizationFactor &A, const VectorizationFactor &B) const {
4322+ bool LoopVectorizationPlanner::isMoreProfitable (const VectorizationFactor &A,
4323+ const VectorizationFactor &B,
4324+ bool HasTail) const {
43224325 const unsigned MaxTripCount = PSE.getSmallConstantMaxTripCount ();
4323- return LoopVectorizationPlanner::isMoreProfitable (A, B, MaxTripCount);
4326+ return LoopVectorizationPlanner::isMoreProfitable (A, B, MaxTripCount,
4327+ HasTail);
43244328}
43254329
43264330void LoopVectorizationPlanner::emitInvalidCostRemarks (
@@ -4609,7 +4613,7 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
46094613 continue ;
46104614 }
46114615
4612- if (isMoreProfitable (Candidate, ChosenFactor))
4616+ if (isMoreProfitable (Candidate, ChosenFactor, P-> hasScalarTail () ))
46134617 ChosenFactor = Candidate;
46144618 }
46154619 }
@@ -4623,7 +4627,8 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
46234627 }
46244628
46254629 LLVM_DEBUG (if (ForceVectorization && !ChosenFactor.Width .isScalar () &&
4626- !isMoreProfitable (ChosenFactor, ScalarCost)) dbgs ()
4630+ !isMoreProfitable (ChosenFactor, ScalarCost,
4631+ !CM.foldTailByMasking ())) dbgs ()
46274632 << " LV: Vectorization seems to be not beneficial, "
46284633 << " but was forced by a user.\n " );
46294634 return ChosenFactor;
@@ -4789,7 +4794,7 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
47894794 }
47904795
47914796 if (Result.Width .isScalar () ||
4792- isMoreProfitable (NextVF, Result, MaxTripCount))
4797+ isMoreProfitable (NextVF, Result, MaxTripCount, !CM. foldTailByMasking () ))
47934798 Result = NextVF;
47944799 }
47954800
@@ -7768,11 +7773,11 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
77687773
77697774 InstructionCost Cost = cost (*P, VF);
77707775 VectorizationFactor CurrentFactor (VF, Cost, ScalarCost);
7771- if (isMoreProfitable (CurrentFactor, BestFactor))
7776+ if (isMoreProfitable (CurrentFactor, BestFactor, P-> hasScalarTail () ))
77727777 BestFactor = CurrentFactor;
77737778
77747779 // If profitable add it to ProfitableVF list.
7775- if (isMoreProfitable (CurrentFactor, ScalarFactor))
7780+ if (isMoreProfitable (CurrentFactor, ScalarFactor, P-> hasScalarTail () ))
77767781 ProfitableVFs.push_back (CurrentFactor);
77777782 }
77787783 }
0 commit comments