@@ -4253,9 +4253,10 @@ static unsigned getEstimatedRuntimeVF(ElementCount VF,
42534253 return EstimatedVF;
42544254}
42554255
4256- bool LoopVectorizationPlanner::isMoreProfitable (
4257- const VectorizationFactor &A, const VectorizationFactor &B,
4258- const unsigned MaxTripCount) const {
4256+ bool LoopVectorizationPlanner::isMoreProfitable (const VectorizationFactor &A,
4257+ const VectorizationFactor &B,
4258+ const unsigned MaxTripCount,
4259+ bool HasTail) const {
42594260 InstructionCost CostA = A.Cost ;
42604261 InstructionCost CostB = B.Cost ;
42614262
@@ -4293,9 +4294,9 @@ bool LoopVectorizationPlanner::isMoreProfitable(
42934294 if (!MaxTripCount)
42944295 return CmpFn (CostA * EstimatedWidthB, CostB * EstimatedWidthA);
42954296
4296- auto GetCostForTC = [MaxTripCount, this ](unsigned VF,
4297- InstructionCost VectorCost,
4298- InstructionCost ScalarCost) {
4297+ auto GetCostForTC = [MaxTripCount, HasTail ](unsigned VF,
4298+ InstructionCost VectorCost,
4299+ InstructionCost ScalarCost) {
42994300 // If the trip count is a known (possibly small) constant, the trip count
43004301 // will be rounded up to an integer number of iterations under
43014302 // FoldTailByMasking. The total cost in that case will be
@@ -4304,20 +4305,23 @@ bool LoopVectorizationPlanner::isMoreProfitable(
43044305 // some extra overheads, but for the purpose of comparing the costs of
43054306 // different VFs we can use this to compare the total loop-body cost
43064307 // expected after vectorization.
4307- if (CM.foldTailByMasking ())
4308- return VectorCost * divideCeil (MaxTripCount, VF);
4309- return VectorCost * (MaxTripCount / VF) + ScalarCost * (MaxTripCount % VF);
4308+ if (HasTail)
4309+ return VectorCost * (MaxTripCount / VF) +
4310+ ScalarCost * (MaxTripCount % VF);
4311+ return VectorCost * divideCeil (MaxTripCount, VF);
43104312 };
43114313
43124314 auto RTCostA = GetCostForTC (EstimatedWidthA, CostA, A.ScalarCost );
43134315 auto RTCostB = GetCostForTC (EstimatedWidthB, CostB, B.ScalarCost );
43144316 return CmpFn (RTCostA, RTCostB);
43154317}
43164318
// Overload of isMoreProfitable that takes no explicit trip count: it reads the
// small constant max trip count from PSE and forwards to the overload that
// accepts MaxTripCount. In this diff the '-' lines are the previous form
// (forwarding A, B and MaxTripCount only) and the '+' lines are the new form,
// which additionally accepts HasTail and passes it through unchanged.
4317- bool LoopVectorizationPlanner::isMoreProfitable (
4318- const VectorizationFactor &A, const VectorizationFactor &B) const {
4319+ bool LoopVectorizationPlanner::isMoreProfitable (const VectorizationFactor &A,
4320+ const VectorizationFactor &B,
4321+ bool HasTail) const {
// Value is looked up from PSE on every call rather than supplied by the caller.
43194322 const unsigned MaxTripCount = PSE.getSmallConstantMaxTripCount ();
4320- return LoopVectorizationPlanner::isMoreProfitable (A, B, MaxTripCount);
4323+ return LoopVectorizationPlanner::isMoreProfitable (A, B, MaxTripCount,
4324+ HasTail);
43214325}
43224326
43234327void LoopVectorizationPlanner::emitInvalidCostRemarks (
@@ -4607,7 +4611,7 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
46074611 continue ;
46084612 }
46094613
4610- if (isMoreProfitable (Candidate, ChosenFactor))
4614+ if (isMoreProfitable (Candidate, ChosenFactor, P-> hasScalarTail () ))
46114615 ChosenFactor = Candidate;
46124616 }
46134617 }
@@ -4621,7 +4625,8 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
46214625 }
46224626
46234627 LLVM_DEBUG (if (ForceVectorization && !ChosenFactor.Width .isScalar () &&
4624- !isMoreProfitable (ChosenFactor, ScalarCost)) dbgs ()
4628+ !isMoreProfitable (ChosenFactor, ScalarCost,
4629+ !CM.foldTailByMasking ())) dbgs ()
46254630 << " LV: Vectorization seems to be not beneficial, "
46264631 << " but was forced by a user.\n " );
46274632 return ChosenFactor;
@@ -4713,7 +4718,8 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
47134718
47144719 if (EpilogueVectorizationForceVF > 1 ) {
47154720 LLVM_DEBUG (dbgs () << " LEV: Epilogue vectorization factor is forced.\n " );
4716- ElementCount ForcedEC = ElementCount::getFixed (EpilogueVectorizationForceVF);
4721+ ElementCount ForcedEC =
4722+ ElementCount::getFixed (EpilogueVectorizationForceVF);
47174723 if (hasPlanWithVF (ForcedEC))
47184724 return {ForcedEC, 0 , 0 };
47194725
@@ -4787,7 +4793,7 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
47874793 }
47884794
47894795 if (Result.Width .isScalar () ||
4790- isMoreProfitable (NextVF, Result, MaxTripCount))
4796+ isMoreProfitable (NextVF, Result, MaxTripCount, !CM. foldTailByMasking () ))
47914797 Result = NextVF;
47924798 }
47934799
@@ -7540,11 +7546,11 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
75407546
75417547 InstructionCost Cost = cost (*P, VF);
75427548 VectorizationFactor CurrentFactor (VF, Cost, ScalarCost);
7543- if (isMoreProfitable (CurrentFactor, BestFactor))
7549+ if (isMoreProfitable (CurrentFactor, BestFactor, P-> hasScalarTail () ))
75447550 BestFactor = CurrentFactor;
75457551
75467552 // If profitable add it to ProfitableVF list.
7547- if (isMoreProfitable (CurrentFactor, ScalarFactor))
7553+ if (isMoreProfitable (CurrentFactor, ScalarFactor, P-> hasScalarTail () ))
75487554 ProfitableVFs.push_back (CurrentFactor);
75497555 }
75507556 }
0 commit comments