@@ -10177,20 +10177,19 @@ static void checkMixedPrecision(Loop *L, OptimizationRemarkEmitter *ORE) {
1017710177/// TODO: This is currently overly pessimistic because the loop may not take
1017810178/// the early exit, but better to keep this conservative for now. In future,
1017910179/// it might be possible to relax this by using branch probabilities.
10180- static InstructionCost calculateEarlyExitCost (LoopVectorizationCostModel &CM ,
10180+ static InstructionCost calculateEarlyExitCost (VPCostContext &CostCtx ,
1018110181 VPlan &Plan, ElementCount VF) {
1018210182 InstructionCost Cost = 0 ;
10183- VPCostContext CostCtx (CM.TTI , *CM.TLI , CM.Legal ->getWidestInductionType (), CM,
10184- CM.CostKind );
10185- LLVM_DEBUG (
10186- dbgs () << " Calculating cost of work in vector early exit block:\n " );
1018710183 for (auto *ExitVPBB : Plan.getExitBlocks ()) {
1018810184 for (auto *PredVPBB : ExitVPBB->getPredecessors ()) {
1018910185 // If the predecessor is not the middle.block, then it must be the
1019010186 // vector.early.exit block, which may contain work to calculate the exit
1019110187 // values of variables used outside the loop.
10192- if (PredVPBB != Plan.getMiddleBlock ())
10188+ if (PredVPBB != Plan.getMiddleBlock ()) {
10189+ LLVM_DEBUG (dbgs () << " Calculating cost of work in exit block "
10190+ << PredVPBB->getName () << " :\n " );
1019310191 Cost += PredVPBB->cost (VF, CostCtx);
10192+ }
1019410193 }
1019510194 }
1019610195 return Cost;
@@ -10204,18 +10203,18 @@ static InstructionCost calculateEarlyExitCost(LoopVectorizationCostModel &CM,
1020410203/// extra work when exiting the loop early, such as calculating the final
1020510204/// exit values of variables used outside the loop.
1020610205static bool isOutsideLoopWorkProfitable (GeneratedRTChecks &Checks,
10207- VectorizationFactor &VF,
10208- LoopVectorizationCostModel &CM,
10206+ VectorizationFactor &VF, Loop *L,
1020910207 PredicatedScalarEvolution &PSE,
10210- VPlan &Plan,
10211- ScalarEpilogueLowering SEL) {
10208+ VPCostContext &CostCtx, VPlan &Plan,
10209+ ScalarEpilogueLowering SEL,
10210+ std::optional<unsigned > VScale) {
1021210211 InstructionCost TotalCost = Checks.getCost ();
1021310212 if (!TotalCost.isValid ())
1021410213 return false ;
1021510214
1021610215 // Add on the cost of any work required in the vector early exit block, if
1021710216 // one exists.
10218- TotalCost += calculateEarlyExitCost (CM , Plan, VF.Width );
10217+ TotalCost += calculateEarlyExitCost (CostCtx , Plan, VF.Width );
1021910218
1022010219 // When interleaving only scalar and vector cost will be equal, which in turn
1022110220 // would lead to a divide by 0. Fall back to hard threshold.
@@ -10266,7 +10265,7 @@ static bool isOutsideLoopWorkProfitable(GeneratedRTChecks &Checks,
1026610265 // For now we assume the epilogue cost EpiC = 0 for simplicity. Note that
1026710266 // the computations are performed on doubles, not integers and the result
1026810267 // is rounded up, hence we get an upper estimate of the TC.
10269- unsigned IntVF = getEstimatedRuntimeVF (VF.Width , CM. getVScaleForTuning () );
10268+ unsigned IntVF = getEstimatedRuntimeVF (VF.Width , VScale );
1027010269 uint64_t RtC = *TotalCost.getValue ();
1027110270 uint64_t Div = ScalarC * IntVF - *VF.Cost .getValue ();
1027210271 uint64_t MinTC1 = Div == 0 ? 0 : divideCeil (RtC * IntVF, Div);
@@ -10294,7 +10293,7 @@ static bool isOutsideLoopWorkProfitable(GeneratedRTChecks &Checks,
1029410293
1029510294 // Skip vectorization if the expected trip count is less than the minimum
1029610295 // required trip count.
10297- if (auto ExpectedTC = getSmallBestKnownTC (PSE, CM. TheLoop )) {
10296+ if (auto ExpectedTC = getSmallBestKnownTC (PSE, L )) {
1029810297 if (ElementCount::isKnownLT (ElementCount::getFixed (*ExpectedTC),
1029910298 VF.MinProfitableTripCount )) {
1030010299 LLVM_DEBUG (dbgs () << " LV: Vectorization is not beneficial: expected "
@@ -10694,9 +10693,12 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1069410693 // Check if it is profitable to vectorize with runtime checks.
1069510694 bool ForceVectorization =
1069610695 Hints.getForce () == LoopVectorizeHints::FK_Enabled;
10696+ VPCostContext CostCtx (CM.TTI , *CM.TLI , CM.Legal ->getWidestInductionType (),
10697+ CM, CM.CostKind );
1069710698 if (!ForceVectorization &&
10698- !isOutsideLoopWorkProfitable (Checks, VF, CM, PSE,
10699- LVP.getPlanFor (VF.Width ), SEL)) {
10699+ !isOutsideLoopWorkProfitable (Checks, VF, L, PSE, CostCtx,
10700+ LVP.getPlanFor (VF.Width ), SEL,
10701+ CM.getVScaleForTuning ())) {
1070010702 ORE->emit ([&]() {
1070110703 return OptimizationRemarkAnalysisAliasing (
1070210704 DEBUG_TYPE, " CantReorderMemOps" , L->getStartLoc (),
0 commit comments