@@ -411,10 +411,10 @@ static bool hasIrregularType(Type *Ty, const DataLayout &DL) {
411411/// 3) Returns upper bound estimate if known, and if \p CanUseConstantMax.
412412/// 4) Returns std::nullopt if all of the above failed.
413413static std::optional<unsigned >
414- getSmallBestKnownTC (ScalarEvolution &SE , Loop *L,
414+ getSmallBestKnownTC (PredicatedScalarEvolution &PSE , Loop *L,
415415 bool CanUseConstantMax = true ) {
416416 // Check if exact trip count is known.
417- if (unsigned ExpectedTC = SE. getSmallConstantTripCount (L))
417+ if (unsigned ExpectedTC = PSE. getSE ()-> getSmallConstantTripCount (L))
418418 return ExpectedTC;
419419
420420 // Check if there is an expected trip count available from profile data.
@@ -426,7 +426,7 @@ getSmallBestKnownTC(ScalarEvolution &SE, Loop *L,
426426 return std::nullopt ;
427427
428428 // Check if upper bound estimate is known.
429- if (unsigned ExpectedTC = SE .getSmallConstantMaxTripCount (L ))
429+ if (unsigned ExpectedTC = PSE .getSmallConstantMaxTripCount ())
430430 return ExpectedTC;
431431
432432 return std::nullopt ;
@@ -1789,12 +1789,15 @@ class GeneratedRTChecks {
17891789
17901790 Loop *OuterLoop = nullptr ;
17911791
1792+ PredicatedScalarEvolution &PSE;
1793+
17921794public:
1793- GeneratedRTChecks (ScalarEvolution &SE, DominatorTree *DT, LoopInfo *LI,
1794- TargetTransformInfo *TTI, const DataLayout &DL,
1795- bool AddBranchWeights)
1796- : DT(DT), LI(LI), TTI(TTI), SCEVExp(SE, DL, " scev.check" ),
1797- MemCheckExp (SE, DL, " scev.check" ), AddBranchWeights(AddBranchWeights) {}
1795+ GeneratedRTChecks (PredicatedScalarEvolution &PSE, DominatorTree *DT,
1796+ LoopInfo *LI, TargetTransformInfo *TTI,
1797+ const DataLayout &DL, bool AddBranchWeights)
1798+ : DT(DT), LI(LI), TTI(TTI), SCEVExp(*PSE.getSE(), DL, " scev.check" ),
1799+ MemCheckExp (*PSE.getSE(), DL, "scev.check"),
1800+ AddBranchWeights(AddBranchWeights), PSE(PSE) {}
17981801
17991802 /// Generate runtime checks in SCEVCheckBlock and MemCheckBlock, so we can
18001803 /// accurately estimate the cost of the runtime checks. The blocks are
@@ -1941,7 +1944,7 @@ class GeneratedRTChecks {
19411944
19421945 // Get the best known TC estimate.
19431946 if (auto EstimatedTC = getSmallBestKnownTC (
1944- *SE , OuterLoop, /* CanUseConstantMax = */ false ))
1947+ PSE , OuterLoop, /* CanUseConstantMax = */ false ))
19451948 BestTripCount = *EstimatedTC;
19461949
19471950 BestTripCount = std::max (BestTripCount, 1U );
@@ -2272,8 +2275,7 @@ static bool isIndvarOverflowCheckKnownFalse(
22722275 // We know the runtime overflow check is known false iff the (max) trip-count
22732276 // is known and (max) trip-count + (VF * UF) does not overflow in the type of
22742277 // the vector loop induction variable.
2275- if (unsigned TC =
2276- Cost->PSE .getSE ()->getSmallConstantMaxTripCount (Cost->TheLoop )) {
2278+ if (unsigned TC = Cost->PSE .getSmallConstantMaxTripCount ()) {
22772279 uint64_t MaxVF = VF.getKnownMinValue ();
22782280 if (VF.isScalable ()) {
22792281 std::optional<unsigned > MaxVScale =
@@ -3962,8 +3964,10 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
39623964 }
39633965
39643966 unsigned TC = PSE.getSE ()->getSmallConstantTripCount (TheLoop);
3965- unsigned MaxTC = PSE.getSE ()-> getSmallConstantMaxTripCount (TheLoop );
3967+ unsigned MaxTC = PSE.getSmallConstantMaxTripCount ();
39663968 LLVM_DEBUG (dbgs () << " LV: Found trip count: " << TC << ' \n ' );
3969+ if (TC != MaxTC)
3970+ LLVM_DEBUG (dbgs () << " LV: Found maximum trip count: " << MaxTC << ' \n ' );
39673971 if (TC == 1 ) {
39683972 reportVectorizationFailure (" Single iteration (non) loop" ,
39693973 " loop trip count is one, irrelevant for vectorization" ,
@@ -4257,7 +4261,7 @@ bool LoopVectorizationPlanner::isMoreProfitable(
42574261 InstructionCost CostA = A.Cost ;
42584262 InstructionCost CostB = B.Cost ;
42594263
4260- unsigned MaxTripCount = PSE.getSE ()-> getSmallConstantMaxTripCount (OrigLoop );
4264+ unsigned MaxTripCount = PSE.getSmallConstantMaxTripCount ();
42614265
42624266 // Improve estimate for the vector width if it is scalable.
42634267 unsigned EstimatedWidthA = A.Width .getKnownMinValue ();
@@ -4852,7 +4856,7 @@ LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
48524856 if (!Legal->isSafeForAnyVectorWidth ())
48534857 return 1 ;
48544858
4855- auto BestKnownTC = getSmallBestKnownTC (* PSE. getSE () , TheLoop);
4859+ auto BestKnownTC = getSmallBestKnownTC (PSE, TheLoop);
48564860 const bool HasReductions = !Legal->getReductionVars ().empty ();
48574861
48584862 // If we did not calculate the cost for VF (because the user selected the VF)
@@ -9618,8 +9622,8 @@ static bool processLoopInVPlanNativePath(
96189622 {
96199623 bool AddBranchWeights =
96209624 hasBranchWeightMD (*L->getLoopLatch ()->getTerminator ());
9621- GeneratedRTChecks Checks (* PSE. getSE () , DT, LI, TTI,
9622- F-> getDataLayout (), AddBranchWeights);
9625+ GeneratedRTChecks Checks (PSE, DT, LI, TTI, F-> getDataLayout () ,
9626+ AddBranchWeights);
96239627 InnerLoopVectorizer LB (L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width ,
96249628 VF.Width , 1 , LVL, &CM, BFI, PSI, Checks);
96259629 LLVM_DEBUG (dbgs () << " Vectorizing outer loop in \" "
@@ -9683,7 +9687,7 @@ static void checkMixedPrecision(Loop *L, OptimizationRemarkEmitter *ORE) {
96839687static bool areRuntimeChecksProfitable (GeneratedRTChecks &Checks,
96849688 VectorizationFactor &VF,
96859689 std::optional<unsigned > VScale, Loop *L,
9686- ScalarEvolution &SE ,
9690+ PredicatedScalarEvolution &PSE ,
96879691 ScalarEpilogueLowering SEL) {
96889692 InstructionCost CheckCost = Checks.getCost ();
96899693 if (!CheckCost.isValid ())
@@ -9768,7 +9772,7 @@ static bool areRuntimeChecksProfitable(GeneratedRTChecks &Checks,
97689772
97699773 // Skip vectorization if the expected trip count is less than the minimum
97709774 // required trip count.
9771- if (auto ExpectedTC = getSmallBestKnownTC (SE , L)) {
9775+ if (auto ExpectedTC = getSmallBestKnownTC (PSE , L)) {
97729776 if (ElementCount::isKnownLT (ElementCount::getFixed (*ExpectedTC),
97739777 VF.MinProfitableTripCount )) {
97749778 LLVM_DEBUG (dbgs () << " LV: Vectorization is not beneficial: expected "
@@ -9875,7 +9879,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
98759879
98769880 // Check the loop for a trip count threshold: vectorize loops with a tiny trip
98779881 // count by optimizing for size, to minimize overheads.
9878- auto ExpectedTC = getSmallBestKnownTC (*SE , L);
9882+ auto ExpectedTC = getSmallBestKnownTC (PSE , L);
98799883 if (ExpectedTC && *ExpectedTC < TinyTripCountVectorThreshold) {
98809884 LLVM_DEBUG (dbgs () << " LV: Found a loop with a very small trip count. "
98819885 << " This loop is worth vectorizing only if no scalar "
@@ -9973,8 +9977,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
99739977
99749978 bool AddBranchWeights =
99759979 hasBranchWeightMD (*L->getLoopLatch ()->getTerminator ());
9976- GeneratedRTChecks Checks (* PSE. getSE () , DT, LI, TTI,
9977- F-> getDataLayout (), AddBranchWeights);
9980+ GeneratedRTChecks Checks (PSE, DT, LI, TTI, F-> getDataLayout () ,
9981+ AddBranchWeights);
99789982 if (LVP.hasPlanWithVF (VF.Width )) {
99799983 // Select the interleave count.
99809984 IC = CM.selectInterleaveCount (VF.Width , VF.Cost );
@@ -9990,7 +9994,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
99909994 Hints.getForce () == LoopVectorizeHints::FK_Enabled;
99919995 if (!ForceVectorization &&
99929996 !areRuntimeChecksProfitable (Checks, VF, getVScaleForTuning (L, *TTI), L,
9993- * PSE. getSE () , SEL)) {
9997+ PSE, SEL)) {
99949998 ORE->emit ([&]() {
99959999 return OptimizationRemarkAnalysisAliasing (
999610000 DEBUG_TYPE, " CantReorderMemOps" , L->getStartLoc (),
0 commit comments