@@ -419,6 +419,13 @@ static bool hasIrregularType(Type *Ty, const DataLayout &DL) {
419419 return DL.getTypeAllocSizeInBits (Ty) != DL.getTypeSizeInBits (Ty);
420420}
421421
422+ // / A version of ScalarEvolution::getSmallConstantTripCount that returns an
423+ // / ElementCount to include loops whose trip count is a function of vscale.
424+ static ElementCount getSmallConstantTripCount (ScalarEvolution *SE,
425+ const Loop *L) {
426+ return ElementCount::getFixed (SE->getSmallConstantTripCount (L));
427+ }
428+
422429// / Returns "best known" trip count, which is either a valid positive trip count
423430// / or std::nullopt when an estimate cannot be made (including when the trip
424431// / count would overflow), for the specified loop \p L as defined by the
@@ -427,24 +434,24 @@ static bool hasIrregularType(Type *Ty, const DataLayout &DL) {
427434// / 2) Returns expected trip count according to profile data if any.
428435// / 3) Returns upper bound estimate if known, and if \p CanUseConstantMax.
429436// / 4) Returns std::nullopt if all of the above failed.
430- static std::optional<unsigned >
437+ static std::optional<ElementCount >
431438getSmallBestKnownTC (PredicatedScalarEvolution &PSE, Loop *L,
432439 bool CanUseConstantMax = true ) {
433440 // Check if exact trip count is known.
434- if (unsigned ExpectedTC = PSE.getSE ()-> getSmallConstantTripCount ( L))
441+ if (auto ExpectedTC = getSmallConstantTripCount ( PSE.getSE (), L))
435442 return ExpectedTC;
436443
437444 // Check if there is an expected trip count available from profile data.
438445 if (LoopVectorizeWithBlockFrequency)
439446 if (auto EstimatedTC = getLoopEstimatedTripCount (L))
440- return *EstimatedTC;
447+ return ElementCount::getFixed ( *EstimatedTC) ;
441448
442449 if (!CanUseConstantMax)
443450 return std::nullopt ;
444451
445452 // Check if upper bound estimate is known.
446453 if (unsigned ExpectedTC = PSE.getSmallConstantMaxTripCount ())
447- return ExpectedTC;
454+ return ElementCount::getFixed ( ExpectedTC) ;
448455
449456 return std::nullopt ;
450457}
@@ -1960,7 +1967,8 @@ class GeneratedRTChecks {
19601967 // Get the best known TC estimate.
19611968 if (auto EstimatedTC = getSmallBestKnownTC (
19621969 PSE, OuterLoop, /* CanUseConstantMax = */ false ))
1963- BestTripCount = *EstimatedTC;
1970+ if (EstimatedTC->isFixed ())
1971+ BestTripCount = EstimatedTC->getFixedValue ();
19641972
19651973 InstructionCost NewMemCheckCost = MemCheckCost / BestTripCount;
19661974
@@ -3750,12 +3758,12 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
37503758 }
37513759
37523760 ScalarEvolution *SE = PSE.getSE ();
3753- unsigned TC = SE-> getSmallConstantTripCount (TheLoop);
3761+ ElementCount TC = getSmallConstantTripCount (SE, TheLoop);
37543762 unsigned MaxTC = PSE.getSmallConstantMaxTripCount ();
37553763 LLVM_DEBUG (dbgs () << " LV: Found trip count: " << TC << ' \n ' );
3756- if (TC != MaxTC)
3764+ if (TC != ElementCount::getFixed ( MaxTC) )
37573765 LLVM_DEBUG (dbgs () << " LV: Found maximum trip count: " << MaxTC << ' \n ' );
3758- if (TC == 1 ) {
3766+ if (TC. isScalar () ) {
37593767 reportVectorizationFailure (" Single iteration (non) loop" ,
37603768 " loop trip count is one, irrelevant for vectorization" ,
37613769 " SingleIterationLoop" , ORE, TheLoop);
@@ -3869,7 +3877,9 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
38693877 }
38703878
38713879 auto ExpectedTC = getSmallBestKnownTC (PSE, TheLoop);
3872- if (ExpectedTC && ExpectedTC <= TTI.getMinTripCountTailFoldingThreshold ()) {
3880+ if (ExpectedTC && ExpectedTC->isFixed () &&
3881+ ExpectedTC->getFixedValue () <=
3882+ TTI.getMinTripCountTailFoldingThreshold ()) {
38733883 if (MaxPowerOf2RuntimeVF > 0u ) {
38743884 // If we have a low-trip-count, and the fixed-width VF is known to divide
38753885 // the trip count but the scalable factor does not, use the fixed-width
@@ -3927,7 +3937,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
39273937 return FixedScalableVFPair::getNone ();
39283938 }
39293939
3930- if (TC == 0 ) {
3940+ if (TC. isZero () ) {
39313941 reportVectorizationFailure (
39323942 " unable to calculate the loop count due to complex control flow" ,
39333943 " UnknownLoopCountComplexCFG" , ORE, TheLoop);
@@ -4816,13 +4826,13 @@ LoopVectorizationCostModel::selectInterleaveCount(VPlan &Plan, ElementCount VF,
48164826 // At least one iteration must be scalar when this constraint holds. So the
48174827 // maximum available iterations for interleaving is one less.
48184828 unsigned AvailableTC = requiresScalarEpilogue (VF.isVector ())
4819- ? (* BestKnownTC) - 1
4820- : * BestKnownTC;
4829+ ? BestKnownTC-> getFixedValue ( ) - 1
4830+ : BestKnownTC-> getFixedValue () ;
48214831
48224832 unsigned InterleaveCountLB = bit_floor (std::max (
48234833 1u , std::min (AvailableTC / (EstimatedVF * 2 ), MaxInterleaveCount)));
48244834
4825- if (PSE.getSE ()-> getSmallConstantTripCount ( TheLoop) > 0 ) {
4835+ if (getSmallConstantTripCount ( PSE.getSE (), TheLoop). isNonZero () ) {
48264836 // If the best known trip count is exact, we select between two
48274837 // prospective ICs, where
48284838 //
@@ -5182,8 +5192,8 @@ InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
51825192 // costs of comparison and induction instructions, as they'll get simplified
51835193 // away.
51845194 SmallPtrSet<Instruction *, 2 > ValuesToIgnoreForVF;
5185- auto TC = PSE.getSE ()-> getSmallConstantTripCount ( TheLoop);
5186- if (VF. isFixed () && TC == VF. getFixedValue () && !foldTailByMasking ())
5195+ auto TC = getSmallConstantTripCount ( PSE.getSE (), TheLoop);
5196+ if (TC == VF && !foldTailByMasking ())
51875197 addFullyUnrolledInstructionsToIgnore (TheLoop, Legal->getInductionVars (),
51885198 ValuesToIgnoreForVF);
51895199
@@ -6878,8 +6888,8 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
68786888 // simplified away.
68796889 // TODO: Remove this code after stepping away from the legacy cost model and
68806890 // adding code to simplify VPlans before calculating their costs.
6881- auto TC = PSE.getSE ()-> getSmallConstantTripCount ( OrigLoop);
6882- if (VF. isFixed () && TC == VF. getFixedValue () && !CM.foldTailByMasking ())
6891+ auto TC = getSmallConstantTripCount ( PSE.getSE (), OrigLoop);
6892+ if (TC == VF && !CM.foldTailByMasking ())
68836893 addFullyUnrolledInstructionsToIgnore (OrigLoop, Legal->getInductionVars (),
68846894 CostCtx.SkipCostComputation );
68856895
@@ -9647,8 +9657,7 @@ static bool isOutsideLoopWorkProfitable(GeneratedRTChecks &Checks,
96479657 // Skip vectorization if the expected trip count is less than the minimum
96489658 // required trip count.
96499659 if (auto ExpectedTC = getSmallBestKnownTC (PSE, L)) {
9650- if (ElementCount::isKnownLT (ElementCount::getFixed (*ExpectedTC),
9651- VF.MinProfitableTripCount )) {
9660+ if (ElementCount::isKnownLT (*ExpectedTC, VF.MinProfitableTripCount )) {
96529661 LLVM_DEBUG (dbgs () << " LV: Vectorization is not beneficial: expected "
96539662 " trip count < minimum profitable VF ("
96549663 << *ExpectedTC << " < " << VF.MinProfitableTripCount
@@ -10018,7 +10027,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1001810027 // Check the loop for a trip count threshold: vectorize loops with a tiny trip
1001910028 // count by optimizing for size, to minimize overheads.
1002010029 auto ExpectedTC = getSmallBestKnownTC (PSE, L);
10021- if (ExpectedTC && *ExpectedTC < TinyTripCountVectorThreshold) {
10030+ if (ExpectedTC && ExpectedTC->isFixed () &&
10031+ ExpectedTC->getFixedValue () < TinyTripCountVectorThreshold) {
1002210032 LLVM_DEBUG (dbgs () << " LV: Found a loop with a very small trip count. "
1002310033 << " This loop is worth vectorizing only if no scalar "
1002410034 << " iteration overheads are incurred." );
0 commit comments