@@ -419,6 +419,12 @@ static bool hasIrregularType(Type *Ty, const DataLayout &DL) {
419419 return DL.getTypeAllocSizeInBits (Ty) != DL.getTypeSizeInBits (Ty);
420420}
421421
422+ // / A version of ScalarEvolution::getSmallConstantTripCount that returns an
423+ // / ElementCount to include loops whose trip count is a function of vscale.
424+ ElementCount getSmallConstantTripCount (ScalarEvolution *SE, const Loop *L) {
425+ return ElementCount::getFixed (SE->getSmallConstantTripCount (L));
426+ }
427+
422428// / Returns "best known" trip count, which is either a valid positive trip count
423429// / or std::nullopt when an estimate cannot be made (including when the trip
424430// / count would overflow), for the specified loop \p L as defined by the
@@ -427,24 +433,24 @@ static bool hasIrregularType(Type *Ty, const DataLayout &DL) {
427433// / 2) Returns expected trip count according to profile data if any.
428434// / 3) Returns upper bound estimate if known, and if \p CanUseConstantMax.
429435// / 4) Returns std::nullopt if all of the above failed.
430- static std::optional<unsigned >
436+ static std::optional<ElementCount >
431437getSmallBestKnownTC (PredicatedScalarEvolution &PSE, Loop *L,
432438 bool CanUseConstantMax = true ) {
433439 // Check if exact trip count is known.
434- if (unsigned ExpectedTC = PSE.getSE ()-> getSmallConstantTripCount ( L))
440+ if (auto ExpectedTC = getSmallConstantTripCount ( PSE.getSE (), L))
435441 return ExpectedTC;
436442
437443 // Check if there is an expected trip count available from profile data.
438444 if (LoopVectorizeWithBlockFrequency)
439445 if (auto EstimatedTC = getLoopEstimatedTripCount (L))
440- return *EstimatedTC;
446+ return ElementCount::getFixed ( *EstimatedTC) ;
441447
442448 if (!CanUseConstantMax)
443449 return std::nullopt ;
444450
445451 // Check if upper bound estimate is known.
446452 if (unsigned ExpectedTC = PSE.getSmallConstantMaxTripCount ())
447- return ExpectedTC;
453+ return ElementCount::getFixed ( ExpectedTC) ;
448454
449455 return std::nullopt ;
450456}
@@ -1960,7 +1966,8 @@ class GeneratedRTChecks {
19601966 // Get the best known TC estimate.
19611967 if (auto EstimatedTC = getSmallBestKnownTC (
19621968 PSE, OuterLoop, /* CanUseConstantMax = */ false ))
1963- BestTripCount = *EstimatedTC;
1969+ if (EstimatedTC->isFixed ())
1970+ BestTripCount = EstimatedTC->getFixedValue ();
19641971
19651972 InstructionCost NewMemCheckCost = MemCheckCost / BestTripCount;
19661973
@@ -3751,12 +3758,12 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
37513758 }
37523759
37533760 ScalarEvolution *SE = PSE.getSE ();
3754- unsigned TC = SE-> getSmallConstantTripCount (TheLoop);
3761+ ElementCount TC = getSmallConstantTripCount (SE, TheLoop);
37553762 unsigned MaxTC = PSE.getSmallConstantMaxTripCount ();
37563763 LLVM_DEBUG (dbgs () << " LV: Found trip count: " << TC << ' \n ' );
3757- if (TC != MaxTC)
3764+ if (TC != ElementCount::getFixed ( MaxTC) )
37583765 LLVM_DEBUG (dbgs () << " LV: Found maximum trip count: " << MaxTC << ' \n ' );
3759- if (TC == 1 ) {
3766+ if (TC. isScalar () ) {
37603767 reportVectorizationFailure (" Single iteration (non) loop" ,
37613768 " loop trip count is one, irrelevant for vectorization" ,
37623769 " SingleIterationLoop" , ORE, TheLoop);
@@ -3870,7 +3877,9 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
38703877 }
38713878
38723879 auto ExpectedTC = getSmallBestKnownTC (PSE, TheLoop);
3873- if (ExpectedTC && ExpectedTC <= TTI.getMinTripCountTailFoldingThreshold ()) {
3880+ if (ExpectedTC && ExpectedTC->isFixed () &&
3881+ ExpectedTC->getFixedValue () <=
3882+ TTI.getMinTripCountTailFoldingThreshold ()) {
38743883 if (MaxPowerOf2RuntimeVF > 0u ) {
38753884 // If we have a low-trip-count, and the fixed-width VF is known to divide
38763885 // the trip count but the scalable factor does not, use the fixed-width
@@ -3928,7 +3937,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
39283937 return FixedScalableVFPair::getNone ();
39293938 }
39303939
3931- if (TC == 0 ) {
3940+ if (TC. isZero () ) {
39323941 reportVectorizationFailure (
39333942 " unable to calculate the loop count due to complex control flow" ,
39343943 " UnknownLoopCountComplexCFG" , ORE, TheLoop);
@@ -4817,13 +4826,13 @@ LoopVectorizationCostModel::selectInterleaveCount(VPlan &Plan, ElementCount VF,
48174826 // At least one iteration must be scalar when this constraint holds. So the
48184827 // maximum available iterations for interleaving is one less.
48194828 unsigned AvailableTC = requiresScalarEpilogue (VF.isVector ())
4820- ? (* BestKnownTC) - 1
4821- : * BestKnownTC;
4829+ ? BestKnownTC-> getFixedValue ( ) - 1
4830+ : BestKnownTC-> getFixedValue () ;
48224831
48234832 unsigned InterleaveCountLB = bit_floor (std::max (
48244833 1u , std::min (AvailableTC / (EstimatedVF * 2 ), MaxInterleaveCount)));
48254834
4826- if (PSE.getSE ()-> getSmallConstantTripCount ( TheLoop) > 0 ) {
4835+ if (getSmallConstantTripCount ( PSE.getSE (), TheLoop). isNonZero () ) {
48274836 // If the best known trip count is exact, we select between two
48284837 // prospective ICs, where
48294838 //
@@ -5183,8 +5192,8 @@ InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
51835192 // costs of comparison and induction instructions, as they'll get simplified
51845193 // away.
51855194 SmallPtrSet<Instruction *, 2 > ValuesToIgnoreForVF;
5186- auto TC = PSE.getSE ()-> getSmallConstantTripCount ( TheLoop);
5187- if (VF. isFixed () && TC == VF. getFixedValue () && !foldTailByMasking ())
5195+ auto TC = getSmallConstantTripCount ( PSE.getSE (), TheLoop);
5196+ if (TC == VF && !foldTailByMasking ())
51885197 addFullyUnrolledInstructionsToIgnore (TheLoop, Legal->getInductionVars (),
51895198 ValuesToIgnoreForVF);
51905199
@@ -6884,8 +6893,8 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
68846893 // simplified away.
68856894 // TODO: Remove this code after stepping away from the legacy cost model and
68866895 // adding code to simplify VPlans before calculating their costs.
6887- auto TC = PSE.getSE ()-> getSmallConstantTripCount ( OrigLoop);
6888- if (VF. isFixed () && TC == VF. getFixedValue () && !CM.foldTailByMasking ())
6896+ auto TC = getSmallConstantTripCount ( PSE.getSE (), OrigLoop);
6897+ if (TC == VF && !CM.foldTailByMasking ())
68896898 addFullyUnrolledInstructionsToIgnore (OrigLoop, Legal->getInductionVars (),
68906899 CostCtx.SkipCostComputation );
68916900
@@ -9641,8 +9650,7 @@ static bool isOutsideLoopWorkProfitable(GeneratedRTChecks &Checks,
96419650 // Skip vectorization if the expected trip count is less than the minimum
96429651 // required trip count.
96439652 if (auto ExpectedTC = getSmallBestKnownTC (PSE, L)) {
9644- if (ElementCount::isKnownLT (ElementCount::getFixed (*ExpectedTC),
9645- VF.MinProfitableTripCount )) {
9653+ if (ElementCount::isKnownLT (*ExpectedTC, VF.MinProfitableTripCount )) {
96469654 LLVM_DEBUG (dbgs () << " LV: Vectorization is not beneficial: expected "
96479655 " trip count < minimum profitable VF ("
96489656 << *ExpectedTC << " < " << VF.MinProfitableTripCount
@@ -10012,7 +10020,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1001210020 // Check the loop for a trip count threshold: vectorize loops with a tiny trip
1001310021 // count by optimizing for size, to minimize overheads.
1001410022 auto ExpectedTC = getSmallBestKnownTC (PSE, L);
10015- if (ExpectedTC && *ExpectedTC < TinyTripCountVectorThreshold) {
10023+ if (ExpectedTC && ExpectedTC->isFixed () &&
10024+ ExpectedTC->getFixedValue () < TinyTripCountVectorThreshold) {
1001610025 LLVM_DEBUG (dbgs () << " LV: Found a loop with a very small trip count. "
1001710026 << " This loop is worth vectorizing only if no scalar "
1001810027 << " iteration overheads are incurred." );
0 commit comments