-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[LV] Change getSmallBestKnownTC to return an ElementCount (NFC) #141793
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -419,6 +419,12 @@ static bool hasIrregularType(Type *Ty, const DataLayout &DL) { | |
| return DL.getTypeAllocSizeInBits(Ty) != DL.getTypeSizeInBits(Ty); | ||
| } | ||
|
|
||
| /// A version of ScalarEvolution::getSmallConstantTripCount that returns an | ||
| /// ElementCount to include loops whose trip count is a function of vscale. | ||
| ElementCount getSmallConstantTripCount(ScalarEvolution *SE, const Loop *L) { | ||
| return ElementCount::getFixed(SE->getSmallConstantTripCount(L)); | ||
| } | ||
|
|
||
| /// Returns "best known" trip count, which is either a valid positive trip count | ||
| /// or std::nullopt when an estimate cannot be made (including when the trip | ||
| /// count would overflow), for the specified loop \p L as defined by the | ||
|
|
@@ -427,24 +433,24 @@ static bool hasIrregularType(Type *Ty, const DataLayout &DL) { | |
| /// 2) Returns expected trip count according to profile data if any. | ||
| /// 3) Returns upper bound estimate if known, and if \p CanUseConstantMax. | ||
| /// 4) Returns std::nullopt if all of the above failed. | ||
| static std::optional<unsigned> | ||
| static std::optional<ElementCount> | ||
| getSmallBestKnownTC(PredicatedScalarEvolution &PSE, Loop *L, | ||
| bool CanUseConstantMax = true) { | ||
|
||
| // Check if exact trip count is known. | ||
| if (unsigned ExpectedTC = PSE.getSE()->getSmallConstantTripCount(L)) | ||
| if (auto ExpectedTC = getSmallConstantTripCount(PSE.getSE(), L)) | ||
| return ExpectedTC; | ||
|
|
||
| // Check if there is an expected trip count available from profile data. | ||
| if (LoopVectorizeWithBlockFrequency) | ||
| if (auto EstimatedTC = getLoopEstimatedTripCount(L)) | ||
| return *EstimatedTC; | ||
| return ElementCount::getFixed(*EstimatedTC); | ||
|
|
||
| if (!CanUseConstantMax) | ||
| return std::nullopt; | ||
|
|
||
| // Check if upper bound estimate is known. | ||
| if (unsigned ExpectedTC = PSE.getSmallConstantMaxTripCount()) | ||
| return ExpectedTC; | ||
| return ElementCount::getFixed(ExpectedTC); | ||
|
|
||
| return std::nullopt; | ||
| } | ||
|
|
@@ -1960,7 +1966,8 @@ class GeneratedRTChecks { | |
| // Get the best known TC estimate. | ||
| if (auto EstimatedTC = getSmallBestKnownTC( | ||
| PSE, OuterLoop, /* CanUseConstantMax = */ false)) | ||
| BestTripCount = *EstimatedTC; | ||
| if (EstimatedTC->isFixed()) | ||
| BestTripCount = EstimatedTC->getFixedValue(); | ||
|
|
||
| InstructionCost NewMemCheckCost = MemCheckCost / BestTripCount; | ||
|
|
||
|
|
@@ -3751,12 +3758,12 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) { | |
| } | ||
|
|
||
| ScalarEvolution *SE = PSE.getSE(); | ||
| unsigned TC = SE->getSmallConstantTripCount(TheLoop); | ||
| ElementCount TC = getSmallConstantTripCount(SE, TheLoop); | ||
| unsigned MaxTC = PSE.getSmallConstantMaxTripCount(); | ||
| LLVM_DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n'); | ||
| if (TC != MaxTC) | ||
| if (TC != ElementCount::getFixed(MaxTC)) | ||
| LLVM_DEBUG(dbgs() << "LV: Found maximum trip count: " << MaxTC << '\n'); | ||
| if (TC == 1) { | ||
| if (TC.isScalar()) { | ||
| reportVectorizationFailure("Single iteration (non) loop", | ||
| "loop trip count is one, irrelevant for vectorization", | ||
| "SingleIterationLoop", ORE, TheLoop); | ||
|
|
@@ -3870,7 +3877,9 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) { | |
| } | ||
|
|
||
| auto ExpectedTC = getSmallBestKnownTC(PSE, TheLoop); | ||
| if (ExpectedTC && ExpectedTC <= TTI.getMinTripCountTailFoldingThreshold()) { | ||
| if (ExpectedTC && ExpectedTC->isFixed() && | ||
| ExpectedTC->getFixedValue() <= | ||
| TTI.getMinTripCountTailFoldingThreshold()) { | ||
| if (MaxPowerOf2RuntimeVF > 0u) { | ||
| // If we have a low-trip-count, and the fixed-width VF is known to divide | ||
| // the trip count but the scalable factor does not, use the fixed-width | ||
|
|
@@ -3928,7 +3937,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) { | |
| return FixedScalableVFPair::getNone(); | ||
| } | ||
|
|
||
| if (TC == 0) { | ||
| if (TC.isZero()) { | ||
| reportVectorizationFailure( | ||
| "unable to calculate the loop count due to complex control flow", | ||
| "UnknownLoopCountComplexCFG", ORE, TheLoop); | ||
|
|
@@ -4817,13 +4826,13 @@ LoopVectorizationCostModel::selectInterleaveCount(VPlan &Plan, ElementCount VF, | |
| // At least one iteration must be scalar when this constraint holds. So the | ||
| // maximum available iterations for interleaving is one less. | ||
| unsigned AvailableTC = requiresScalarEpilogue(VF.isVector()) | ||
| ? (*BestKnownTC) - 1 | ||
| : *BestKnownTC; | ||
| ? BestKnownTC->getFixedValue() - 1 | ||
| : BestKnownTC->getFixedValue(); | ||
|
|
||
| unsigned InterleaveCountLB = bit_floor(std::max( | ||
| 1u, std::min(AvailableTC / (EstimatedVF * 2), MaxInterleaveCount))); | ||
|
|
||
| if (PSE.getSE()->getSmallConstantTripCount(TheLoop) > 0) { | ||
| if (getSmallConstantTripCount(PSE.getSE(), TheLoop).isNonZero()) { | ||
| // If the best known trip count is exact, we select between two | ||
| // prospective ICs, where | ||
| // | ||
|
|
@@ -5183,8 +5192,8 @@ InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) { | |
| // costs of comparison and induction instructions, as they'll get simplified | ||
| // away. | ||
| SmallPtrSet<Instruction *, 2> ValuesToIgnoreForVF; | ||
| auto TC = PSE.getSE()->getSmallConstantTripCount(TheLoop); | ||
| if (VF.isFixed() && TC == VF.getFixedValue() && !foldTailByMasking()) | ||
| auto TC = getSmallConstantTripCount(PSE.getSE(), TheLoop); | ||
| if (TC == VF && !foldTailByMasking()) | ||
| addFullyUnrolledInstructionsToIgnore(TheLoop, Legal->getInductionVars(), | ||
| ValuesToIgnoreForVF); | ||
|
|
||
|
|
@@ -6884,8 +6893,8 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF, | |
| // simplified away. | ||
| // TODO: Remove this code after stepping away from the legacy cost model and | ||
| // adding code to simplify VPlans before calculating their costs. | ||
| auto TC = PSE.getSE()->getSmallConstantTripCount(OrigLoop); | ||
| if (VF.isFixed() && TC == VF.getFixedValue() && !CM.foldTailByMasking()) | ||
| auto TC = getSmallConstantTripCount(PSE.getSE(), OrigLoop); | ||
| if (TC == VF && !CM.foldTailByMasking()) | ||
| addFullyUnrolledInstructionsToIgnore(OrigLoop, Legal->getInductionVars(), | ||
| CostCtx.SkipCostComputation); | ||
|
|
||
|
|
@@ -9641,8 +9650,7 @@ static bool isOutsideLoopWorkProfitable(GeneratedRTChecks &Checks, | |
| // Skip vectorization if the expected trip count is less than the minimum | ||
| // required trip count. | ||
| if (auto ExpectedTC = getSmallBestKnownTC(PSE, L)) { | ||
| if (ElementCount::isKnownLT(ElementCount::getFixed(*ExpectedTC), | ||
| VF.MinProfitableTripCount)) { | ||
| if (ElementCount::isKnownLT(*ExpectedTC, VF.MinProfitableTripCount)) { | ||
| LLVM_DEBUG(dbgs() << "LV: Vectorization is not beneficial: expected " | ||
| "trip count < minimum profitable VF (" | ||
| << *ExpectedTC << " < " << VF.MinProfitableTripCount | ||
|
|
@@ -10012,7 +10020,8 @@ bool LoopVectorizePass::processLoop(Loop *L) { | |
| // Check the loop for a trip count threshold: vectorize loops with a tiny trip | ||
| // count by optimizing for size, to minimize overheads. | ||
| auto ExpectedTC = getSmallBestKnownTC(PSE, L); | ||
| if (ExpectedTC && *ExpectedTC < TinyTripCountVectorThreshold) { | ||
| if (ExpectedTC && ExpectedTC->isFixed() && | ||
| ExpectedTC->getFixedValue() < TinyTripCountVectorThreshold) { | ||
| LLVM_DEBUG(dbgs() << "LV: Found a loop with a very small trip count. " | ||
| << "This loop is worth vectorizing only if no scalar " | ||
| << "iteration overheads are incurred."); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can this be marked `static`, or do you expect it to be called from elsewhere?