-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[LV]Split store-load forward distance analysis from other checks, NFC #121156
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
4a74a77
6201773
6a3c6bb
e938e4a
1a95648
85c7122
0cfee98
d7bc6d6
dc66ca4
c6f318d
bcf7b52
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1428,8 +1428,11 @@ class LoopVectorizationCostModel { | |
| /// Selects and saves the TailFoldingStyle for two cases: when the IV update | ||
| /// may overflow and when it may not. | ||
| /// \param IsScalableVF true if scalable vector factors enabled. | ||
| /// \param CanTailFoldPowOf2 true if tail folding with power-of-2 | ||
alexey-bataev marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| /// safe distance can be enabled. | ||
| /// \param UserIC User specific interleave count. | ||
| void setTailFoldingStyles(bool IsScalableVF, unsigned UserIC) { | ||
| void setTailFoldingStyles(bool IsScalableVF, bool CanTailFoldPowOf2, | ||
| unsigned UserIC) { | ||
| assert(!ChosenTailFoldingStyle && "Tail folding must not be selected yet."); | ||
| if (!Legal->canFoldTailByMasking()) { | ||
| ChosenTailFoldingStyle = | ||
|
|
@@ -1438,17 +1441,25 @@ class LoopVectorizationCostModel { | |
| } | ||
|
|
||
| if (!ForceTailFoldingStyle.getNumOccurrences()) { | ||
| ChosenTailFoldingStyle = std::make_pair( | ||
| TTI.getPreferredTailFoldingStyle(/*IVUpdateMayOverflow=*/true), | ||
| TTI.getPreferredTailFoldingStyle(/*IVUpdateMayOverflow=*/false)); | ||
| if (!CanTailFoldPowOf2) | ||
| ChosenTailFoldingStyle = | ||
| std::make_pair(TailFoldingStyle::None, TailFoldingStyle::None); | ||
alexey-bataev marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| else | ||
| ChosenTailFoldingStyle = std::make_pair( | ||
| TTI.getPreferredTailFoldingStyle(/*IVUpdateMayOverflow=*/true), | ||
| TTI.getPreferredTailFoldingStyle(/*IVUpdateMayOverflow=*/false)); | ||
| return; | ||
| } | ||
|
|
||
| // Set styles when forced. | ||
| ChosenTailFoldingStyle = std::make_pair(ForceTailFoldingStyle.getValue(), | ||
| ForceTailFoldingStyle.getValue()); | ||
| if (ForceTailFoldingStyle != TailFoldingStyle::DataWithEVL) | ||
| if (ForceTailFoldingStyle != TailFoldingStyle::DataWithEVL) { | ||
| if (!CanTailFoldPowOf2) | ||
| ChosenTailFoldingStyle = | ||
| std::make_pair(TailFoldingStyle::None, TailFoldingStyle::None); | ||
alexey-bataev marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| return; | ||
| } | ||
| // Override forced styles if needed. | ||
| // FIXME: use actual opcode/data type for analysis here. | ||
| // FIXME: Investigate opportunity for fixed vector factor. | ||
|
|
@@ -1459,6 +1470,11 @@ class LoopVectorizationCostModel { | |
| !EnableVPlanNativePath && | ||
| Legal->getFixedOrderRecurrences().empty(); | ||
| if (!EVLIsLegal) { | ||
| if (!CanTailFoldPowOf2) { | ||
| ChosenTailFoldingStyle = | ||
| std::make_pair(TailFoldingStyle::None, TailFoldingStyle::None); | ||
| return; | ||
alexey-bataev marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| } | ||
| // If for some reason EVL mode is unsupported, fallback to | ||
| // DataWithoutLaneMask to try to vectorize the loop with folded tail | ||
| // in a generic way. | ||
|
|
@@ -3835,7 +3851,9 @@ bool LoopVectorizationCostModel::isScalableVectorizationAllowed() { | |
| return false; | ||
| } | ||
|
|
||
| if (!Legal->isSafeForAnyVectorWidth() && !getMaxVScale(*TheFunction, TTI)) { | ||
| if ((!Legal->isSafeForAnyVectorWidth() || | ||
| Legal->getMaxStoreLoadForwardSafeVFPowerOf2()) && | ||
| !getMaxVScale(*TheFunction, TTI)) { | ||
| reportVectorizationInfo("The target does not provide maximum vscale value " | ||
| "for safe distance analysis.", | ||
| "ScalableVFUnfeasible", ORE, TheLoop); | ||
|
|
@@ -3853,7 +3871,8 @@ LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) { | |
|
|
||
| auto MaxScalableVF = ElementCount::getScalable( | ||
| std::numeric_limits<ElementCount::ScalarTy>::max()); | ||
| if (Legal->isSafeForAnyVectorWidth()) | ||
| if (Legal->isSafeForAnyVectorWidth() && | ||
| !Legal->getMaxStoreLoadForwardSafeVFPowerOf2()) | ||
| return MaxScalableVF; | ||
|
|
||
| std::optional<unsigned> MaxVScale = getMaxVScale(*TheFunction, TTI); | ||
|
|
@@ -3879,13 +3898,22 @@ FixedScalableVFPair LoopVectorizationCostModel::computeFeasibleMaxVF( | |
| // It is computed by MaxVF * sizeOf(type) * 8, where type is taken from | ||
| // the memory access that is most restrictive (involved in the smallest | ||
| // dependence distance). | ||
| unsigned MaxSafeElements = | ||
| llvm::bit_floor(Legal->getMaxSafeVectorWidthInBits() / WidestType); | ||
|
|
||
| auto MaxSafeFixedVF = ElementCount::getFixed(MaxSafeElements); | ||
| auto MaxSafeScalableVF = getMaxLegalScalableVF(MaxSafeElements); | ||
| if (!Legal->isSafeForAnyVectorWidth()) | ||
| this->MaxSafeElements = MaxSafeElements; | ||
| unsigned MaxSafeElements = Legal->getMaxSafeVectorWidthInBits() / WidestType; | ||
| if (Legal->isSafeForAnyVectorWidth()) | ||
| MaxSafeElements = bit_ceil(MaxSafeElements); | ||
| else | ||
| MaxSafeElements = bit_floor(MaxSafeElements); | ||
| unsigned MaxSafeElementsPowerOf2 = MaxSafeElements; | ||
| if (std::optional<unsigned> SLDist = | ||
| Legal->getMaxStoreLoadForwardSafeVFPowerOf2()) | ||
| MaxSafeElementsPowerOf2 = | ||
| std::min(MaxSafeElementsPowerOf2, *SLDist / WidestType); | ||
| auto MaxSafeFixedVF = ElementCount::getFixed(MaxSafeElementsPowerOf2); | ||
| auto MaxSafeScalableVF = getMaxLegalScalableVF(MaxSafeElementsPowerOf2); | ||
|
|
||
| if (!Legal->isSafeForAnyVectorWidth() || | ||
| Legal->getMaxStoreLoadForwardSafeVFPowerOf2()) | ||
| this->MaxSafeElements = MaxSafeElementsPowerOf2; | ||
|
|
||
| LLVM_DEBUG(dbgs() << "LV: The max safe fixed VF is: " << MaxSafeFixedVF | ||
| << ".\n"); | ||
|
|
@@ -4113,14 +4141,16 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) { | |
| LLVM_DEBUG(dbgs() << "LV: No tail will remain for any chosen VF.\n"); | ||
| return MaxFactors; | ||
| } | ||
| MaxPowerOf2RuntimeVF.reset(); | ||
|
||
| } | ||
|
|
||
| // If we don't know the precise trip count, or if the trip count that we | ||
| // found modulo the vectorization factor is not zero, try to fold the tail | ||
| // by masking. | ||
| // FIXME: look for a smaller MaxVF that does divide TC rather than masking. | ||
| bool ContainsScalableVF = MaxFactors.ScalableVF.isNonZero(); | ||
| setTailFoldingStyles(ContainsScalableVF, UserIC); | ||
| setTailFoldingStyles(ContainsScalableVF, !MaxPowerOf2RuntimeVF.has_value(), | ||
| UserIC); | ||
| if (foldTailByMasking()) { | ||
| if (getTailFoldingStyle() == TailFoldingStyle::DataWithEVL) { | ||
| LLVM_DEBUG( | ||
|
|
@@ -4138,6 +4168,12 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) { | |
| return MaxFactors; | ||
| } | ||
|
|
||
| if (MaxPowerOf2RuntimeVF) { | ||
| // Accept MaxFixedVF if we do not have a tail. | ||
| LLVM_DEBUG(dbgs() << "LV: No tail will remain for any chosen VF.\n"); | ||
| return MaxFactors; | ||
| } | ||
|
|
||
| // If there was a tail-folding hint/switch, but we can't fold the tail by | ||
| // masking, fallback to a vectorization with a scalar epilogue. | ||
| if (ScalarEpilogueStatus == CM_ScalarEpilogueNotNeededUsePredicate) { | ||
|
|
@@ -4913,7 +4949,8 @@ LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF, | |
| } | ||
|
|
||
| // We used the distance for the interleave count. | ||
| if (!Legal->isSafeForAnyVectorWidth()) | ||
| if (!Legal->isSafeForAnyVectorWidth() || | ||
alexey-bataev marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| Legal->getMaxStoreLoadForwardSafeVFPowerOf2()) | ||
| return 1; | ||
|
|
||
| // We don't attempt to perform interleaving for loops with uncountable early | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.