[SLPVectorizer] Widen constant strided loads. #162324
Changes from 2 commits to `llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp`.
The first hunk removes a stale `TODO:` marker from the doc comment describing the widening opportunity:

```diff
@@ -2248,7 +2248,6 @@ class BoUpSLP {
   /// Return true if an array of scalar loads can be replaced with a strided
   /// load (with constant stride).
   ///
-  /// TODO:
   /// It is possible that the load gets "widened". Suppose that originally each
   /// load loads `k` bytes and `PointerOps` can be arranged as follows (`%s` is
   /// constant):
   ///   %b + 0 * %s + 0
   ///   %b + 0 * %s + 1
   ///   %b + 0 * %s + 2
```
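For intuition, a minimal sketch of the pattern that comment describes (illustrative values, not from the patch: `k` = 1 byte, `GroupSize` = 4, constant stride `s`): eight 1-byte loads at `%b + i * %s + j` can be replaced by two 4-byte loads sitting `s` bytes apart, i.e. one strided load with two wider lanes.

```cpp
#include <cstdint>
#include <cstring>

// Scalar form: one load per offset b + i*s + j, with i in [0,2), j in [0,4).
void scalarLoads(const uint8_t *b, int64_t s, uint8_t out[8]) {
  for (int i = 0; i < 2; ++i)
    for (int j = 0; j < 4; ++j)
      out[i * 4 + j] = b[i * s + j];
}

// Widened form: each group of 4 contiguous bytes becomes a single 32-bit
// load; the two wide loads are s bytes apart (one strided load, 2 lanes).
void widenedLoads(const uint8_t *b, int64_t s, uint8_t out[8]) {
  for (int i = 0; i < 2; ++i) {
    uint32_t w;
    std::memcpy(&w, b + i * s, 4);
    std::memcpy(out + i * 4, &w, 4);
  }
}
```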
The second hunk rewrites the body of `BoUpSLP::analyzeConstantStrideCandidate`. It first records every pointer's offset from `Ptr0` in sorted order, then detects the group structure of those offsets:

```diff
@@ -6942,32 +6941,99 @@ bool BoUpSLP::analyzeConstantStrideCandidate(
     const SmallVectorImpl<unsigned> &SortedIndices, const int64_t Diff,
     Value *Ptr0, Value *PtrN, StridedPtrInfo &SPtrInfo) const {
   const size_t Sz = PointerOps.size();
-  if (!isStridedLoad(PointerOps, ScalarTy, Alignment, Diff, Sz))
+  SmallVector<int64_t> SortedOffsetsFromBase(Sz);
+  // Go through `PointerOps` in sorted order and record offsets from `Ptr0`.
+  for (unsigned I : seq<unsigned>(Sz)) {
+    Value *Ptr =
+        SortedIndices.empty() ? PointerOps[I] : PointerOps[SortedIndices[I]];
+    SortedOffsetsFromBase[I] =
+        *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, *DL, *SE);
+  }
+
+  // The code below checks that `SortedOffsetsFromBase` looks as follows:
+  // ```
+  // [
+  //   (e_{0, 0}, e_{0, 1}, ..., e_{0, GroupSize - 1}),  // first group
+  //   (e_{1, 0}, e_{1, 1}, ..., e_{1, GroupSize - 1}),  // second group
+  //   ...
+  //   (e_{NumGroups - 1, 0}, e_{NumGroups - 1, 1}, ...,
+  //    e_{NumGroups - 1, GroupSize - 1}),               // last group
+  // ]
+  // ```
+  // The distances between consecutive elements within each group should all
+  // be the same `StrideWithinGroup`. The distances between the first elements
+  // of consecutive groups should all be the same `StrideBetweenGroups`.
+
+  int64_t StrideWithinGroup =
+      SortedOffsetsFromBase[1] - SortedOffsetsFromBase[0];
+  // Determine the size of the first group. Later we will check that all other
+  // groups have the same size.
+  auto isEndOfGroupIndex = [=, &SortedOffsetsFromBase](unsigned Idx) {
+    return SortedOffsetsFromBase[Idx] - SortedOffsetsFromBase[Idx - 1] !=
+           StrideWithinGroup;
+  };
+  unsigned GroupSize = *llvm::find_if(seq<unsigned>(1, Sz), isEndOfGroupIndex);
```
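A standalone mirror of the group detection (plain C++ with `std::find_if`, not the LLVM code) on assumed offsets: for `[0, 1, 2, 3, 100, 101, 102, 103]`, `StrideWithinGroup` is 1 and the first index whose delta breaks the stride is 4, so `GroupSize` = 4.

```cpp
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <numeric>
#include <vector>

int main() {
  // Assumed offsets from Ptr0, already sorted: two groups of four
  // contiguous bytes whose first elements are 100 bytes apart.
  std::vector<int64_t> Offsets = {0, 1, 2, 3, 100, 101, 102, 103};
  int64_t StrideWithinGroup = Offsets[1] - Offsets[0]; // 1

  // Index of the first element whose delta from its predecessor breaks the
  // stride; if no delta breaks it, the whole array is a single group.
  std::vector<unsigned> Idx(Offsets.size() - 1);
  std::iota(Idx.begin(), Idx.end(), 1u);
  auto It = std::find_if(Idx.begin(), Idx.end(), [&](unsigned I) {
    return Offsets[I] - Offsets[I - 1] != StrideWithinGroup;
  });
  unsigned GroupSize =
      It == Idx.end() ? static_cast<unsigned>(Offsets.size()) : *It;
  assert(GroupSize == 4); // Sz = 8, so NumGroups = 2 and widening kicks in
  return 0;
}
```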
Next, the candidate element type and vector width are set up. When widening is needed, `GroupSize` scalars are fused into one wider integer element, and the legality of the resulting strided-load type is checked early:

```diff
+  unsigned VecSz = Sz;
+  Type *NewScalarTy = ScalarTy;
+  int64_t StrideIntVal = StrideWithinGroup;
+  FixedVectorType *StridedLoadTy = getWidenedType(NewScalarTy, VecSz);
+
+  // Quick detour: at this point we can say what the type of strided load
+  // would be if all the checks pass. Check if this type is legal for the
+  // target.
+  bool NeedsWidening = Sz != GroupSize;
+  if (NeedsWidening) {
+    if (Sz % GroupSize != 0)
+      return false;
+    VecSz = Sz / GroupSize;
+    if (StrideWithinGroup != 1)
+      return false;
+    NewScalarTy = Type::getIntNTy(
+        SE->getContext(),
+        DL->getTypeSizeInBits(ScalarTy).getFixedValue() * GroupSize);
+    StridedLoadTy = getWidenedType(NewScalarTy, VecSz);
+  }
+
+  if (!isStridedLoad(PointerOps, NewScalarTy, Alignment, Diff, VecSz))
     return false;
```
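Concretely, with assumed numbers (not from the patch): `Sz` = 8 loads of `i8` arranged as two contiguous groups of four, so `GroupSize` = 4, `VecSz` = 2, and `NewScalarTy` is an 8 * 4 = 32-bit integer, giving a candidate type of `<2 x i32>`:

```cpp
#include <cassert>

int main() {
  // Assumed example: 8 scalar i8 loads forming 2 contiguous groups of 4.
  unsigned Sz = 8, GroupSize = 4, ScalarBits = 8;
  assert(Sz % GroupSize == 0);
  unsigned VecSz = Sz / GroupSize;            // 2 lanes
  unsigned WideBits = ScalarBits * GroupSize; // 32-bit element
  assert(VecSz == 2 && WideBits == 32);       // i.e. <2 x i32>
  return 0;
}
```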
The old stride computation goes away; in the widened case the stride between groups becomes the stride of the new load, and it must be uniform across all groups:

```diff
-  int64_t Stride = Diff / static_cast<int64_t>(Sz - 1);
+  if (NeedsWidening) {
+    // Continue with checking the "shape" of `SortedOffsetsFromBase`.
+    // Check that the strides between groups are all the same.
+    unsigned CurrentGroupStartIdx = GroupSize;
+    int64_t StrideBetweenGroups =
+        SortedOffsetsFromBase[GroupSize] - SortedOffsetsFromBase[0];
+    StrideIntVal = StrideBetweenGroups;
+    for (; CurrentGroupStartIdx < Sz; CurrentGroupStartIdx += GroupSize) {
+      if (SortedOffsetsFromBase[CurrentGroupStartIdx] -
+              SortedOffsetsFromBase[CurrentGroupStartIdx - GroupSize] !=
+          StrideBetweenGroups)
+        return false;
+    }
```
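A standalone mirror of that check on the same assumed offsets (plain C++; two groups of four contiguous bytes placed 100 bytes apart, so the widened load would use a 100-byte stride):

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  // Same assumed offsets: two groups of four, first elements 100 bytes apart.
  std::vector<int64_t> Offsets = {0, 1, 2, 3, 100, 101, 102, 103};
  const unsigned GroupSize = 4;
  const unsigned Sz = static_cast<unsigned>(Offsets.size());

  int64_t StrideBetweenGroups = Offsets[GroupSize] - Offsets[0]; // 100
  bool Uniform = true;
  // Each group's first element must be exactly StrideBetweenGroups past the
  // previous group's first element.
  for (unsigned I = GroupSize; I < Sz; I += GroupSize)
    Uniform &= (Offsets[I] - Offsets[I - GroupSize] == StrideBetweenGroups);
  assert(Uniform); // stride operand of the widened load would be 100 bytes
  return 0;
}
```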
Finally, the old check that all pointer distances are unique multiples of the stride is dropped; instead, each group must have the expected size and internal stride:

```diff
-  // Iterate through all pointers and check if all distances are
-  // unique multiple of Dist.
-  SmallSet<int64_t, 4> Dists;
-  for (Value *Ptr : PointerOps) {
-    int64_t Dist = 0;
-    if (Ptr == PtrN)
-      Dist = Diff;
-    else if (Ptr != Ptr0)
-      Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, *DL, *SE);
-    // If the strides are not the same or repeated, we can't
-    // vectorize.
-    if (((Dist / Stride) * Stride) != Dist || !Dists.insert(Dist).second)
-      break;
-  }
-  if (Dists.size() == Sz) {
-    Type *StrideTy = DL->getIndexType(Ptr0->getType());
-    SPtrInfo.StrideVal = ConstantInt::get(StrideTy, Stride);
-    SPtrInfo.Ty = getWidenedType(ScalarTy, Sz);
-    return true;
+    // Check that each group has the expected size and internal stride.
+    auto CheckGroup = [&](const unsigned StartIdx, const unsigned GroupSize0,
+                          const int64_t StrideWithinGroup) -> bool {
+      unsigned GroupEndIdx = StartIdx + 1;
+      for (; GroupEndIdx != Sz; ++GroupEndIdx) {
+        if (SortedOffsetsFromBase[GroupEndIdx] -
+                SortedOffsetsFromBase[GroupEndIdx - 1] !=
+            StrideWithinGroup)
+          break;
+      }
+      return GroupEndIdx - StartIdx == GroupSize0;
+    };
```
Review thread on the `break;` inside the `CheckGroup` lambda:

Reviewer: suggested replacing the `break;` with `return false;`: "?"

mgudim: no, because we're computing `GroupEndIdx` in this loop.

mgudim: actually, I can rewrite this using `find_if` as with the previous one.

Reviewer: Yes, please.

mgudim: done
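That final `find_if` rewrite is not visible in this two-commit view. As a sketch only (plain C++ with `std::find_if`; the actual patch would presumably use `llvm::find_if` over a `seq` range, as `isEndOfGroupIndex` does above), the reworked `CheckGroup` could look like:

```cpp
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <numeric>
#include <vector>

int main() {
  // Assumed offsets: two groups of four contiguous bytes, 100 bytes apart.
  std::vector<int64_t> Offsets = {0, 1, 2, 3, 100, 101, 102, 103};
  const unsigned Sz = static_cast<unsigned>(Offsets.size());

  // find_if-style CheckGroup: the group starting at StartIdx is valid iff
  // the first stride break at or after StartIdx + 1 is exactly GroupSize0
  // elements away (no break at all means the group runs to the end).
  auto CheckGroup = [&](unsigned StartIdx, unsigned GroupSize0,
                        int64_t StrideWithinGroup) -> bool {
    std::vector<unsigned> Idx(Sz - StartIdx - 1);
    std::iota(Idx.begin(), Idx.end(), StartIdx + 1);
    auto It = std::find_if(Idx.begin(), Idx.end(), [&](unsigned I) {
      return Offsets[I] - Offsets[I - 1] != StrideWithinGroup;
    });
    unsigned GroupEndIdx = It == Idx.end() ? Sz : *It;
    return GroupEndIdx - StartIdx == GroupSize0;
  };

  assert(CheckGroup(0, 4, 1)); // first group: offsets 0..3
  assert(CheckGroup(4, 4, 1)); // second group: offsets 100..103
  return 0;
}
```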