Skip to content

Commit 06c0982

Browse files
author
Mikhail Gudim
committed
fixed a bug
added some comments, added `const` in `UpdateSortedIndices`, updated tests
1 parent b9c6119 commit 06c0982

File tree

3 files changed

+34
-153
lines changed

3 files changed

+34
-153
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6971,7 +6971,9 @@ bool BoUpSLP::analyzeRtStrideCandidate(ArrayRef<Value *> PointerOps,
69716971
Type *ElemTy, Align CommonAlignment,
69726972
SmallVectorImpl<unsigned> &SortedIndices,
69736973
StridedPtrInfo &SPtrInfo) const {
6974-
// Group the pointers by constant offset.
6974+
// If each value in `PointerOps` is of the form `%x + Offset`, where `Offset`
6975+
// is a constant, then for each offset we record the values from `PointerOps`
6976+
// and their indices in `PointerOps`.
69756977
SmallDenseMap<int64_t, std::pair<SmallVector<Value *>, SmallVector<unsigned>>>
69766978
OffsetToPointerOpIdxMap;
69776979
for (auto [Idx, Ptr] : enumerate(PointerOps)) {
@@ -7011,6 +7013,7 @@ bool BoUpSLP::analyzeRtStrideCandidate(ArrayRef<Value *> PointerOps,
70117013
TTI->isLegalStridedLoadStore(StridedLoadTy, CommonAlignment)))
70127014
return false;
70137015

7016+
// Check if the offsets are contiguous.
70147017
SmallVector<int64_t> SortedOffsetsV;
70157018
for (auto [K, _] : OffsetToPointerOpIdxMap)
70167019
SortedOffsetsV.push_back(K);
@@ -7025,6 +7028,11 @@ bool BoUpSLP::analyzeRtStrideCandidate(ArrayRef<Value *> PointerOps,
70257028
}
70267029
}
70277030

7031+
// For the set of pointers with the same offset, check that the distances
7032+
// between adjacent pointers are all equal to the same value (stride). As we
7033+
// do that, also calculate `SortedIndices`. Since we should not modify
7034+
// `SortedIndices` unless we know that all the checks succeed, record the
7035+
// indices into `SortedIndicesDraft`.
70287036
int64_t LowestOffset = SortedOffsetsV[0];
70297037
SmallVector<Value *> &PointerOps0 =
70307038
OffsetToPointerOpIdxMap[LowestOffset].first;
@@ -7046,8 +7054,13 @@ bool BoUpSLP::analyzeRtStrideCandidate(ArrayRef<Value *> PointerOps,
70467054
SortedIndicesDraft.resize(Sz);
70477055
auto UpdateSortedIndices =
70487056
[&](SmallVectorImpl<unsigned> &SortedIndicesForOffset,
7049-
SmallVectorImpl<unsigned> &IndicesInAllPointerOps,
7050-
int64_t OffsetNum) {
7057+
const SmallVectorImpl<unsigned> &IndicesInAllPointerOps,
7058+
const int64_t OffsetNum) {
7059+
if (SortedIndicesForOffset.empty()) {
7060+
SortedIndicesForOffset.resize(IndicesInAllPointerOps.size());
7061+
std::iota(SortedIndicesForOffset.begin(),
7062+
SortedIndicesForOffset.end(), 0);
7063+
}
70517064
for (const auto [Num, Idx] : enumerate(SortedIndicesForOffset)) {
70527065
SortedIndicesDraft[Num * NumOffsets + OffsetNum] =
70537066
IndicesInAllPointerOps[Idx];

llvm/test/Transforms/SLPVectorizer/RISCV/basic-strided-loads.ll

Lines changed: 6 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -332,85 +332,11 @@ define void @rt_stride_1_no_reordering(ptr %pl, i64 %stride, ptr %ps) {
332332
; CHECK-LABEL: define void @rt_stride_1_no_reordering(
333333
; CHECK-SAME: ptr [[PL:%.*]], i64 [[STRIDE:%.*]], ptr [[PS:%.*]]) #[[ATTR0]] {
334334
; CHECK-NEXT: [[STRIDE0:%.*]] = mul nsw i64 [[STRIDE]], 0
335-
; CHECK-NEXT: [[STRIDE1:%.*]] = mul nsw i64 [[STRIDE]], 1
336-
; CHECK-NEXT: [[STRIDE2:%.*]] = mul nsw i64 [[STRIDE]], 2
337-
; CHECK-NEXT: [[STRIDE3:%.*]] = mul nsw i64 [[STRIDE]], 3
338-
; CHECK-NEXT: [[STRIDE4:%.*]] = mul nsw i64 [[STRIDE]], 4
339-
; CHECK-NEXT: [[STRIDE5:%.*]] = mul nsw i64 [[STRIDE]], 5
340-
; CHECK-NEXT: [[STRIDE6:%.*]] = mul nsw i64 [[STRIDE]], 6
341-
; CHECK-NEXT: [[STRIDE7:%.*]] = mul nsw i64 [[STRIDE]], 7
342-
; CHECK-NEXT: [[STRIDE8:%.*]] = mul nsw i64 [[STRIDE]], 8
343-
; CHECK-NEXT: [[STRIDE9:%.*]] = mul nsw i64 [[STRIDE]], 9
344-
; CHECK-NEXT: [[STRIDE10:%.*]] = mul nsw i64 [[STRIDE]], 10
345-
; CHECK-NEXT: [[STRIDE11:%.*]] = mul nsw i64 [[STRIDE]], 11
346-
; CHECK-NEXT: [[STRIDE12:%.*]] = mul nsw i64 [[STRIDE]], 12
347-
; CHECK-NEXT: [[STRIDE13:%.*]] = mul nsw i64 [[STRIDE]], 13
348-
; CHECK-NEXT: [[STRIDE14:%.*]] = mul nsw i64 [[STRIDE]], 14
349-
; CHECK-NEXT: [[STRIDE15:%.*]] = mul nsw i64 [[STRIDE]], 15
350335
; CHECK-NEXT: [[GEP_L0:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 [[STRIDE0]]
351-
; CHECK-NEXT: [[GEP_L1:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 [[STRIDE1]]
352-
; CHECK-NEXT: [[GEP_L2:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 [[STRIDE2]]
353-
; CHECK-NEXT: [[GEP_L3:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 [[STRIDE3]]
354-
; CHECK-NEXT: [[GEP_L4:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 [[STRIDE4]]
355-
; CHECK-NEXT: [[GEP_L5:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 [[STRIDE5]]
356-
; CHECK-NEXT: [[GEP_L6:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 [[STRIDE6]]
357-
; CHECK-NEXT: [[GEP_L7:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 [[STRIDE7]]
358-
; CHECK-NEXT: [[GEP_L8:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 [[STRIDE8]]
359-
; CHECK-NEXT: [[GEP_L9:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 [[STRIDE9]]
360-
; CHECK-NEXT: [[GEP_L10:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 [[STRIDE10]]
361-
; CHECK-NEXT: [[GEP_L11:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 [[STRIDE11]]
362-
; CHECK-NEXT: [[GEP_L12:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 [[STRIDE12]]
363-
; CHECK-NEXT: [[GEP_L13:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 [[STRIDE13]]
364-
; CHECK-NEXT: [[GEP_L14:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 [[STRIDE14]]
365-
; CHECK-NEXT: [[GEP_L15:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 [[STRIDE15]]
366-
; CHECK-NEXT: [[LOAD0:%.*]] = load i8, ptr [[GEP_L0]], align 1
367-
; CHECK-NEXT: [[LOAD1:%.*]] = load i8, ptr [[GEP_L1]], align 1
368-
; CHECK-NEXT: [[LOAD2:%.*]] = load i8, ptr [[GEP_L2]], align 1
369-
; CHECK-NEXT: [[LOAD3:%.*]] = load i8, ptr [[GEP_L3]], align 1
370-
; CHECK-NEXT: [[LOAD4:%.*]] = load i8, ptr [[GEP_L4]], align 1
371-
; CHECK-NEXT: [[LOAD5:%.*]] = load i8, ptr [[GEP_L5]], align 1
372-
; CHECK-NEXT: [[LOAD6:%.*]] = load i8, ptr [[GEP_L6]], align 1
373-
; CHECK-NEXT: [[LOAD7:%.*]] = load i8, ptr [[GEP_L7]], align 1
374-
; CHECK-NEXT: [[LOAD8:%.*]] = load i8, ptr [[GEP_L8]], align 1
375-
; CHECK-NEXT: [[LOAD9:%.*]] = load i8, ptr [[GEP_L9]], align 1
376-
; CHECK-NEXT: [[LOAD10:%.*]] = load i8, ptr [[GEP_L10]], align 1
377-
; CHECK-NEXT: [[LOAD11:%.*]] = load i8, ptr [[GEP_L11]], align 1
378-
; CHECK-NEXT: [[LOAD12:%.*]] = load i8, ptr [[GEP_L12]], align 1
379-
; CHECK-NEXT: [[LOAD13:%.*]] = load i8, ptr [[GEP_L13]], align 1
380-
; CHECK-NEXT: [[LOAD14:%.*]] = load i8, ptr [[GEP_L14]], align 1
381-
; CHECK-NEXT: [[LOAD15:%.*]] = load i8, ptr [[GEP_L15]], align 1
382336
; CHECK-NEXT: [[GEP_S0:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 0
383-
; CHECK-NEXT: [[GEP_S1:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 1
384-
; CHECK-NEXT: [[GEP_S2:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 2
385-
; CHECK-NEXT: [[GEP_S3:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 3
386-
; CHECK-NEXT: [[GEP_S4:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 4
387-
; CHECK-NEXT: [[GEP_S5:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 5
388-
; CHECK-NEXT: [[GEP_S6:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 6
389-
; CHECK-NEXT: [[GEP_S7:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 7
390-
; CHECK-NEXT: [[GEP_S8:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 8
391-
; CHECK-NEXT: [[GEP_S9:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 9
392-
; CHECK-NEXT: [[GEP_S10:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 10
393-
; CHECK-NEXT: [[GEP_S11:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 11
394-
; CHECK-NEXT: [[GEP_S12:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 12
395-
; CHECK-NEXT: [[GEP_S13:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 13
396-
; CHECK-NEXT: [[GEP_S14:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 14
397-
; CHECK-NEXT: [[GEP_S15:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 15
398-
; CHECK-NEXT: store i8 [[LOAD0]], ptr [[GEP_S0]], align 1
399-
; CHECK-NEXT: store i8 [[LOAD1]], ptr [[GEP_S1]], align 1
400-
; CHECK-NEXT: store i8 [[LOAD2]], ptr [[GEP_S2]], align 1
401-
; CHECK-NEXT: store i8 [[LOAD3]], ptr [[GEP_S3]], align 1
402-
; CHECK-NEXT: store i8 [[LOAD4]], ptr [[GEP_S4]], align 1
403-
; CHECK-NEXT: store i8 [[LOAD5]], ptr [[GEP_S5]], align 1
404-
; CHECK-NEXT: store i8 [[LOAD6]], ptr [[GEP_S6]], align 1
405-
; CHECK-NEXT: store i8 [[LOAD7]], ptr [[GEP_S7]], align 1
406-
; CHECK-NEXT: store i8 [[LOAD8]], ptr [[GEP_S8]], align 1
407-
; CHECK-NEXT: store i8 [[LOAD9]], ptr [[GEP_S9]], align 1
408-
; CHECK-NEXT: store i8 [[LOAD10]], ptr [[GEP_S10]], align 1
409-
; CHECK-NEXT: store i8 [[LOAD11]], ptr [[GEP_S11]], align 1
410-
; CHECK-NEXT: store i8 [[LOAD12]], ptr [[GEP_S12]], align 1
411-
; CHECK-NEXT: store i8 [[LOAD13]], ptr [[GEP_S13]], align 1
412-
; CHECK-NEXT: store i8 [[LOAD14]], ptr [[GEP_S14]], align 1
413-
; CHECK-NEXT: store i8 [[LOAD15]], ptr [[GEP_S15]], align 1
337+
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[STRIDE]], 1
338+
; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.experimental.vp.strided.load.v16i8.p0.i64(ptr align 1 [[GEP_L0]], i64 [[TMP1]], <16 x i1> splat (i1 true), i32 16)
339+
; CHECK-NEXT: store <16 x i8> [[TMP2]], ptr [[GEP_S0]], align 1
414340
; CHECK-NEXT: ret void
415341
;
416342
%stride0 = mul nsw i64 %stride, 0
@@ -784,25 +710,11 @@ define void @rt_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps) {
784710
; CHECK-LABEL: define void @rt_stride_widen_no_reordering(
785711
; CHECK-SAME: ptr [[PL:%.*]], i64 [[STRIDE:%.*]], ptr [[PS:%.*]]) #[[ATTR0]] {
786712
; CHECK-NEXT: [[OFFSET0:%.*]] = mul nsw i64 [[STRIDE]], 0
787-
; CHECK-NEXT: [[OFFSET4:%.*]] = mul nsw i64 [[STRIDE]], 1
788-
; CHECK-NEXT: [[OFFSET8:%.*]] = mul nsw i64 [[STRIDE]], 2
789-
; CHECK-NEXT: [[OFFSET12:%.*]] = mul nsw i64 [[STRIDE]], 3
790713
; CHECK-NEXT: [[GEP_L0:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 [[OFFSET0]]
791-
; CHECK-NEXT: [[GEP_L4:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 [[OFFSET4]]
792-
; CHECK-NEXT: [[GEP_L8:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 [[OFFSET8]]
793-
; CHECK-NEXT: [[GEP_L12:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 [[OFFSET12]]
794714
; CHECK-NEXT: [[GEP_S0:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 0
795-
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[GEP_L0]], align 1
796-
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[GEP_L4]], align 1
797-
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i8>, ptr [[GEP_L8]], align 1
798-
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i8>, ptr [[GEP_L12]], align 1
799-
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
800-
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
801-
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
802-
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
803-
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i8> [[TMP7]], <16 x i8> [[TMP11]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
804-
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
805-
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i8> [[TMP9]], <16 x i8> [[TMP10]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
715+
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[STRIDE]], 1
716+
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.experimental.vp.strided.load.v4i32.p0.i64(ptr align 1 [[GEP_L0]], i64 [[TMP1]], <4 x i1> splat (i1 true), i32 4)
717+
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
806718
; CHECK-NEXT: store <16 x i8> [[TMP8]], ptr [[GEP_S0]], align 1
807719
; CHECK-NEXT: ret void
808720
;

0 commit comments

Comments
 (0)