[SLP]Enable interleaved stores support #115354
Conversation
@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-llvm-transforms

Author: Alexey Bataev (alexey-bataev)

Changes

Enables interleaved stores, which results in better cost estimation for segmented stores for RISC-V.

Full diff: https://github.com/llvm/llvm-project/pull/115354.diff

2 Files Affected:
- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
- llvm/test/Transforms/SLPVectorizer/RISCV/segmented-stores.ll
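For context: an interleave mask with factor F rearranges F contiguous lanes of a vector into alternating positions, which is exactly the access pattern a segmented store consumes. Below is a minimal standalone sketch of that mask shape, written for illustration only; LLVM's ShuffleVectorInst::isInterleaveMask is more general (it tolerates undef lanes, among other things).

// Standalone illustration (not LLVM's implementation) of an interleave
// mask. For Factor = 2 and NumElts = 4, the mask <0, 2, 1, 3> interleaves
// lanes {0,1} and {2,3}, matching the shuffle in the test diff below.
#include <cassert>
#include <vector>

static bool isInterleaveMaskSketch(const std::vector<int> &Mask,
                                   unsigned Factor, unsigned NumElts) {
  if (Factor < 2 || NumElts % Factor != 0 || Mask.size() != NumElts)
    return false;
  unsigned LaneLen = NumElts / Factor;
  // Result element I is drawn from lane (I % Factor), at position
  // (I / Factor) within that lane of the concatenated source.
  for (unsigned I = 0; I < NumElts; ++I)
    if (Mask[I] != int((I % Factor) * LaneLen + I / Factor))
      return false;
  return true;
}

int main() {
  assert(isInterleaveMaskSketch({0, 2, 1, 3}, /*Factor=*/2, /*NumElts=*/4));
  assert(!isInterleaveMaskSketch({0, 1, 2, 3}, /*Factor=*/2, /*NumElts=*/4));
}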
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 184413b420089a..bff0684a2e6f16 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -9759,6 +9759,29 @@ void BoUpSLP::transformNodes() {
// Strided store is more profitable than reverse + consecutive store -
// transform the node to strided store.
E.State = TreeEntry::StridedVectorize;
+ } else if (!E.ReorderIndices.empty()) {
+ // Check for interleaved stores.
+ auto IsInterleaveMask = [&, &TTI = *TTI](ArrayRef<int> Mask) {
+ assert(Mask.size() > 1 && "Expected mask greater than 1 element.");
+ if (Mask.size() < 4)
+ return 0u;
+ for (unsigned Factor : seq<unsigned>(2, Mask.size() / 2 + 1)) {
+ if (ShuffleVectorInst::isInterleaveMask(
+ Mask, Factor, VecTy->getElementCount().getFixedValue()) &&
+ TTI.isLegalInterleavedAccessType(
+ VecTy, Factor,
+ cast<StoreInst>(E.Scalars.front())->getAlign(),
+ cast<StoreInst>(E.Scalars.front())
+ ->getPointerAddressSpace()))
+ return Factor;
+ }
+
+ return 0u;
+ };
+ SmallVector<int> Mask(E.ReorderIndices.begin(), E.ReorderIndices.end());
+ unsigned InterleaveFactor = IsInterleaveMask(Mask);
+ if (InterleaveFactor != 0)
+ E.setInterleave(InterleaveFactor);
}
break;
}
@@ -11428,10 +11451,19 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
} else {
assert(E->State == TreeEntry::Vectorize &&
"Expected either strided or consecutive stores.");
- TTI::OperandValueInfo OpInfo = getOperandInfo(E->getOperand(0));
- VecStCost = TTI->getMemoryOpCost(
- Instruction::Store, VecTy, BaseSI->getAlign(),
- BaseSI->getPointerAddressSpace(), CostKind, OpInfo);
+ if (unsigned Factor = E->getInterleaveFactor()) {
+ assert(E->ReuseShuffleIndices.empty() && !E->ReorderIndices.empty() &&
+ "No reused shuffles expected");
+ CommonCost = 0;
+ VecStCost = TTI->getInterleavedMemoryOpCost(
+ Instruction::Store, VecTy, Factor, std::nullopt,
+ BaseSI->getAlign(), BaseSI->getPointerAddressSpace(), CostKind);
+ } else {
+ TTI::OperandValueInfo OpInfo = getOperandInfo(E->getOperand(0));
+ VecStCost = TTI->getMemoryOpCost(
+ Instruction::Store, VecTy, BaseSI->getAlign(),
+ BaseSI->getPointerAddressSpace(), CostKind, OpInfo);
+ }
}
return VecStCost + CommonCost;
};
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/segmented-stores.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/segmented-stores.ll
index ae1c3e1ee0da20..071d0b972f23a3 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/segmented-stores.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/segmented-stores.ll
@@ -8,7 +8,7 @@
; YAML-NEXT: Function: test
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'Stores SLP vectorized with cost '
-; YAML-NEXT: - Cost: '-1'
+; YAML-NEXT: - Cost: '-2'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '2'
define void @test(ptr %h) {
@@ -17,16 +17,9 @@ define void @test(ptr %h) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[DCT2X211:%.*]] = alloca [0 x [0 x [8 x i64]]], i32 0, align 16
; CHECK-NEXT: [[CHROMA_DC209:%.*]] = getelementptr i8, ptr [[H]], i64 0
-; CHECK-NEXT: [[ARRAYIDX33_I:%.*]] = getelementptr i8, ptr [[DCT2X211]], i64 8
-; CHECK-NEXT: [[ARRAYIDX36_I181:%.*]] = getelementptr i8, ptr [[DCT2X211]], i64 24
-; CHECK-NEXT: [[TMP0:%.*]] = call <2 x i64> @llvm.experimental.vp.strided.load.v2i64.p0.i64(ptr align 4 [[DCT2X211]], i64 16, <2 x i1> splat (i1 true), i32 2)
-; CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[CHROMA_DC209]], align 2
-; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[ARRAYIDX33_I]], align 2
-; CHECK-NEXT: [[ARRAYIDX5_I226:%.*]] = getelementptr i8, ptr [[H]], i64 16
-; CHECK-NEXT: store i64 [[TMP2]], ptr [[ARRAYIDX5_I226]], align 2
-; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[ARRAYIDX36_I181]], align 2
-; CHECK-NEXT: [[ARRAYIDX7_I228:%.*]] = getelementptr i8, ptr [[H]], i64 24
-; CHECK-NEXT: store i64 [[TMP3]], ptr [[ARRAYIDX7_I228]], align 2
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[DCT2X211]], align 16
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+; CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[CHROMA_DC209]], align 2
; CHECK-NEXT: ret void
;
entry:
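The test change shows the effect: the four-element group is now vectorized as one load, one <0, 2, 1, 3> interleave shuffle, and one store, and because the target can fold that reorder into a segmented store, the patch zeroes CommonCost before billing TTI->getInterleavedMemoryOpCost. A minimal illustration of the bookkeeping follows, with made-up unit costs; the real numbers come from the RISC-V cost model, not from this sketch.

// Made-up unit costs, purely to illustrate the CommonCost = 0 effect in
// the diff above; actual values come from the target's TTI.
#include <cstdio>

int main() {
  const int NumScalarStores = 4;    // the four i64 stores in the test
  const int ScalarStoreCost = 1;    // hypothetical
  const int VecStoreCost = 2;       // hypothetical (segmented) vector store
  const int ReorderShuffleCost = 1; // the <0,2,1,3> shuffle (CommonCost)

  // Plain vectorized store: the reorder shuffle is billed on top.
  int Before = (VecStoreCost + ReorderShuffleCost) -
               NumScalarStores * ScalarStoreCost;
  // Interleaved store: the reorder is folded in, so CommonCost = 0.
  int After = VecStoreCost - NumScalarStores * ScalarStoreCost;

  std::printf("before: %d  after: %d\n", Before, After); // e.g. -1 vs. -2
}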
Ping!
RKSimon left a comment:
LGTM with one minor
    if (Mask.size() < 4)
      return 0u;
    for (unsigned Factor : seq<unsigned>(2, Mask.size() / 2 + 1)) {
      if (ShuffleVectorInst::isInterleaveMask(
Pull out repeated cast:
auto *SclStore = cast<StoreInst>(E.Scalars.front());
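Applied to the diff above, the lambda might read as follows; this is a sketch of the suggested cleanup, not the final committed form.

auto IsInterleaveMask = [&, &TTI = *TTI](ArrayRef<int> Mask) {
  assert(Mask.size() > 1 && "Expected mask greater than 1 element.");
  if (Mask.size() < 4)
    return 0u;
  // Hoist the repeated cast of the first scalar store.
  auto *SclStore = cast<StoreInst>(E.Scalars.front());
  for (unsigned Factor : seq<unsigned>(2, Mask.size() / 2 + 1)) {
    if (ShuffleVectorInst::isInterleaveMask(
            Mask, Factor, VecTy->getElementCount().getFixedValue()) &&
        TTI.isLegalInterleavedAccessType(VecTy, Factor, SclStore->getAlign(),
                                         SclStore->getPointerAddressSpace()))
      return Factor;
  }
  return 0u;
};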
Enables interleaved stores, which results in better cost estimation for segmented stores for RISC-V.