Skip to content

Commit f6e1d64

Browse files
[SLP]Enable interleaved stores support
Enables interleaved stores, which results in better estimation for segmented stores for RISC-V. Reviewers: preames, topperc, RKSimon. Reviewed By: RKSimon. Pull Request: #115354
1 parent bc3b0fa commit f6e1d64

File tree

2 files changed

+39
-15
lines changed

2 files changed

+39
-15
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9771,6 +9771,28 @@ void BoUpSLP::transformNodes() {
97719771
// Strided store is more profitable than reverse + consecutive store -
97729772
// transform the node to strided store.
97739773
E.State = TreeEntry::StridedVectorize;
9774+
} else if (!E.ReorderIndices.empty()) {
9775+
// Check for interleaved stores.
9776+
auto IsInterleaveMask = [&, &TTI = *TTI](ArrayRef<int> Mask) {
9777+
auto *BaseSI = cast<StoreInst>(E.Scalars.front());
9778+
assert(Mask.size() > 1 && "Expected mask greater than 1 element.");
9779+
if (Mask.size() < 4)
9780+
return 0u;
9781+
for (unsigned Factor : seq<unsigned>(2, Mask.size() / 2 + 1)) {
9782+
if (ShuffleVectorInst::isInterleaveMask(
9783+
Mask, Factor, VecTy->getElementCount().getFixedValue()) &&
9784+
TTI.isLegalInterleavedAccessType(
9785+
VecTy, Factor, BaseSI->getAlign(),
9786+
BaseSI->getPointerAddressSpace()))
9787+
return Factor;
9788+
}
9789+
9790+
return 0u;
9791+
};
9792+
SmallVector<int> Mask(E.ReorderIndices.begin(), E.ReorderIndices.end());
9793+
unsigned InterleaveFactor = IsInterleaveMask(Mask);
9794+
if (InterleaveFactor != 0)
9795+
E.setInterleave(InterleaveFactor);
97749796
}
97759797
break;
97769798
}
@@ -11441,10 +11463,19 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
1144111463
} else {
1144211464
assert(E->State == TreeEntry::Vectorize &&
1144311465
"Expected either strided or consecutive stores.");
11444-
TTI::OperandValueInfo OpInfo = getOperandInfo(E->getOperand(0));
11445-
VecStCost = TTI->getMemoryOpCost(
11446-
Instruction::Store, VecTy, BaseSI->getAlign(),
11447-
BaseSI->getPointerAddressSpace(), CostKind, OpInfo);
11466+
if (unsigned Factor = E->getInterleaveFactor()) {
11467+
assert(E->ReuseShuffleIndices.empty() && !E->ReorderIndices.empty() &&
11468+
"No reused shuffles expected");
11469+
CommonCost = 0;
11470+
VecStCost = TTI->getInterleavedMemoryOpCost(
11471+
Instruction::Store, VecTy, Factor, std::nullopt,
11472+
BaseSI->getAlign(), BaseSI->getPointerAddressSpace(), CostKind);
11473+
} else {
11474+
TTI::OperandValueInfo OpInfo = getOperandInfo(E->getOperand(0));
11475+
VecStCost = TTI->getMemoryOpCost(
11476+
Instruction::Store, VecTy, BaseSI->getAlign(),
11477+
BaseSI->getPointerAddressSpace(), CostKind, OpInfo);
11478+
}
1144811479
}
1144911480
return VecStCost + CommonCost;
1145011481
};

llvm/test/Transforms/SLPVectorizer/RISCV/segmented-stores.ll

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
; YAML-NEXT: Function: test
99
; YAML-NEXT: Args:
1010
; YAML-NEXT: - String: 'Stores SLP vectorized with cost '
11-
; YAML-NEXT: - Cost: '-1'
11+
; YAML-NEXT: - Cost: '-2'
1212
; YAML-NEXT: - String: ' and with tree size '
1313
; YAML-NEXT: - TreeSize: '2'
1414
define void @test(ptr %h) {
@@ -17,16 +17,9 @@ define void @test(ptr %h) {
1717
; CHECK-NEXT: [[ENTRY:.*:]]
1818
; CHECK-NEXT: [[DCT2X211:%.*]] = alloca [0 x [0 x [8 x i64]]], i32 0, align 16
1919
; CHECK-NEXT: [[CHROMA_DC209:%.*]] = getelementptr i8, ptr [[H]], i64 0
20-
; CHECK-NEXT: [[ARRAYIDX33_I:%.*]] = getelementptr i8, ptr [[DCT2X211]], i64 8
21-
; CHECK-NEXT: [[ARRAYIDX36_I181:%.*]] = getelementptr i8, ptr [[DCT2X211]], i64 24
22-
; CHECK-NEXT: [[TMP0:%.*]] = call <2 x i64> @llvm.experimental.vp.strided.load.v2i64.p0.i64(ptr align 4 [[DCT2X211]], i64 16, <2 x i1> splat (i1 true), i32 2)
23-
; CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[CHROMA_DC209]], align 2
24-
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[ARRAYIDX33_I]], align 2
25-
; CHECK-NEXT: [[ARRAYIDX5_I226:%.*]] = getelementptr i8, ptr [[H]], i64 16
26-
; CHECK-NEXT: store i64 [[TMP2]], ptr [[ARRAYIDX5_I226]], align 2
27-
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[ARRAYIDX36_I181]], align 2
28-
; CHECK-NEXT: [[ARRAYIDX7_I228:%.*]] = getelementptr i8, ptr [[H]], i64 24
29-
; CHECK-NEXT: store i64 [[TMP3]], ptr [[ARRAYIDX7_I228]], align 2
20+
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[DCT2X211]], align 16
21+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
22+
; CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[CHROMA_DC209]], align 2
3023
; CHECK-NEXT: ret void
3124
;
3225
entry:

0 commit comments

Comments
 (0)