Skip to content

Conversation

@alexey-bataev
Copy link
Member

Enables interleaved stores, which results in better estimation for segmented
stores for RISC-V

Created using spr 1.3.5
@llvmbot
Copy link
Member

llvmbot commented Nov 7, 2024

@llvm/pr-subscribers-vectorizers

@llvm/pr-subscribers-llvm-transforms

Author: Alexey Bataev (alexey-bataev)

Changes

Enables interleaved stores, which results in better estimation for segmented
stores for RISC-V


Full diff: https://github.com/llvm/llvm-project/pull/115354.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+36-4)
  • (modified) llvm/test/Transforms/SLPVectorizer/RISCV/segmented-stores.ll (+4-11)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 184413b420089a..bff0684a2e6f16 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -9759,6 +9759,29 @@ void BoUpSLP::transformNodes() {
           // Strided store is more profitable than reverse + consecutive store -
           // transform the node to strided store.
           E.State = TreeEntry::StridedVectorize;
+      } else if (!E.ReorderIndices.empty()) {
+        // Check for interleaved stores.
+        auto IsInterleaveMask = [&, &TTI = *TTI](ArrayRef<int> Mask) {
+          assert(Mask.size() > 1 && "Expected mask greater than 1 element.");
+          if (Mask.size() < 4)
+            return 0u;
+          for (unsigned Factor : seq<unsigned>(2, Mask.size() / 2 + 1)) {
+            if (ShuffleVectorInst::isInterleaveMask(
+                    Mask, Factor, VecTy->getElementCount().getFixedValue()) &&
+                TTI.isLegalInterleavedAccessType(
+                    VecTy, Factor,
+                    cast<StoreInst>(E.Scalars.front())->getAlign(),
+                    cast<StoreInst>(E.Scalars.front())
+                        ->getPointerAddressSpace()))
+              return Factor;
+          }
+
+          return 0u;
+        };
+        SmallVector<int> Mask(E.ReorderIndices.begin(), E.ReorderIndices.end());
+        unsigned InterleaveFactor = IsInterleaveMask(Mask);
+        if (InterleaveFactor != 0)
+          E.setInterleave(InterleaveFactor);
       }
       break;
     }
@@ -11428,10 +11451,19 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
       } else {
         assert(E->State == TreeEntry::Vectorize &&
                "Expected either strided or consecutive stores.");
-        TTI::OperandValueInfo OpInfo = getOperandInfo(E->getOperand(0));
-        VecStCost = TTI->getMemoryOpCost(
-            Instruction::Store, VecTy, BaseSI->getAlign(),
-            BaseSI->getPointerAddressSpace(), CostKind, OpInfo);
+        if (unsigned Factor = E->getInterleaveFactor()) {
+          assert(E->ReuseShuffleIndices.empty() && !E->ReorderIndices.empty() &&
+                 "No reused shuffles expected");
+          CommonCost = 0;
+          VecStCost = TTI->getInterleavedMemoryOpCost(
+              Instruction::Store, VecTy, Factor, std::nullopt,
+              BaseSI->getAlign(), BaseSI->getPointerAddressSpace(), CostKind);
+        } else {
+          TTI::OperandValueInfo OpInfo = getOperandInfo(E->getOperand(0));
+          VecStCost = TTI->getMemoryOpCost(
+              Instruction::Store, VecTy, BaseSI->getAlign(),
+              BaseSI->getPointerAddressSpace(), CostKind, OpInfo);
+        }
       }
       return VecStCost + CommonCost;
     };
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/segmented-stores.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/segmented-stores.ll
index ae1c3e1ee0da20..071d0b972f23a3 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/segmented-stores.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/segmented-stores.ll
@@ -8,7 +8,7 @@
 ; YAML-NEXT: Function:        test
 ; YAML-NEXT: Args:
 ; YAML-NEXT:   - String:          'Stores SLP vectorized with cost '
-; YAML-NEXT:   - Cost:            '-1'
+; YAML-NEXT:   - Cost:            '-2'
 ; YAML-NEXT:   - String:          ' and with tree size '
 ; YAML-NEXT:   - TreeSize:        '2'
 define void @test(ptr %h) {
@@ -17,16 +17,9 @@ define void @test(ptr %h) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[DCT2X211:%.*]] = alloca [0 x [0 x [8 x i64]]], i32 0, align 16
 ; CHECK-NEXT:    [[CHROMA_DC209:%.*]] = getelementptr i8, ptr [[H]], i64 0
-; CHECK-NEXT:    [[ARRAYIDX33_I:%.*]] = getelementptr i8, ptr [[DCT2X211]], i64 8
-; CHECK-NEXT:    [[ARRAYIDX36_I181:%.*]] = getelementptr i8, ptr [[DCT2X211]], i64 24
-; CHECK-NEXT:    [[TMP0:%.*]] = call <2 x i64> @llvm.experimental.vp.strided.load.v2i64.p0.i64(ptr align 4 [[DCT2X211]], i64 16, <2 x i1> splat (i1 true), i32 2)
-; CHECK-NEXT:    store <2 x i64> [[TMP0]], ptr [[CHROMA_DC209]], align 2
-; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[ARRAYIDX33_I]], align 2
-; CHECK-NEXT:    [[ARRAYIDX5_I226:%.*]] = getelementptr i8, ptr [[H]], i64 16
-; CHECK-NEXT:    store i64 [[TMP2]], ptr [[ARRAYIDX5_I226]], align 2
-; CHECK-NEXT:    [[TMP3:%.*]] = load i64, ptr [[ARRAYIDX36_I181]], align 2
-; CHECK-NEXT:    [[ARRAYIDX7_I228:%.*]] = getelementptr i8, ptr [[H]], i64 24
-; CHECK-NEXT:    store i64 [[TMP3]], ptr [[ARRAYIDX7_I228]], align 2
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i64>, ptr [[DCT2X211]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+; CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[CHROMA_DC209]], align 2
 ; CHECK-NEXT:    ret void
 ;
 entry:

@alexey-bataev
Copy link
Member Author

Ping!

Copy link
Collaborator

@RKSimon RKSimon left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM with one minor comment

if (Mask.size() < 4)
return 0u;
for (unsigned Factor : seq<unsigned>(2, Mask.size() / 2 + 1)) {
if (ShuffleVectorInst::isInterleaveMask(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pull out repeated cast:
auto *SclStore = cast<StoreInst>(E.Scalars.front());

Created using spr 1.3.5
@alexey-bataev alexey-bataev merged commit f6e1d64 into main Nov 15, 2024
4 of 6 checks passed
@alexey-bataev alexey-bataev deleted the users/alexey-bataev/spr/slpenable-interleaved-stores-support branch November 15, 2024 16:01
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants