Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17973,7 +17973,7 @@ bool AArch64TargetLowering::lowerInterleavedStore(Instruction *Store,
bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
Instruction *Load, Value *Mask, IntrinsicInst *DI) const {
const unsigned Factor = getDeinterleaveIntrinsicFactor(DI->getIntrinsicID());
if (Factor != 2 && Factor != 4) {
if (Factor != 2 && Factor != 3 && Factor != 4) {
LLVM_DEBUG(dbgs() << "Matching ld2, ld3 and ld4 patterns failed\n");
return false;
}
Expand Down Expand Up @@ -18052,7 +18052,7 @@ bool AArch64TargetLowering::lowerInterleaveIntrinsicToStore(
Instruction *Store, Value *Mask,
ArrayRef<Value *> InterleavedValues) const {
unsigned Factor = InterleavedValues.size();
if (Factor != 2 && Factor != 4) {
if (Factor != 2 && Factor != 3 && Factor != 4) {
LLVM_DEBUG(dbgs() << "Matching st2, st3 and st4 patterns failed\n");
return false;
}
Expand Down
74 changes: 74 additions & 0 deletions llvm/test/CodeGen/AArch64/sve-vector-load+deinterleave.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-none-elf -mattr=+sve | FileCheck %s -check-prefixes=SVE
Comment on lines +1 to +2
Copy link
Collaborator

@paulwalker-arm paulwalker-arm Oct 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While the code change is in AArch64ISelLowering the affected function does not really relate to code generation. It's part of InterleavedAccessPass that is an IR pass. Its tests live in llvm/test/Transforms/InterleavedAccess/AArch64 so it'll be better to create a 3-way variant of scalable-deinterleave-intrinsics.ll.


; Deinterleave a factor-2 interleaved load into an SVE ld2w and store each part.
; Fixed: use opaque 'ptr' throughout -- the original mixed typed pointers
; (i32*, <vscale x 4 x i32>*) with 'ptr', and typed pointers no longer parse
; in current LLVM. Also replaced unnamed values %3/%4 with named values:
; unnamed values must be numbered sequentially (%1, %2, ...), which the
; original numbering violated.
define void @load_factor2(ptr %ptr, ptr %s1, ptr %s2) {
; SVE-LABEL: load_factor2:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.s
; SVE-NEXT:    ld2w { z0.s, z1.s }, p0/z, [x0]
; SVE-NEXT:    str z0, [x1]
; SVE-NEXT:    str z1, [x2]
; SVE-NEXT:    ret
  %wide.vec = load <vscale x 8 x i32>, ptr %ptr, align 8
  %ldN = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %wide.vec)
  %part0 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 0
  %part1 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 1
  store <vscale x 4 x i32> %part0, ptr %s1
  store <vscale x 4 x i32> %part1, ptr %s2
  ret void
}

; Deinterleave a factor-3 interleaved load into an SVE ld3w and store each part.
; This is the new case enabled by the patch (Factor == 3 in
; lowerDeinterleaveIntrinsicToLoad). Fixed: opaque 'ptr' throughout (typed
; pointers no longer parse) and named result values instead of
; out-of-sequence unnamed values %3/%4/%5.
define void @load_factor3(ptr %ptr, ptr %s1, ptr %s2, ptr %s3) {
; SVE-LABEL: load_factor3:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.s
; SVE-NEXT:    ld3w { z0.s - z2.s }, p0/z, [x0]
; SVE-NEXT:    str z0, [x1]
; SVE-NEXT:    str z1, [x2]
; SVE-NEXT:    str z2, [x3]
; SVE-NEXT:    ret
  %wide.vec = load <vscale x 12 x i32>, ptr %ptr, align 8
  %ldN = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave3.nxv12i32(<vscale x 12 x i32> %wide.vec)
  %part0 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 0
  %part1 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 1
  %part2 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 2
  store <vscale x 4 x i32> %part0, ptr %s1
  store <vscale x 4 x i32> %part1, ptr %s2
  store <vscale x 4 x i32> %part2, ptr %s3
  ret void
}

; Deinterleave a factor-4 interleaved load into an SVE ld4w and store each part.
; Fixed: opaque 'ptr' throughout (typed pointers no longer parse in current
; LLVM) and named result values instead of out-of-sequence unnamed values
; %3..%6.
define void @load_factor4(ptr %ptr, ptr %s1, ptr %s2, ptr %s3, ptr %s4) {
; SVE-LABEL: load_factor4:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.s
; SVE-NEXT:    ld4w { z0.s - z3.s }, p0/z, [x0]
; SVE-NEXT:    str z0, [x1]
; SVE-NEXT:    str z1, [x2]
; SVE-NEXT:    str z2, [x3]
; SVE-NEXT:    str z3, [x4]
; SVE-NEXT:    ret
  %wide.vec = load <vscale x 16 x i32>, ptr %ptr, align 8
  %ldN = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave4.nxv16i32(<vscale x 16 x i32> %wide.vec)
  %part0 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 0
  %part1 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 1
  %part2 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 2
  %part3 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 3
  store <vscale x 4 x i32> %part0, ptr %s1
  store <vscale x 4 x i32> %part1, ptr %s2
  store <vscale x 4 x i32> %part2, ptr %s3
  store <vscale x 4 x i32> %part3, ptr %s4
  ret void
}


; Declarations for the vector.deinterleave intrinsics used above
; (factors 2, 3 and 4 on <vscale x 4 x i32> results). NOTE(review):
; modern LLVM tools typically auto-declare known intrinsics, so these
; lines are presumably optional -- kept for readability.
declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave3.nxv12i32(<vscale x 12 x i32>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave4.nxv16i32(<vscale x 16 x i32>)