diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 29d65d5d1db64..a41e3f73fd5b4 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17973,7 +17973,7 @@ bool AArch64TargetLowering::lowerInterleavedStore(Instruction *Store,
 bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
     Instruction *Load, Value *Mask, IntrinsicInst *DI) const {
   const unsigned Factor = getDeinterleaveIntrinsicFactor(DI->getIntrinsicID());
-  if (Factor != 2 && Factor != 4) {
-    LLVM_DEBUG(dbgs() << "Matching ld2 and ld4 patterns failed\n");
+  if (Factor != 2 && Factor != 3 && Factor != 4) {
+    LLVM_DEBUG(dbgs() << "Matching ld2, ld3 and ld4 patterns failed\n");
     return false;
   }
@@ -18052,7 +18052,7 @@ bool AArch64TargetLowering::lowerInterleaveIntrinsicToStore(
     Instruction *Store, Value *Mask,
     ArrayRef<Value *> InterleavedValues) const {
   unsigned Factor = InterleavedValues.size();
-  if (Factor != 2 && Factor != 4) {
-    LLVM_DEBUG(dbgs() << "Matching st2 and st4 patterns failed\n");
+  if (Factor != 2 && Factor != 3 && Factor != 4) {
+    LLVM_DEBUG(dbgs() << "Matching st2, st3 and st4 patterns failed\n");
     return false;
   }
diff --git a/llvm/test/CodeGen/AArch64/sve-vector-load+deinterleave.ll b/llvm/test/CodeGen/AArch64/sve-vector-load+deinterleave.ll
new file mode 100644
index 0000000000000..0d41dc9113978
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-vector-load+deinterleave.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-none-elf -mattr=+sve | FileCheck %s -check-prefixes=SVE
+
+define void @load_factor2(i32* %ptr, <vscale x 4 x i32>* %s1, <vscale x 4 x i32>* %s2) {
+; SVE-LABEL: load_factor2:
+; SVE:       // %bb.0:
+; SVE-NEXT:    ptrue p0.s
+; SVE-NEXT:    ld2w { z0.s, z1.s }, p0/z, [x0]
+; SVE-NEXT:    str z0, [x1]
+; SVE-NEXT:    str z1, [x2]
+; SVE-NEXT:    ret
+  %wide.vec = load <vscale x 8 x i32>, ptr %ptr, align 8
+  %ldN = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %wide.vec)
+
+  %3 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 0
+  %4 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 1
+
+  store <vscale x 4 x i32> %3, <vscale x 4 x i32>* %s1
+  store <vscale x 4 x i32> %4, <vscale x 4 x i32>* %s2
+
+  ret void
+}
+
+define void @load_factor3(i32* %ptr, <vscale x 4 x i32>* %s1, <vscale x 4 x i32>* %s2, <vscale x 4 x i32>* %s3) {
+; SVE-LABEL: load_factor3:
+; SVE:       // %bb.0:
+; SVE-NEXT:    ptrue p0.s
+; SVE-NEXT:    ld3w { z0.s - z2.s }, p0/z, [x0]
+; SVE-NEXT:    str z0, [x1]
+; SVE-NEXT:    str z1, [x2]
+; SVE-NEXT:    str z2, [x3]
+; SVE-NEXT:    ret
+  %wide.vec = load <vscale x 12 x i32>, ptr %ptr, align 8
+  %ldN = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave3.nxv12i32(<vscale x 12 x i32> %wide.vec)
+
+  %3 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 0
+  %4 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 1
+  %5 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 2
+
+  store <vscale x 4 x i32> %3, <vscale x 4 x i32>* %s1
+  store <vscale x 4 x i32> %4, <vscale x 4 x i32>* %s2
+  store <vscale x 4 x i32> %5, <vscale x 4 x i32>* %s3
+  ret void
+}
+
+define void @load_factor4(i32* %ptr, <vscale x 4 x i32>* %s1, <vscale x 4 x i32>* %s2, <vscale x 4 x i32>* %s3, <vscale x 4 x i32>* %s4) {
+; SVE-LABEL: load_factor4:
+; SVE:       // %bb.0:
+; SVE-NEXT:    ptrue p0.s
+; SVE-NEXT:    ld4w { z0.s - z3.s }, p0/z, [x0]
+; SVE-NEXT:    str z0, [x1]
+; SVE-NEXT:    str z1, [x2]
+; SVE-NEXT:    str z2, [x3]
+; SVE-NEXT:    str z3, [x4]
+; SVE-NEXT:    ret
+  %wide.vec = load <vscale x 16 x i32>, ptr %ptr, align 8
+  %ldN = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave4.nxv16i32(<vscale x 16 x i32> %wide.vec)
+
+  %3 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 0
+  %4 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 1
+  %5 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 2
+  %6 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 3
+
+  store <vscale x 4 x i32> %3, <vscale x 4 x i32>* %s1
+  store <vscale x 4 x i32> %4, <vscale x 4 x i32>* %s2
+  store <vscale x 4 x i32> %5, <vscale x 4 x i32>* %s3
+  store <vscale x 4 x i32> %6, <vscale x 4 x i32>* %s4
+  ret void
+}
+
+
+declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32>)
+declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave3.nxv12i32(<vscale x 12 x i32>)
+declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave4.nxv16i32(<vscale x 16 x i32>)