From 7f56e0b4ff283abedccc4257d66694432a51d49a Mon Sep 17 00:00:00 2001
From: Rajveer
Date: Wed, 8 Oct 2025 18:00:53 +0530
Subject: [PATCH] [AArch64][SVE] Allow factor 3 in addition to 2 and 4 for
 load+deinterleave and store+interleave patterns in codegen

Resolves #159801 and #162068
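
For illustration, IR of the shape the new load_factor3 test exercises
(a minimal sketch; the value names are illustrative):

  %wide.vec = load <vscale x 12 x i32>, ptr %ptr, align 8
  %ldN = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave3.nxv12i32(<vscale x 12 x i32> %wide.vec)

With factor 3 accepted, this pair can be selected to a single predicated
ld3w; previously lowerDeinterleaveIntrinsicToLoad returned false for
factor 3 and the generic lowering was used instead. The store path
(st3w) is relaxed in the same way.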
---
 .../Target/AArch64/AArch64ISelLowering.cpp    |  4 +-
 .../AArch64/sve-vector-load+deinterleave.ll   | 74 +++++++++++++++++++
 2 files changed, 76 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/sve-vector-load+deinterleave.ll

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 29d65d5d1db64..a41e3f73fd5b4 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17973,7 +17973,7 @@ bool AArch64TargetLowering::lowerInterleavedStore(Instruction *Store,
 bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
     Instruction *Load, Value *Mask, IntrinsicInst *DI) const {
   const unsigned Factor = getDeinterleaveIntrinsicFactor(DI->getIntrinsicID());
-  if (Factor != 2 && Factor != 4) {
+  if (Factor != 2 && Factor != 3 && Factor != 4) {
     LLVM_DEBUG(dbgs() << "Matching ld2 and ld4 patterns failed\n");
     return false;
   }
@@ -18052,7 +18052,7 @@ bool AArch64TargetLowering::lowerInterleaveIntrinsicToStore(
     Instruction *Store, Value *Mask,
     ArrayRef<Value *> InterleavedValues) const {
   unsigned Factor = InterleavedValues.size();
-  if (Factor != 2 && Factor != 4) {
+  if (Factor != 2 && Factor != 3 && Factor != 4) {
     LLVM_DEBUG(dbgs() << "Matching st2 and st4 patterns failed\n");
     return false;
   }
diff --git a/llvm/test/CodeGen/AArch64/sve-vector-load+deinterleave.ll b/llvm/test/CodeGen/AArch64/sve-vector-load+deinterleave.ll
new file mode 100644
index 0000000000000..0d41dc9113978
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-vector-load+deinterleave.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-none-elf -mattr=+sve | FileCheck %s -check-prefixes=SVE
+
+define void @load_factor2(ptr %ptr, ptr %s1, ptr %s2) {
+; SVE-LABEL: load_factor2:
+; SVE:       // %bb.0:
+; SVE-NEXT:    ptrue p0.s
+; SVE-NEXT:    ld2w { z0.s, z1.s }, p0/z, [x0]
+; SVE-NEXT:    str z0, [x1]
+; SVE-NEXT:    str z1, [x2]
+; SVE-NEXT:    ret
+  %wide.vec = load <vscale x 8 x i32>, ptr %ptr, align 8
+  %ldN = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %wide.vec)
+
+  %1 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 0
+  %2 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 1
+
+  store <vscale x 4 x i32> %1, ptr %s1
+  store <vscale x 4 x i32> %2, ptr %s2
+  ret void
+}
+
+define void @load_factor3(ptr %ptr, ptr %s1, ptr %s2, ptr %s3) {
+; SVE-LABEL: load_factor3:
+; SVE:       // %bb.0:
+; SVE-NEXT:    ptrue p0.s
+; SVE-NEXT:    ld3w { z0.s - z2.s }, p0/z, [x0]
+; SVE-NEXT:    str z0, [x1]
+; SVE-NEXT:    str z1, [x2]
+; SVE-NEXT:    str z2, [x3]
+; SVE-NEXT:    ret
+  %wide.vec = load <vscale x 12 x i32>, ptr %ptr, align 8
+  %ldN = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave3.nxv12i32(<vscale x 12 x i32> %wide.vec)
+
+  %1 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 0
+  %2 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 1
+  %3 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 2
+
+  store <vscale x 4 x i32> %1, ptr %s1
+  store <vscale x 4 x i32> %2, ptr %s2
+  store <vscale x 4 x i32> %3, ptr %s3
+  ret void
+}
+
+define void @load_factor4(ptr %ptr, ptr %s1, ptr %s2, ptr %s3, ptr %s4) {
+; SVE-LABEL: load_factor4:
+; SVE:       // %bb.0:
+; SVE-NEXT:    ptrue p0.s
+; SVE-NEXT:    ld4w { z0.s - z3.s }, p0/z, [x0]
+; SVE-NEXT:    str z0, [x1]
+; SVE-NEXT:    str z1, [x2]
+; SVE-NEXT:    str z2, [x3]
+; SVE-NEXT:    str z3, [x4]
+; SVE-NEXT:    ret
+  %wide.vec = load <vscale x 16 x i32>, ptr %ptr, align 8
+  %ldN = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave4.nxv16i32(<vscale x 16 x i32> %wide.vec)
+
+  %1 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 0
+  %2 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 1
+  %3 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 2
+  %4 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %ldN, 3
+
+  store <vscale x 4 x i32> %1, ptr %s1
+  store <vscale x 4 x i32> %2, ptr %s2
+  store <vscale x 4 x i32> %3, ptr %s3
+  store <vscale x 4 x i32> %4, ptr %s4
+  ret void
+}
+
+
+declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32>)
+declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave3.nxv12i32(<vscale x 12 x i32>)
+declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave4.nxv16i32(<vscale x 16 x i32>)
+