From 4b7fce219a057f0d11241940ba6d528ad6c72a9f Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell
Date: Thu, 12 Dec 2024 17:56:13 +0000
Subject: [PATCH 1/3] [AArch64][SVE] Don't require 16-byte aligned SVE loads/stores with +strict-align

Instead, allow any alignment >= the element size (in bytes). This is all
that is needed for vector loads even if unaligned accesses are disabled.

See: https://developer.arm.com/documentation/ddi0602/2024-09/Shared-Pseudocode/aarch64-functions-memory?lang=en#impl-aarch64.Mem.read.3

Specifically:

```
// Check alignment on size of element accessed, not overall access size.
constant integer alignment = if accdesc.ispair then size DIV 2 else size;
```

The `size` passed to `Mem` by SVE load/store instructions is the element
size.
---
 .../Target/AArch64/AArch64ISelLowering.cpp    | 11 ++++
 .../AArch64/sve-load-store-strict-align.ll    | 58 +++++++++++++++++++
 2 files changed, 69 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/sve-load-store-strict-align.ll

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index e61dedb247756..864e3cfcfeb56 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2569,6 +2569,17 @@ MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
 bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
     EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
     unsigned *Fast) const {
+
+  // Allow SVE loads/stores where the alignment >= the size of the element type,
+  // even with +strict-align. The SVE loads/stores do not require memory to be
+  // aligned more than the element type even without unaligned accesses.
+  // Without this, already aligned loads and stores are forced to have 16-byte
+  // alignment, which is unnecessary and fails to build as
+  // TLI.expandUnalignedLoad() and TLI.expandUnalignedStore() don't yet support
+  // scalable vectors.
+  if (VT.isScalableVector() && Alignment >= Align(VT.getScalarSizeInBits() / 8))
+    return true;
+
   if (Subtarget->requiresStrictAlign())
     return false;

diff --git a/llvm/test/CodeGen/AArch64/sve-load-store-strict-align.ll b/llvm/test/CodeGen/AArch64/sve-load-store-strict-align.ll
new file mode 100644
index 0000000000000..94e120b829424
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-load-store-strict-align.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+strict-align < %s | FileCheck %s
+
+define void @nxv16i8(ptr %ldptr, ptr %stptr) {
+; CHECK-LABEL: nxv16i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
+; CHECK-NEXT:    st1b { z0.b }, p0, [x1]
+; CHECK-NEXT:    ret
+  %l3 = load <vscale x 16 x i8>, ptr %ldptr, align 1
+  store <vscale x 16 x i8> %l3, ptr %stptr, align 1
+  ret void
+}
+
+define void @nxv8i16(ptr %ldptr, ptr %stptr) {
+; CHECK-LABEL: nxv8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    st1h { z0.h }, p0, [x1]
+; CHECK-NEXT:    ret
+  %l3 = load <vscale x 8 x i16>, ptr %ldptr, align 2
+  store <vscale x 8 x i16> %l3, ptr %stptr, align 2
+  ret void
+}
+
+define void @nxv4i32(ptr %ldptr, ptr %stptr) {
+; CHECK-LABEL: nxv4i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
+; CHECK-NEXT:    ret
+  %l3 = load <vscale x 4 x i32>, ptr %ldptr, align 4
+  store <vscale x 4 x i32> %l3, ptr %stptr, align 4
+  ret void
+}
+
+define void @nxv2i64(ptr %ldptr, ptr %stptr) {
+; CHECK-LABEL: nxv2i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
+; CHECK-NEXT:    ret
+  %l3 = load <vscale x 2 x i64>, ptr %ldptr, align 8
+  store <vscale x 2 x i64> %l3, ptr %stptr, align 8
+  ret void
+}
+
+; FIXME: Support TLI.expandUnalignedLoad()/TLI.expandUnalignedStore() for SVE.
+; define void @unaligned_nxv2i64(ptr %ldptr, ptr %stptr) {
+;   %l3 = load <vscale x 2 x i64>, ptr %ldptr, align 4
+;   store <vscale x 2 x i64> %l3, ptr %stptr, align 4
+;   ret void
+; }

From 5065257c8f87602b70ecd3a70c848411822efe5e Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell
Date: Fri, 13 Dec 2024 13:09:55 +0000
Subject: [PATCH 2/3] Fixups

---
 .../lib/Target/AArch64/AArch64ISelLowering.cpp | 18 ++++++++++--------
 .../AArch64/sve-load-store-strict-align.ll     | 16 ++++++++++------
 .../sve-unaligned-load-store-strict-align.ll   | 18 ++++++++++++++++++
 3 files changed, 38 insertions(+), 14 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/sve-unaligned-load-store-strict-align.ll

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 864e3cfcfeb56..8ab5cb2902110 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2571,14 +2571,16 @@ bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
     unsigned *Fast) const {

   // Allow SVE loads/stores where the alignment >= the size of the element type,
-  // even with +strict-align. The SVE loads/stores do not require memory to be
-  // aligned more than the element type even without unaligned accesses.
-  // Without this, already aligned loads and stores are forced to have 16-byte
-  // alignment, which is unnecessary and fails to build as
-  // TLI.expandUnalignedLoad() and TLI.expandUnalignedStore() don't yet support
-  // scalable vectors.
-  if (VT.isScalableVector() && Alignment >= Align(VT.getScalarSizeInBits() / 8))
-    return true;
+  // even with +strict-align. Predicated SVE loads/stores (e.g. ld1/st1), used
+  // for stores that come from IR, only require element-size alignment (even if
+  // unaligned accesses are disabled). Without this, these will be forced to
+  // have 16-byte alignment with +strict-align (and fail to lower as we don't
+  // yet support TLI.expandUnalignedLoad() and TLI.expandUnalignedStore()).
+  if (VT.isScalableVector()) {
+    unsigned ElementSizeBits = VT.getScalarSizeInBits();
+    if (ElementSizeBits % 8 == 0 && Alignment >= Align(ElementSizeBits / 8))
+      return true;
+  }

   if (Subtarget->requiresStrictAlign())
     return false;
diff --git a/llvm/test/CodeGen/AArch64/sve-load-store-strict-align.ll b/llvm/test/CodeGen/AArch64/sve-load-store-strict-align.ll
index 94e120b829424..c5b0651ab01d4 100644
--- a/llvm/test/CodeGen/AArch64/sve-load-store-strict-align.ll
+++ b/llvm/test/CodeGen/AArch64/sve-load-store-strict-align.ll
@@ -50,9 +50,13 @@ define void @nxv2i64(ptr %ldptr, ptr %stptr) {
   ret void
 }

-; FIXME: Support TLI.expandUnalignedLoad()/TLI.expandUnalignedStore() for SVE.
-; define void @unaligned_nxv2i64(ptr %ldptr, ptr %stptr) {
-;   %l3 = load <vscale x 2 x i64>, ptr %ldptr, align 4
-;   store <vscale x 2 x i64> %l3, ptr %stptr, align 4
-;   ret void
-; }
+define void @nxv16i1(ptr %ldptr, ptr %stptr) {
+; CHECK-LABEL: nxv16i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr p0, [x0]
+; CHECK-NEXT:    str p0, [x1]
+; CHECK-NEXT:    ret
+  %l3 = load <vscale x 16 x i1>, ptr %ldptr, align 2
+  store <vscale x 16 x i1> %l3, ptr %stptr, align 2
+  ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/sve-unaligned-load-store-strict-align.ll b/llvm/test/CodeGen/AArch64/sve-unaligned-load-store-strict-align.ll
new file mode 100644
index 0000000000000..62893e6ce7f98
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-unaligned-load-store-strict-align.ll
@@ -0,0 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+strict-align < %s | FileCheck %s
+
+; FIXME: Support TLI.expandUnalignedLoad()/TLI.expandUnalignedStore() for SVE.
+; XFAIL: *
+
+define void @unaligned_nxv16i1(ptr %ldptr, ptr %stptr) {
+  %l3 = load <vscale x 16 x i1>, ptr %ldptr, align 1
+  store <vscale x 16 x i1> %l3, ptr %stptr, align 1
+  ret void
+}
+
+define void @unaligned_nxv2i64(ptr %ldptr, ptr %stptr) {
+  %l3 = load <vscale x 2 x i64>, ptr %ldptr, align 4
+  store <vscale x 2 x i64> %l3, ptr %stptr, align 4
+  ret void
+}

From 890c93b5d900a27dfcb003658bb8995d9b268271 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell
Date: Fri, 13 Dec 2024 15:04:11 +0000
Subject: [PATCH 3/3] Fixups

---
 .../sve-unaligned-load-store-strict-align.ll | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/sve-unaligned-load-store-strict-align.ll b/llvm/test/CodeGen/AArch64/sve-unaligned-load-store-strict-align.ll
index 62893e6ce7f98..27637800f751f 100644
--- a/llvm/test/CodeGen/AArch64/sve-unaligned-load-store-strict-align.ll
+++ b/llvm/test/CodeGen/AArch64/sve-unaligned-load-store-strict-align.ll
@@ -1,17 +1,30 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+strict-align < %s | FileCheck %s
+; RUN: not --crash llc -mtriple=aarch64-linux-gnu -mattr=+sve,+strict-align < %s 2>&1 | FileCheck %s --check-prefix=CHECK-FIXME
+
+; REQUIRES: asserts

 ; FIXME: Support TLI.expandUnalignedLoad()/TLI.expandUnalignedStore() for SVE.
-; XFAIL: *
+; CHECK-FIXME: LLVM ERROR: Invalid size request on a scalable vector.

 define void @unaligned_nxv16i1(ptr %ldptr, ptr %stptr) {
+; CHECK-LABEL: unaligned_nxv16i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr p0, [x0]
+; CHECK-NEXT:    str p0, [x1]
+; CHECK-NEXT:    ret
   %l3 = load <vscale x 16 x i1>, ptr %ldptr, align 1
   store <vscale x 16 x i1> %l3, ptr %stptr, align 1
   ret void
 }

 define void @unaligned_nxv2i64(ptr %ldptr, ptr %stptr) {
+; CHECK-LABEL: unaligned_nxv2i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
+; CHECK-NEXT:    ret
   %l3 = load <vscale x 2 x i64>, ptr %ldptr, align 4
   store <vscale x 2 x i64> %l3, ptr %stptr, align 4
   ret void