From 523d70ccd60d85925e15dbdfba3592afc050ae02 Mon Sep 17 00:00:00 2001 From: Graham Hunter Date: Wed, 18 Jun 2025 10:45:20 +0000 Subject: [PATCH 1/4] Test precommit --- .../AArch64/sve-fixed-length-splat-segment.ll | 153 ++++++++++++++++++ 1 file changed, 153 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/sve-fixed-length-splat-segment.ll diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-segment.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-segment.ll new file mode 100644 index 0000000000000..625b1287b4d79 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-segment.ll @@ -0,0 +1,153 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s + +;; Patterns that lower to concat_vectors where all incoming operands are the same. + +define void @concat_i8q_256(<16 x i8> %data, ptr %addr) #0 { +; CHECK-LABEL: concat_i8q_256: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: splice z0.b, p0, z0.b, z0.b +; CHECK-NEXT: str z0, [x0] +; CHECK-NEXT: ret + %splat = shufflevector <16 x i8> %data, <16 x i8> poison, <32 x i32> + store <32 x i8> %splat, ptr %addr, align 1 + ret void +} + +define void @concat_i16q_256(<8 x i16> %data, ptr %addr) #0 { +; CHECK-LABEL: concat_i16q_256: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: splice z0.h, p0, z0.h, z0.h +; CHECK-NEXT: str z0, [x0] +; CHECK-NEXT: ret + %splat = shufflevector <8 x i16> poison, <8 x i16> %data, <16 x i32> + store <16 x i16> %splat, ptr %addr, align 1 + ret void +} + +define void @concat_i32q_256(<4 x i32> %data, ptr %addr) #0 { +; CHECK-LABEL: concat_i32q_256: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: splice z0.s, p0, z0.s, z0.s +; CHECK-NEXT: str z0, [x0] +; CHECK-NEXT: ret + %splat = shufflevector <4 x i32> %data, <4 x i32> poison, <8 x i32> + store <8 x i32> %splat, ptr %addr, align 1 + ret void +} + +define void @concat_i64q_256(<2 x i64> %data, ptr %addr) #0 { +; CHECK-LABEL: concat_i64q_256: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: splice z0.d, p0, z0.d, z0.d +; CHECK-NEXT: str z0, [x0] +; CHECK-NEXT: ret + %splat = shufflevector <2 x i64> poison, <2 x i64> %data, <4 x i32> + store <4 x i64> %splat, ptr %addr, align 1 + ret void +} + +define void @concat_f16q_256(<8 x half> %data, ptr %addr) #0 { +; CHECK-LABEL: concat_f16q_256: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: splice z0.h, p0, z0.h, z0.h +; CHECK-NEXT: str z0, [x0] +; CHECK-NEXT: ret + %splat = shufflevector <8 x half> poison, <8 x half> %data, <16 x i32> + store <16 x half> %splat, ptr %addr, align 1 + ret void +} + +define void @concat_bf16q_256(<8 x bfloat> %data, ptr %addr) #0 { +; CHECK-LABEL: concat_bf16q_256: +; CHECK: // %bb.0: +; CHECK-NEXT: stp q0, q0, [x0] +; CHECK-NEXT: ret + %splat = shufflevector <8 x bfloat> poison, <8 x bfloat> %data, <16 x i32> + store <16 x bfloat> %splat, ptr %addr, align 1 + ret void +} + +define void @concat_f32q_256(<4 x float> %data, ptr %addr) #0 { +; CHECK-LABEL: concat_f32q_256: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: splice z0.s, p0, z0.s, z0.s +; CHECK-NEXT: str z0, [x0] +; CHECK-NEXT: ret + %splat = shufflevector <4 x float> %data, <4 x float> poison, <8 x i32> + store <8 x float> %splat, ptr %addr, align 1 + ret void +} + +define void @concat_f64q_256(<2 x double> %data, ptr %addr) #0 { +; CHECK-LABEL: concat_f64q_256: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: splice z0.d, p0, z0.d, z0.d +; CHECK-NEXT: str z0, [x0] +; CHECK-NEXT: ret + %splat = shufflevector <2 x double> poison, <2 x double> %data, <4 x i32> + store <4 x double> %splat, ptr %addr, align 1 + ret void +} + +;; Test a wider vector + +define void @concat_i32q_512_with_256_vectors(<4 x i32> %data, ptr %addr) #0 { +; CHECK-LABEL: concat_i32q_512_with_256_vectors: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: splice z0.s, p0, z0.s, z0.s +; CHECK-NEXT: str z0, [x0, #1, mul vl] +; CHECK-NEXT: str z0, [x0] +; CHECK-NEXT: ret + %splat = shufflevector <4 x i32> %data, <4 x i32> poison, <16 x i32> + store <16 x i32> %splat, ptr %addr, align 1 + ret void +} + +define void @concat_i32q_512_with_512_vectors(<4 x i32> %data, ptr %addr) #1 { +; CHECK-LABEL: concat_i32q_512_with_512_vectors: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: splice z0.s, p0, z0.s, z0.s +; CHECK-NEXT: ptrue p0.s, vl8 +; CHECK-NEXT: splice z0.s, p0, z0.s, z0.s +; CHECK-NEXT: str z0, [x0] +; CHECK-NEXT: ret + %splat = shufflevector <4 x i32> %data, <4 x i32> poison, <16 x i32> + store <16 x i32> %splat, ptr %addr, align 1 + ret void +} + +attributes #0 = { vscale_range(2,2) "target-features"="+sve,+bf16" } +attributes #1 = { vscale_range(4,4) "target-features"="+sve,+bf16" } From 0d697240679a524ae0a9257459765d27d1e4af4a Mon Sep 17 00:00:00 2001 From: Graham Hunter Date: Wed, 18 Jun 2025 12:22:25 +0000 Subject: [PATCH 2/4] Match 128b splats expressed as concat_vectors --- .../Target/AArch64/AArch64ISelLowering.cpp | 11 +++++++ .../AArch64/sve-fixed-length-splat-segment.ll | 29 ++++++------------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 1169efce3123f..2546a49aaed2c 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -29317,6 +29317,17 @@ SDValue AArch64TargetLowering::LowerFixedLengthConcatVectorsToSVE( EVT VT = Op.getValueType(); EVT SrcVT = SrcOp1.getValueType(); + // Match a splat of 128b segments that fit in a single register. + if (SrcVT.is128BitVector() && all_equal(Op.getNode()->op_values()) && + VT.getSizeInBits() <= Subtarget->getMinSVEVectorSizeInBits()) { + EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT); + SDValue Splat = + DAG.getNode(AArch64ISD::DUPLANE128, DL, ContainerVT, + convertToScalableVector(DAG, ContainerVT, SrcOp1), + DAG.getConstant(0, DL, MVT::i64, /*isTarget=*/true)); + return convertFromScalableVector(DAG, VT, Splat); + } + if (NumOperands > 2) { SmallVector Ops; EVT PairVT = SrcVT.getDoubleNumVectorElementsVT(*DAG.getContext()); diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-segment.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-segment.ll index 625b1287b4d79..f1ff37b640026 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-segment.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-segment.ll @@ -6,9 +6,8 @@ define void @concat_i8q_256(<16 x i8> %data, ptr %addr) #0 { ; CHECK-LABEL: concat_i8q_256: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.b, vl16 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 -; CHECK-NEXT: splice z0.b, p0, z0.b, z0.b +; CHECK-NEXT: mov z0.q, q0 ; CHECK-NEXT: str z0, [x0] ; CHECK-NEXT: ret %splat = shufflevector <16 x i8> %data, <16 x i8> poison, <32 x i32> %data, ptr %addr) #0 { define void @concat_i16q_256(<8 x i16> %data, ptr %addr) #0 { ; CHECK-LABEL: concat_i16q_256: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.h, vl8 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 -; CHECK-NEXT: splice z0.h, p0, z0.h, z0.h +; CHECK-NEXT: mov z0.q, q0 ; CHECK-NEXT: str z0, [x0] ; CHECK-NEXT: ret %splat = shufflevector <8 x i16> poison, <8 x i16> %data, <16 x i32> %data, ptr %addr) #0 { define void @concat_i32q_256(<4 x i32> %data, ptr %addr) #0 { ; CHECK-LABEL: concat_i32q_256: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 -; CHECK-NEXT: splice z0.s, p0, z0.s, z0.s +; CHECK-NEXT: mov z0.q, q0 ; CHECK-NEXT: str z0, [x0] ; CHECK-NEXT: ret %splat = shufflevector <4 x i32> %data, <4 x i32> poison, <8 x i32> %data, ptr %addr) #0 { define void @concat_i64q_256(<2 x i64> %data, ptr %addr) #0 { ; CHECK-LABEL: concat_i64q_256: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 -; CHECK-NEXT: splice z0.d, p0, z0.d, z0.d +; CHECK-NEXT: mov z0.q, q0 ; CHECK-NEXT: str z0, [x0] ; CHECK-NEXT: ret %splat = shufflevector <2 x i64> poison, <2 x i64> %data, <4 x i32> %data, ptr %addr) #0 { define void @concat_f16q_256(<8 x half> %data, ptr %addr) #0 { ; CHECK-LABEL: concat_f16q_256: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.h, vl8 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 -; CHECK-NEXT: splice z0.h, p0, z0.h, z0.h +; CHECK-NEXT: mov z0.q, q0 ; CHECK-NEXT: str z0, [x0] ; CHECK-NEXT: ret %splat = shufflevector <8 x half> poison, <8 x half> %data, <16 x i32> %data, ptr %addr) #0 { define void @concat_f32q_256(<4 x float> %data, ptr %addr) #0 { ; CHECK-LABEL: concat_f32q_256: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 -; CHECK-NEXT: splice z0.s, p0, z0.s, z0.s +; CHECK-NEXT: mov z0.q, q0 ; CHECK-NEXT: str z0, [x0] ; CHECK-NEXT: ret %splat = shufflevector <4 x float> %data, <4 x float> poison, <8 x i32> %data, ptr %addr) #0 { define void @concat_f64q_256(<2 x double> %data, ptr %addr) #0 { ; CHECK-LABEL: concat_f64q_256: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 -; CHECK-NEXT: splice z0.d, p0, z0.d, z0.d +; CHECK-NEXT: mov z0.q, q0 ; CHECK-NEXT: str z0, [x0] ; CHECK-NEXT: ret %splat = shufflevector <2 x double> poison, <2 x double> %data, <4 x i32> %data, ptr %addr) #0 { define void @concat_i32q_512_with_256_vectors(<4 x i32> %data, ptr %addr) #0 { ; CHECK-LABEL: concat_i32q_512_with_256_vectors: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 -; CHECK-NEXT: splice z0.s, p0, z0.s, z0.s +; CHECK-NEXT: mov z0.q, q0 ; CHECK-NEXT: str z0, [x0, #1, mul vl] ; CHECK-NEXT: str z0, [x0] ; CHECK-NEXT: ret @@ -134,11 +126,8 @@ define void @concat_i32q_512_with_256_vectors(<4 x i32> %data, ptr %addr) #0 { define void @concat_i32q_512_with_512_vectors(<4 x i32> %data, ptr %addr) #1 { ; CHECK-LABEL: concat_i32q_512_with_512_vectors: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 -; CHECK-NEXT: splice z0.s, p0, z0.s, z0.s -; CHECK-NEXT: ptrue p0.s, vl8 -; CHECK-NEXT: splice z0.s, p0, z0.s, z0.s +; CHECK-NEXT: mov z0.q, q0 ; CHECK-NEXT: str z0, [x0] ; CHECK-NEXT: ret %splat = shufflevector <4 x i32> %data, <4 x i32> poison, <16 x i32> Date: Wed, 18 Jun 2025 13:40:01 +0000 Subject: [PATCH 3/4] Remove unnecessary check --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 2546a49aaed2c..5af7d2bab301f 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -29318,8 +29318,7 @@ SDValue AArch64TargetLowering::LowerFixedLengthConcatVectorsToSVE( EVT SrcVT = SrcOp1.getValueType(); // Match a splat of 128b segments that fit in a single register. - if (SrcVT.is128BitVector() && all_equal(Op.getNode()->op_values()) && - VT.getSizeInBits() <= Subtarget->getMinSVEVectorSizeInBits()) { + if (SrcVT.is128BitVector() && all_equal(Op.getNode()->op_values())) { EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT); SDValue Splat = DAG.getNode(AArch64ISD::DUPLANE128, DL, ContainerVT, From c3a7b12bed53e4edeac3a99e76dbe86699738d45 Mon Sep 17 00:00:00 2001 From: Graham Hunter Date: Fri, 20 Jun 2025 13:17:03 +0000 Subject: [PATCH 4/4] Use indices from 0, since we're not testing splats of segments beyond the first yet --- .../AArch64/sve-fixed-length-splat-segment.ll | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-segment.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-segment.ll index f1ff37b640026..a5d213c658c69 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-segment.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-segment.ll @@ -23,8 +23,8 @@ define void @concat_i16q_256(<8 x i16> %data, ptr %addr) #0 { ; CHECK-NEXT: mov z0.q, q0 ; CHECK-NEXT: str z0, [x0] ; CHECK-NEXT: ret - %splat = shufflevector <8 x i16> poison, <8 x i16> %data, <16 x i32> + %splat = shufflevector <8 x i16> %data, <8 x i16> poison, <16 x i32> store <16 x i16> %splat, ptr %addr, align 1 ret void } @@ -49,8 +49,8 @@ define void @concat_i64q_256(<2 x i64> %data, ptr %addr) #0 { ; CHECK-NEXT: mov z0.q, q0 ; CHECK-NEXT: str z0, [x0] ; CHECK-NEXT: ret - %splat = shufflevector <2 x i64> poison, <2 x i64> %data, <4 x i32> + %splat = shufflevector <2 x i64> %data, <2 x i64> poison, <4 x i32> store <4 x i64> %splat, ptr %addr, align 1 ret void } @@ -62,8 +62,8 @@ define void @concat_f16q_256(<8 x half> %data, ptr %addr) #0 { ; CHECK-NEXT: mov z0.q, q0 ; CHECK-NEXT: str z0, [x0] ; CHECK-NEXT: ret - %splat = shufflevector <8 x half> poison, <8 x half> %data, <16 x i32> + %splat = shufflevector <8 x half> %data, <8 x half> poison, <16 x i32> store <16 x half> %splat, ptr %addr, align 1 ret void } @@ -73,8 +73,8 @@ define void @concat_bf16q_256(<8 x bfloat> %data, ptr %addr) #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret - %splat = shufflevector <8 x bfloat> poison, <8 x bfloat> %data, <16 x i32> + %splat = shufflevector <8 x bfloat> %data, <8 x bfloat> poison, <16 x i32> store <16 x bfloat> %splat, ptr %addr, align 1 ret void } @@ -99,8 +99,8 @@ define void @concat_f64q_256(<2 x double> %data, ptr %addr) #0 { ; CHECK-NEXT: mov z0.q, q0 ; CHECK-NEXT: str z0, [x0] ; CHECK-NEXT: ret - %splat = shufflevector <2 x double> poison, <2 x double> %data, <4 x i32> + %splat = shufflevector <2 x double> %data, <2 x double> poison, <4 x i32> store <4 x double> %splat, ptr %addr, align 1 ret void }