diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 1169efce3123f..5af7d2bab301f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -29317,6 +29317,16 @@ SDValue AArch64TargetLowering::LowerFixedLengthConcatVectorsToSVE(
   EVT VT = Op.getValueType();
   EVT SrcVT = SrcOp1.getValueType();
 
+  // Match a splat of 128b segments that fit in a single register.
+  if (SrcVT.is128BitVector() && all_equal(Op.getNode()->op_values())) {
+    EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
+    SDValue Splat =
+        DAG.getNode(AArch64ISD::DUPLANE128, DL, ContainerVT,
+                    convertToScalableVector(DAG, ContainerVT, SrcOp1),
+                    DAG.getConstant(0, DL, MVT::i64, /*isTarget=*/true));
+    return convertFromScalableVector(DAG, VT, Splat);
+  }
+
   if (NumOperands > 2) {
     SmallVector<SDValue, 4> Ops;
     EVT PairVT = SrcVT.getDoubleNumVectorElementsVT(*DAG.getContext());
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-segment.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-segment.ll
new file mode 100644
index 0000000000000..a5d213c658c69
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-segment.ll
@@ -0,0 +1,142 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s
+
+;; Patterns that lower to concat_vectors where all incoming operands are the same.
+
+define void @concat_i8q_256(<16 x i8> %data, ptr %addr) #0 {
+; CHECK-LABEL: concat_i8q_256:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    mov z0.q, q0
+; CHECK-NEXT:    str z0, [x0]
+; CHECK-NEXT:    ret
+  %splat = shufflevector <16 x i8> %data, <16 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  store <32 x i8> %splat, ptr %addr, align 1
+  ret void
+}
+
+define void @concat_i16q_256(<8 x i16> %data, ptr %addr) #0 {
+; CHECK-LABEL: concat_i16q_256:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    mov z0.q, q0
+; CHECK-NEXT:    str z0, [x0]
+; CHECK-NEXT:    ret
+  %splat = shufflevector <8 x i16> %data, <8 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  store <16 x i16> %splat, ptr %addr, align 1
+  ret void
+}
+
+define void @concat_i32q_256(<4 x i32> %data, ptr %addr) #0 {
+; CHECK-LABEL: concat_i32q_256:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    mov z0.q, q0
+; CHECK-NEXT:    str z0, [x0]
+; CHECK-NEXT:    ret
+  %splat = shufflevector <4 x i32> %data, <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  store <8 x i32> %splat, ptr %addr, align 1
+  ret void
+}
+
+define void @concat_i64q_256(<2 x i64> %data, ptr %addr) #0 {
+; CHECK-LABEL: concat_i64q_256:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    mov z0.q, q0
+; CHECK-NEXT:    str z0, [x0]
+; CHECK-NEXT:    ret
+  %splat = shufflevector <2 x i64> %data, <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+  store <4 x i64> %splat, ptr %addr, align 1
+  ret void
+}
+
+define void @concat_f16q_256(<8 x half> %data, ptr %addr) #0 {
+; CHECK-LABEL: concat_f16q_256:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    mov z0.q, q0
+; CHECK-NEXT:    str z0, [x0]
+; CHECK-NEXT:    ret
+  %splat = shufflevector <8 x half> %data, <8 x half> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  store <16 x half> %splat, ptr %addr, align 1
+  ret void
+}
+
+define void @concat_bf16q_256(<8 x bfloat> %data, ptr %addr) #0 {
+; CHECK-LABEL: concat_bf16q_256:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stp q0, q0, [x0]
+; CHECK-NEXT:    ret
+  %splat = shufflevector <8 x bfloat> %data, <8 x bfloat> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  store <16 x bfloat> %splat, ptr %addr, align 1
+  ret void
+}
+
+define void @concat_f32q_256(<4 x float> %data, ptr %addr) #0 {
+; CHECK-LABEL: concat_f32q_256:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    mov z0.q, q0
+; CHECK-NEXT:    str z0, [x0]
+; CHECK-NEXT:    ret
+  %splat = shufflevector <4 x float> %data, <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  store <8 x float> %splat, ptr %addr, align 1
+  ret void
+}
+
+define void @concat_f64q_256(<2 x double> %data, ptr %addr) #0 {
+; CHECK-LABEL: concat_f64q_256:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    mov z0.q, q0
+; CHECK-NEXT:    str z0, [x0]
+; CHECK-NEXT:    ret
+  %splat = shufflevector <2 x double> %data, <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+  store <4 x double> %splat, ptr %addr, align 1
+  ret void
+}
+
+;; Test a wider vector
+
+define void @concat_i32q_512_with_256_vectors(<4 x i32> %data, ptr %addr) #0 {
+; CHECK-LABEL: concat_i32q_512_with_256_vectors:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    mov z0.q, q0
+; CHECK-NEXT:    str z0, [x0, #1, mul vl]
+; CHECK-NEXT:    str z0, [x0]
+; CHECK-NEXT:    ret
+  %splat = shufflevector <4 x i32> %data, <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  store <16 x i32> %splat, ptr %addr, align 1
+  ret void
+}
+
+define void @concat_i32q_512_with_512_vectors(<4 x i32> %data, ptr %addr) #1 {
+; CHECK-LABEL: concat_i32q_512_with_512_vectors:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    mov z0.q, q0
+; CHECK-NEXT:    str z0, [x0]
+; CHECK-NEXT:    ret
+  %splat = shufflevector <4 x i32> %data, <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  store <16 x i32> %splat, ptr %addr, align 1
+  ret void
+}
+
+attributes #0 = { vscale_range(2,2) "target-features"="+sve,+bf16" }
+attributes #1 = { vscale_range(4,4) "target-features"="+sve,+bf16" }