Skip to content

Commit 0d69724

Browse files
committed
Match 128b splats expressed as concat_vectors
1 parent 523d70c commit 0d69724

File tree

2 files changed

+20
-20
lines changed

2 files changed

+20
-20
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29317,6 +29317,17 @@ SDValue AArch64TargetLowering::LowerFixedLengthConcatVectorsToSVE(
2931729317
EVT VT = Op.getValueType();
2931829318
EVT SrcVT = SrcOp1.getValueType();
2931929319

29320+
// Match a splat of 128b segments that fit in a single register.
29321+
if (SrcVT.is128BitVector() && all_equal(Op.getNode()->op_values()) &&
29322+
VT.getSizeInBits() <= Subtarget->getMinSVEVectorSizeInBits()) {
29323+
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
29324+
SDValue Splat =
29325+
DAG.getNode(AArch64ISD::DUPLANE128, DL, ContainerVT,
29326+
convertToScalableVector(DAG, ContainerVT, SrcOp1),
29327+
DAG.getConstant(0, DL, MVT::i64, /*isTarget=*/true));
29328+
return convertFromScalableVector(DAG, VT, Splat);
29329+
}
29330+
2932029331
if (NumOperands > 2) {
2932129332
SmallVector<SDValue, 4> Ops;
2932229333
EVT PairVT = SrcVT.getDoubleNumVectorElementsVT(*DAG.getContext());

llvm/test/CodeGen/AArch64/sve-fixed-length-splat-segment.ll

Lines changed: 9 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -6,9 +6,8 @@
66
define void @concat_i8q_256(<16 x i8> %data, ptr %addr) #0 {
77
; CHECK-LABEL: concat_i8q_256:
88
; CHECK: // %bb.0:
9-
; CHECK-NEXT: ptrue p0.b, vl16
109
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
11-
; CHECK-NEXT: splice z0.b, p0, z0.b, z0.b
10+
; CHECK-NEXT: mov z0.q, q0
1211
; CHECK-NEXT: str z0, [x0]
1312
; CHECK-NEXT: ret
1413
%splat = shufflevector <16 x i8> %data, <16 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
@@ -20,9 +19,8 @@ define void @concat_i8q_256(<16 x i8> %data, ptr %addr) #0 {
2019
define void @concat_i16q_256(<8 x i16> %data, ptr %addr) #0 {
2120
; CHECK-LABEL: concat_i16q_256:
2221
; CHECK: // %bb.0:
23-
; CHECK-NEXT: ptrue p0.h, vl8
2422
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
25-
; CHECK-NEXT: splice z0.h, p0, z0.h, z0.h
23+
; CHECK-NEXT: mov z0.q, q0
2624
; CHECK-NEXT: str z0, [x0]
2725
; CHECK-NEXT: ret
2826
%splat = shufflevector <8 x i16> poison, <8 x i16> %data, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
@@ -34,9 +32,8 @@ define void @concat_i16q_256(<8 x i16> %data, ptr %addr) #0 {
3432
define void @concat_i32q_256(<4 x i32> %data, ptr %addr) #0 {
3533
; CHECK-LABEL: concat_i32q_256:
3634
; CHECK: // %bb.0:
37-
; CHECK-NEXT: ptrue p0.s, vl4
3835
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
39-
; CHECK-NEXT: splice z0.s, p0, z0.s, z0.s
36+
; CHECK-NEXT: mov z0.q, q0
4037
; CHECK-NEXT: str z0, [x0]
4138
; CHECK-NEXT: ret
4239
%splat = shufflevector <4 x i32> %data, <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3,
@@ -48,9 +45,8 @@ define void @concat_i32q_256(<4 x i32> %data, ptr %addr) #0 {
4845
define void @concat_i64q_256(<2 x i64> %data, ptr %addr) #0 {
4946
; CHECK-LABEL: concat_i64q_256:
5047
; CHECK: // %bb.0:
51-
; CHECK-NEXT: ptrue p0.d, vl2
5248
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
53-
; CHECK-NEXT: splice z0.d, p0, z0.d, z0.d
49+
; CHECK-NEXT: mov z0.q, q0
5450
; CHECK-NEXT: str z0, [x0]
5551
; CHECK-NEXT: ret
5652
%splat = shufflevector <2 x i64> poison, <2 x i64> %data, <4 x i32> <i32 2, i32 3,
@@ -62,9 +58,8 @@ define void @concat_i64q_256(<2 x i64> %data, ptr %addr) #0 {
6258
define void @concat_f16q_256(<8 x half> %data, ptr %addr) #0 {
6359
; CHECK-LABEL: concat_f16q_256:
6460
; CHECK: // %bb.0:
65-
; CHECK-NEXT: ptrue p0.h, vl8
6661
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
67-
; CHECK-NEXT: splice z0.h, p0, z0.h, z0.h
62+
; CHECK-NEXT: mov z0.q, q0
6863
; CHECK-NEXT: str z0, [x0]
6964
; CHECK-NEXT: ret
7065
%splat = shufflevector <8 x half> poison, <8 x half> %data, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
@@ -87,9 +82,8 @@ define void @concat_bf16q_256(<8 x bfloat> %data, ptr %addr) #0 {
8782
define void @concat_f32q_256(<4 x float> %data, ptr %addr) #0 {
8883
; CHECK-LABEL: concat_f32q_256:
8984
; CHECK: // %bb.0:
90-
; CHECK-NEXT: ptrue p0.s, vl4
9185
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
92-
; CHECK-NEXT: splice z0.s, p0, z0.s, z0.s
86+
; CHECK-NEXT: mov z0.q, q0
9387
; CHECK-NEXT: str z0, [x0]
9488
; CHECK-NEXT: ret
9589
%splat = shufflevector <4 x float> %data, <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3,
@@ -101,9 +95,8 @@ define void @concat_f32q_256(<4 x float> %data, ptr %addr) #0 {
10195
define void @concat_f64q_256(<2 x double> %data, ptr %addr) #0 {
10296
; CHECK-LABEL: concat_f64q_256:
10397
; CHECK: // %bb.0:
104-
; CHECK-NEXT: ptrue p0.d, vl2
10598
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
106-
; CHECK-NEXT: splice z0.d, p0, z0.d, z0.d
99+
; CHECK-NEXT: mov z0.q, q0
107100
; CHECK-NEXT: str z0, [x0]
108101
; CHECK-NEXT: ret
109102
%splat = shufflevector <2 x double> poison, <2 x double> %data, <4 x i32> <i32 2, i32 3,
@@ -117,9 +110,8 @@ define void @concat_f64q_256(<2 x double> %data, ptr %addr) #0 {
117110
define void @concat_i32q_512_with_256_vectors(<4 x i32> %data, ptr %addr) #0 {
118111
; CHECK-LABEL: concat_i32q_512_with_256_vectors:
119112
; CHECK: // %bb.0:
120-
; CHECK-NEXT: ptrue p0.s, vl4
121113
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
122-
; CHECK-NEXT: splice z0.s, p0, z0.s, z0.s
114+
; CHECK-NEXT: mov z0.q, q0
123115
; CHECK-NEXT: str z0, [x0, #1, mul vl]
124116
; CHECK-NEXT: str z0, [x0]
125117
; CHECK-NEXT: ret
@@ -134,11 +126,8 @@ define void @concat_i32q_512_with_256_vectors(<4 x i32> %data, ptr %addr) #0 {
134126
define void @concat_i32q_512_with_512_vectors(<4 x i32> %data, ptr %addr) #1 {
135127
; CHECK-LABEL: concat_i32q_512_with_512_vectors:
136128
; CHECK: // %bb.0:
137-
; CHECK-NEXT: ptrue p0.s, vl4
138129
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
139-
; CHECK-NEXT: splice z0.s, p0, z0.s, z0.s
140-
; CHECK-NEXT: ptrue p0.s, vl8
141-
; CHECK-NEXT: splice z0.s, p0, z0.s, z0.s
130+
; CHECK-NEXT: mov z0.q, q0
142131
; CHECK-NEXT: str z0, [x0]
143132
; CHECK-NEXT: ret
144133
%splat = shufflevector <4 x i32> %data, <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3,

0 commit comments

Comments
 (0)