Skip to content

Commit 09bf062

Browse files
committed
[AArch64] Add patterns for constructive splice.
SVE2 added the constructive splice instruction, which takes its two source vectors as a register tuple. Even though the register allocator must ensure that the tuple occupies consecutive registers, it's likely to be more efficient than using the destructive splice instruction when the first operand is reused.
1 parent 3bcdd60 commit 09bf062

9 files changed

+787
-765
lines changed

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3846,7 +3846,7 @@ let Predicates = [HasSVE2] in {
38463846

38473847
let Predicates = [HasSVE2orSME] in {
38483848
// SVE2 vector splice (constructive)
3849-
defm SPLICE_ZPZZ : sve2_int_perm_splice_cons<"splice">;
3849+
defm SPLICE_ZPZZ : sve2_int_perm_splice_cons<"splice", AArch64splice>;
38503850
} // End HasSVE2orSME
38513851

38523852
let Predicates = [HasSVE2] in {

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7245,11 +7245,33 @@ class sve2_int_perm_splice_cons<bits<2> sz8_64, string asm,
72457245
let hasSideEffects = 0;
72467246
}
72477247

7248-
multiclass sve2_int_perm_splice_cons<string asm> {
7248+
multiclass sve2_int_perm_splice_cons<string asm, SDPatternOperator op> {
72497249
def _B : sve2_int_perm_splice_cons<0b00, asm, ZPR8, ZZ_b>;
72507250
def _H : sve2_int_perm_splice_cons<0b01, asm, ZPR16, ZZ_h>;
72517251
def _S : sve2_int_perm_splice_cons<0b10, asm, ZPR32, ZZ_s>;
72527252
def _D : sve2_int_perm_splice_cons<0b11, asm, ZPR64, ZZ_d>;
7253+
7254+
let AddedComplexity = 2 in {
7255+
foreach VT = [nxv16i8] in
7256+
def : Pat<(VT (op nxv16i1:$pred, VT:$zn1, VT:$zn2)),
7257+
(!cast<Instruction>(NAME # _B)
7258+
nxv16i1:$pred, (REG_SEQUENCE ZPR2, VT:$zn1, zsub0, VT:$zn2, zsub1))>;
7259+
7260+
foreach VT = [nxv8i16, nxv8f16, nxv8bf16] in
7261+
def : Pat<(VT (op nxv8i1:$pred, VT:$zn1, VT:$zn2)),
7262+
(!cast<Instruction>(NAME # _H)
7263+
nxv8i1:$pred, (REG_SEQUENCE ZPR2, VT:$zn1, zsub0, VT:$zn2, zsub1))>;
7264+
7265+
foreach VT = [nxv4i32, nxv4f16, nxv4f32, nxv4bf16] in
7266+
def : Pat<(VT (op nxv4i1:$pred, VT:$zn1, VT:$zn2)),
7267+
(!cast<Instruction>(NAME # _S)
7268+
nxv4i1:$pred, (REG_SEQUENCE ZPR2, VT:$zn1, zsub0, VT:$zn2, zsub1))>;
7269+
7270+
foreach VT = [nxv2i64, nxv2f16, nxv2f32, nxv2f64, nxv2bf16] in
7271+
def : Pat<(VT (op nxv2i1:$pred, VT:$zn1, VT:$zn2)),
7272+
(!cast<Instruction>(NAME # _D)
7273+
nxv2i1:$pred, (REG_SEQUENCE ZPR2, VT:$zn1, zsub0, VT:$zn2, zsub1))>;
7274+
}
72537275
}
72547276

72557277
class sve_int_perm_rev<bits<2> sz8_64, bits<2> opc, string asm,

llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -61,10 +61,10 @@ define <8 x i8> @concat_v8i8(<4 x i8> %op1, <4 x i8> %op2) {
6161
define <16 x i8> @concat_v16i8(<8 x i8> %op1, <8 x i8> %op2) {
6262
; CHECK-LABEL: concat_v16i8:
6363
; CHECK: // %bb.0:
64+
; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
6465
; CHECK-NEXT: ptrue p0.b, vl8
65-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
66-
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
67-
; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
66+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
67+
; CHECK-NEXT: splice z0.b, p0, { z0.b, z1.b }
6868
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
6969
; CHECK-NEXT: ret
7070
;
@@ -172,10 +172,10 @@ define <4 x i16> @concat_v4i16(<2 x i16> %op1, <2 x i16> %op2) {
172172
define <8 x i16> @concat_v8i16(<4 x i16> %op1, <4 x i16> %op2) {
173173
; CHECK-LABEL: concat_v8i16:
174174
; CHECK: // %bb.0:
175+
; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
175176
; CHECK-NEXT: ptrue p0.h, vl4
176-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
177-
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
178-
; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
177+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
178+
; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h }
179179
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
180180
; CHECK-NEXT: ret
181181
;
@@ -270,10 +270,10 @@ define <2 x i32> @concat_v2i32(<1 x i32> %op1, <1 x i32> %op2) {
270270
define <4 x i32> @concat_v4i32(<2 x i32> %op1, <2 x i32> %op2) {
271271
; CHECK-LABEL: concat_v4i32:
272272
; CHECK: // %bb.0:
273+
; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
273274
; CHECK-NEXT: ptrue p0.s, vl2
274-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
275-
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
276-
; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
275+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
276+
; CHECK-NEXT: splice z0.s, p0, { z0.s, z1.s }
277277
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
278278
; CHECK-NEXT: ret
279279
;
@@ -340,10 +340,10 @@ define void @concat_v16i32(ptr %a, ptr %b, ptr %c) {
340340
define <2 x i64> @concat_v2i64(<1 x i64> %op1, <1 x i64> %op2) {
341341
; CHECK-LABEL: concat_v2i64:
342342
; CHECK: // %bb.0:
343+
; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
343344
; CHECK-NEXT: ptrue p0.d, vl1
344-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
345-
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
346-
; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
345+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
346+
; CHECK-NEXT: splice z0.d, p0, { z0.d, z1.d }
347347
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
348348
; CHECK-NEXT: ret
349349
;
@@ -452,10 +452,10 @@ define <4 x half> @concat_v4f16(<2 x half> %op1, <2 x half> %op2) {
452452
define <8 x half> @concat_v8f16(<4 x half> %op1, <4 x half> %op2) {
453453
; CHECK-LABEL: concat_v8f16:
454454
; CHECK: // %bb.0:
455+
; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
455456
; CHECK-NEXT: ptrue p0.h, vl4
456-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
457-
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
458-
; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
457+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
458+
; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h }
459459
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
460460
; CHECK-NEXT: ret
461461
;
@@ -550,10 +550,10 @@ define <2 x float> @concat_v2f32(<1 x float> %op1, <1 x float> %op2) {
550550
define <4 x float> @concat_v4f32(<2 x float> %op1, <2 x float> %op2) {
551551
; CHECK-LABEL: concat_v4f32:
552552
; CHECK: // %bb.0:
553+
; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
553554
; CHECK-NEXT: ptrue p0.s, vl2
554-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
555-
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
556-
; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
555+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
556+
; CHECK-NEXT: splice z0.s, p0, { z0.s, z1.s }
557557
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
558558
; CHECK-NEXT: ret
559559
;
@@ -620,10 +620,10 @@ define void @concat_v16f32(ptr %a, ptr %b, ptr %c) {
620620
define <2 x double> @concat_v2f64(<1 x double> %op1, <1 x double> %op2) {
621621
; CHECK-LABEL: concat_v2f64:
622622
; CHECK: // %bb.0:
623+
; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
623624
; CHECK-NEXT: ptrue p0.d, vl1
624-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
625-
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
626-
; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
625+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
626+
; CHECK-NEXT: splice z0.d, p0, { z0.d, z1.d }
627627
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
628628
; CHECK-NEXT: ret
629629
;

llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -842,16 +842,16 @@ define void @test_copysign_v4f32_v4f64(ptr %ap, ptr %bp) {
842842
;
843843
; SVE2-LABEL: test_copysign_v4f32_v4f64:
844844
; SVE2: // %bb.0:
845-
; SVE2-NEXT: ldp q0, q1, [x1]
845+
; SVE2-NEXT: ldp q1, q0, [x1]
846846
; SVE2-NEXT: ptrue p0.d
847-
; SVE2-NEXT: ldr q2, [x0]
848-
; SVE2-NEXT: fcvt z1.s, p0/m, z1.d
849847
; SVE2-NEXT: fcvt z0.s, p0/m, z0.d
848+
; SVE2-NEXT: fcvt z1.s, p0/m, z1.d
850849
; SVE2-NEXT: ptrue p0.s, vl2
851-
; SVE2-NEXT: uzp1 z1.s, z1.s, z1.s
852-
; SVE2-NEXT: uzp1 z0.s, z0.s, z0.s
853-
; SVE2-NEXT: splice z0.s, p0, z0.s, z1.s
850+
; SVE2-NEXT: uzp1 z3.s, z0.s, z0.s
851+
; SVE2-NEXT: uzp1 z2.s, z1.s, z1.s
854852
; SVE2-NEXT: mov z1.s, #0x7fffffff
853+
; SVE2-NEXT: splice z0.s, p0, { z2.s, z3.s }
854+
; SVE2-NEXT: ldr q2, [x0]
855855
; SVE2-NEXT: bsl z2.d, z2.d, z0.d, z1.d
856856
; SVE2-NEXT: str q2, [x0]
857857
; SVE2-NEXT: ret
@@ -1237,16 +1237,16 @@ define void @test_copysign_v8f16_v8f32(ptr %ap, ptr %bp) {
12371237
;
12381238
; SVE2-LABEL: test_copysign_v8f16_v8f32:
12391239
; SVE2: // %bb.0:
1240-
; SVE2-NEXT: ldp q0, q1, [x1]
1240+
; SVE2-NEXT: ldp q1, q0, [x1]
12411241
; SVE2-NEXT: ptrue p0.s
1242-
; SVE2-NEXT: ldr q2, [x0]
1243-
; SVE2-NEXT: fcvt z1.h, p0/m, z1.s
12441242
; SVE2-NEXT: fcvt z0.h, p0/m, z0.s
1243+
; SVE2-NEXT: fcvt z1.h, p0/m, z1.s
12451244
; SVE2-NEXT: ptrue p0.h, vl4
1246-
; SVE2-NEXT: uzp1 z1.h, z1.h, z1.h
1247-
; SVE2-NEXT: uzp1 z0.h, z0.h, z0.h
1248-
; SVE2-NEXT: splice z0.h, p0, z0.h, z1.h
1245+
; SVE2-NEXT: uzp1 z3.h, z0.h, z0.h
1246+
; SVE2-NEXT: uzp1 z2.h, z1.h, z1.h
12491247
; SVE2-NEXT: mov z1.h, #32767 // =0x7fff
1248+
; SVE2-NEXT: splice z0.h, p0, { z2.h, z3.h }
1249+
; SVE2-NEXT: ldr q2, [x0]
12501250
; SVE2-NEXT: bsl z2.d, z2.d, z0.d, z1.d
12511251
; SVE2-NEXT: str q2, [x0]
12521252
; SVE2-NEXT: ret

0 commit comments

Comments
 (0)