Skip to content

Commit 659ea54

Browse files
committed
[AArch64][SVE] Detect MOV (imm, pred, zeroing/merging)
Add patterns to fold a MOV (scalar, predicated) whose scalar operand is a constant into MOV (immediate, predicated, merging) or MOV (immediate, predicated, zeroing), as appropriate.
1 parent 247bbdd commit 659ea54

File tree

2 files changed

+28
-20
lines changed

2 files changed

+28
-20
lines changed

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -892,6 +892,26 @@ let Predicates = [HasSVEorSME] in {
892892
def : Pat<(nxv2i64 (splat_vector (i64 (SVECpyDupImm64Pat i32:$a, i32:$b)))),
893893
(DUP_ZI_D $a, $b)>;
894894

895+
// Duplicate an integer immediate into the active vector elements; inactive elements are zeroed (zeroing form).
896+
def : Pat<(nxv16i8 (AArch64dup_mt PPR:$pg, (i32 (SVECpyDupImm8Pat i32:$a, i32:$b)), (SVEDup0Undef))),
897+
(CPY_ZPzI_B $pg, $a, $b)>;
898+
def : Pat<(nxv8i16 (AArch64dup_mt PPR:$pg, (i32 (SVECpyDupImm16Pat i32:$a, i32:$b)), (SVEDup0Undef))),
899+
(CPY_ZPzI_H $pg, $a, $b)>;
900+
def : Pat<(nxv4i32 (AArch64dup_mt PPR:$pg, (i32 (SVECpyDupImm32Pat i32:$a, i32:$b)), (SVEDup0Undef))),
901+
(CPY_ZPzI_S $pg, $a, $b)>;
902+
def : Pat<(nxv2i64 (AArch64dup_mt PPR:$pg, (i64 (SVECpyDupImm64Pat i32:$a, i32:$b)), (SVEDup0Undef))),
903+
(CPY_ZPzI_D $pg, $a, $b)>;
904+
905+
// Duplicate an integer immediate into the active vector elements; inactive elements keep the value of $z (merging form).
906+
def : Pat<(nxv16i8 (AArch64dup_mt PPR:$pg, (i32 (SVECpyDupImm8Pat i32:$a, i32:$b)), (nxv16i8 ZPR:$z))),
907+
(CPY_ZPmI_B $z, $pg, $a, $b)>;
908+
def : Pat<(nxv8i16 (AArch64dup_mt PPR:$pg, (i32 (SVECpyDupImm16Pat i32:$a, i32:$b)), (nxv8i16 ZPR:$z))),
909+
(CPY_ZPmI_H $z, $pg, $a, $b)>;
910+
def : Pat<(nxv4i32 (AArch64dup_mt PPR:$pg, (i32 (SVECpyDupImm32Pat i32:$a, i32:$b)), (nxv4i32 ZPR:$z))),
911+
(CPY_ZPmI_S $z, $pg, $a, $b)>;
912+
def : Pat<(nxv2i64 (AArch64dup_mt PPR:$pg, (i64 (SVECpyDupImm64Pat i32:$a, i32:$b)), (nxv2i64 ZPR:$z))),
913+
(CPY_ZPmI_D $z, $pg, $a, $b)>;
914+
895915
// Duplicate immediate FP into all vector elements.
896916
def : Pat<(nxv2f16 (splat_vector (f16 fpimm:$val))),
897917
(DUP_ZR_H (MOVi32imm (bitcast_fpimm_to_i32 f16:$val)))>;

llvm/test/CodeGen/AArch64/sve-mov-imm-pred.ll

Lines changed: 8 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -6,9 +6,7 @@
66
define dso_local <vscale x 16 x i8> @mov_z_b(<vscale x 16 x i1> %pg) {
77
; CHECK-LABEL: mov_z_b:
88
; CHECK: // %bb.0:
9-
; CHECK-NEXT: mov z0.b, #0 // =0x0
10-
; CHECK-NEXT: mov w8, #1 // =0x1
11-
; CHECK-NEXT: mov z0.b, p0/m, w8
9+
; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1
1210
; CHECK-NEXT: ret
1311
%r = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> %pg, i8 1)
1412
ret <vscale x 16 x i8> %r
@@ -17,9 +15,7 @@ define dso_local <vscale x 16 x i8> @mov_z_b(<vscale x 16 x i1> %pg) {
1715
define dso_local <vscale x 8 x i16> @mov_z_h(<vscale x 8 x i1> %pg) {
1816
; CHECK-LABEL: mov_z_h:
1917
; CHECK: // %bb.0:
20-
; CHECK-NEXT: mov z0.h, #0 // =0x0
21-
; CHECK-NEXT: mov w8, #1 // =0x1
22-
; CHECK-NEXT: mov z0.h, p0/m, w8
18+
; CHECK-NEXT: mov z0.h, p0/z, #1 // =0x1
2319
; CHECK-NEXT: ret
2420
%r = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> %pg, i16 1)
2521
ret <vscale x 8 x i16> %r
@@ -28,9 +24,7 @@ define dso_local <vscale x 8 x i16> @mov_z_h(<vscale x 8 x i1> %pg) {
2824
define dso_local <vscale x 4 x i32> @mov_z_s(<vscale x 4 x i1> %pg) {
2925
; CHECK-LABEL: mov_z_s:
3026
; CHECK: // %bb.0:
31-
; CHECK-NEXT: mov z0.s, #0 // =0x0
32-
; CHECK-NEXT: mov w8, #1 // =0x1
33-
; CHECK-NEXT: mov z0.s, p0/m, w8
27+
; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1
3428
; CHECK-NEXT: ret
3529
%r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> %pg, i32 1)
3630
ret <vscale x 4 x i32> %r
@@ -39,9 +33,7 @@ define dso_local <vscale x 4 x i32> @mov_z_s(<vscale x 4 x i1> %pg) {
3933
define dso_local <vscale x 2 x i64> @mov_z_d(<vscale x 2 x i1> %pg) {
4034
; CHECK-LABEL: mov_z_d:
4135
; CHECK: // %bb.0:
42-
; CHECK-NEXT: mov z0.d, #0 // =0x0
43-
; CHECK-NEXT: mov w8, #1 // =0x1
44-
; CHECK-NEXT: mov z0.d, p0/m, x8
36+
; CHECK-NEXT: mov z0.d, p0/z, #1 // =0x1
4537
; CHECK-NEXT: ret
4638
%r = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> %pg, i64 1)
4739
ret <vscale x 2 x i64> %r
@@ -52,8 +44,7 @@ define dso_local <vscale x 2 x i64> @mov_z_d(<vscale x 2 x i1> %pg) {
5244
define dso_local <vscale x 16 x i8> @mov_m_b(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg) {
5345
; CHECK-LABEL: mov_m_b:
5446
; CHECK: // %bb.0:
55-
; CHECK-NEXT: mov w8, #1 // =0x1
56-
; CHECK-NEXT: mov z0.b, p0/m, w8
47+
; CHECK-NEXT: mov z0.b, p0/m, #1 // =0x1
5748
; CHECK-NEXT: ret
5849
%r = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i8 1)
5950
ret <vscale x 16 x i8> %r
@@ -62,8 +53,7 @@ define dso_local <vscale x 16 x i8> @mov_m_b(<vscale x 16 x i8> %zd, <vscale x 1
6253
define dso_local <vscale x 8 x i16> @mov_m_h(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg) {
6354
; CHECK-LABEL: mov_m_h:
6455
; CHECK: // %bb.0:
65-
; CHECK-NEXT: mov w8, #1 // =0x1
66-
; CHECK-NEXT: mov z0.h, p0/m, w8
56+
; CHECK-NEXT: mov z0.h, p0/m, #1 // =0x1
6757
; CHECK-NEXT: ret
6858
%r = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i16 1)
6959
ret <vscale x 8 x i16> %r
@@ -72,8 +62,7 @@ define dso_local <vscale x 8 x i16> @mov_m_h(<vscale x 8 x i16> %zd, <vscale x 8
7262
define dso_local <vscale x 4 x i32> @mov_m_s(<vscale x 4 x i32> %zd, <vscale x 4 x i1> %pg) {
7363
; CHECK-LABEL: mov_m_s:
7464
; CHECK: // %bb.0:
75-
; CHECK-NEXT: mov w8, #1 // =0x1
76-
; CHECK-NEXT: mov z0.s, p0/m, w8
65+
; CHECK-NEXT: mov z0.s, p0/m, #1 // =0x1
7766
; CHECK-NEXT: ret
7867
%r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32> %zd, <vscale x 4 x i1> %pg, i32 1)
7968
ret <vscale x 4 x i32> %r
@@ -82,8 +71,7 @@ define dso_local <vscale x 4 x i32> @mov_m_s(<vscale x 4 x i32> %zd, <vscale x 4
8271
define dso_local <vscale x 2 x i64> @mov_m_d(<vscale x 2 x i64> %zd, <vscale x 2 x i1> %pg) {
8372
; CHECK-LABEL: mov_m_d:
8473
; CHECK: // %bb.0:
85-
; CHECK-NEXT: mov w8, #1 // =0x1
86-
; CHECK-NEXT: mov z0.d, p0/m, x8
74+
; CHECK-NEXT: mov z0.d, p0/m, #1 // =0x1
8775
; CHECK-NEXT: ret
8876
%r = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64> %zd, <vscale x 2 x i1> %pg, i64 1)
8977
ret <vscale x 2 x i64> %r

0 commit comments

Comments
 (0)