Skip to content

Commit 74445d6

Browse files
[SVE] Add vselect(mla/mls) patterns for cases where a multiplicand is used for the false lanes.
Differential Revision: https://reviews.llvm.org/D155972
1 parent 03f1d09 commit 74445d6

File tree

3 files changed

+128
-129
lines changed

3 files changed

+128
-129
lines changed

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -401,19 +401,24 @@ def AArch64subr : PatFrag<(ops node:$op1, node:$op2),
401401
(sub node:$op2, node:$op1)>;
402402
def AArch64mla_m1 : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3),
403403
[(int_aarch64_sve_mla node:$pred, node:$op1, node:$op2, node:$op3),
404-
// select(mask, add(a, mul(b, c)), a) -> mla(a, mask, b, c)
405404
(vselect node:$pred, (add node:$op1, (AArch64mul_p_oneuse (SVEAllActive), node:$op2, node:$op3)), node:$op1)]>;
406-
// pattern for generating pseudo for MLA_ZPmZZ/MAD_ZPmZZ
407405
def AArch64mla_p : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3),
408406
[(int_aarch64_sve_mla_u node:$pred, node:$op1, node:$op2, node:$op3),
409407
(add node:$op1, (AArch64mul_p_oneuse node:$pred, node:$op2, node:$op3))]>;
408+
def AArch64mad_m1 : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3),
409+
[(int_aarch64_sve_mad node:$pred, node:$op1, node:$op2, node:$op3),
410+
(vselect node:$pred, (add node:$op3, (AArch64mul_p_oneuse (SVEAllActive), node:$op1, node:$op2)), node:$op1),
411+
(vselect node:$pred, (add node:$op3, (AArch64mul_p_oneuse (SVEAllActive), node:$op2, node:$op1)), node:$op1)]>;
410412
def AArch64mls_m1 : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3),
411413
[(int_aarch64_sve_mls node:$pred, node:$op1, node:$op2, node:$op3),
412-
// select(mask, sub(a, mul(b, c)), a) -> mls(a, mask, b, c)
413414
(vselect node:$pred, (sub node:$op1, (AArch64mul_p_oneuse (SVEAllActive), node:$op2, node:$op3)), node:$op1)]>;
414415
def AArch64mls_p : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3),
415416
[(int_aarch64_sve_mls_u node:$pred, node:$op1, node:$op2, node:$op3),
416417
(sub node:$op1, (AArch64mul_p_oneuse node:$pred, node:$op2, node:$op3))]>;
418+
def AArch64msb_m1 : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3),
419+
[(int_aarch64_sve_msb node:$pred, node:$op1, node:$op2, node:$op3),
420+
(vselect node:$pred, (sub node:$op3, (AArch64mul_p_oneuse (SVEAllActive), node:$op1, node:$op2)), node:$op1),
421+
(vselect node:$pred, (sub node:$op3, (AArch64mul_p_oneuse (SVEAllActive), node:$op2, node:$op1)), node:$op1)]>;
417422
def AArch64eor3 : PatFrags<(ops node:$op1, node:$op2, node:$op3),
418423
[(int_aarch64_sve_eor3 node:$op1, node:$op2, node:$op3),
419424
(xor node:$op1, (xor node:$op2, node:$op3))]>;
@@ -497,8 +502,8 @@ let Predicates = [HasSVEorSME] in {
497502
defm SQSUB_ZI : sve_int_arith_imm0<0b110, "sqsub", ssubsat>;
498503
defm UQSUB_ZI : sve_int_arith_imm0<0b111, "uqsub", usubsat>;
499504

500-
defm MAD_ZPmZZ : sve_int_mladdsub_vvv_pred<0b0, "mad", int_aarch64_sve_mad, "MLA_ZPmZZ", /*isReverseInstr*/ 1>;
501-
defm MSB_ZPmZZ : sve_int_mladdsub_vvv_pred<0b1, "msb", int_aarch64_sve_msb, "MLS_ZPmZZ", /*isReverseInstr*/ 1>;
505+
defm MAD_ZPmZZ : sve_int_mladdsub_vvv_pred<0b0, "mad", AArch64mad_m1, "MLA_ZPmZZ", /*isReverseInstr*/ 1>;
506+
defm MSB_ZPmZZ : sve_int_mladdsub_vvv_pred<0b1, "msb", AArch64msb_m1, "MLS_ZPmZZ", /*isReverseInstr*/ 1>;
502507
defm MLA_ZPmZZ : sve_int_mlas_vvv_pred<0b0, "mla", AArch64mla_m1, "MLA_ZPZZZ", "MAD_ZPmZZ">;
503508
defm MLS_ZPmZZ : sve_int_mlas_vvv_pred<0b1, "mls", AArch64mls_m1, "MLS_ZPZZZ", "MSB_ZPmZZ">;
504509

llvm/test/CodeGen/AArch64/sve-pred-selectop2.ll

Lines changed: 86 additions & 88 deletions
Original file line number | Diff line number | Diff line change
@@ -935,9 +935,8 @@ define <vscale x 2 x i64> @mls_nxv2i64_x(<vscale x 2 x i64> %x, <vscale x 2 x i6
935935
; CHECK-LABEL: mls_nxv2i64_x:
936936
; CHECK: // %bb.0: // %entry
937937
; CHECK-NEXT: ptrue p0.d
938-
; CHECK-NEXT: cmpgt p1.d, p0/z, z3.d, #0
939-
; CHECK-NEXT: msb z1.d, p0/m, z0.d, z2.d
940-
; CHECK-NEXT: mov z0.d, p1/m, z1.d
938+
; CHECK-NEXT: cmpgt p0.d, p0/z, z3.d, #0
939+
; CHECK-NEXT: msb z0.d, p0/m, z1.d, z2.d
941940
; CHECK-NEXT: ret
942941
entry:
943942
%c = icmp sgt <vscale x 2 x i64> %n, zeroinitializer
@@ -951,9 +950,8 @@ define <vscale x 4 x i32> @mls_nxv4i32_x(<vscale x 4 x i32> %x, <vscale x 4 x i3
951950
; CHECK-LABEL: mls_nxv4i32_x:
952951
; CHECK: // %bb.0: // %entry
953952
; CHECK-NEXT: ptrue p0.s
954-
; CHECK-NEXT: cmpgt p1.s, p0/z, z3.s, #0
955-
; CHECK-NEXT: msb z1.s, p0/m, z0.s, z2.s
956-
; CHECK-NEXT: mov z0.s, p1/m, z1.s
953+
; CHECK-NEXT: cmpgt p0.s, p0/z, z3.s, #0
954+
; CHECK-NEXT: msb z0.s, p0/m, z1.s, z2.s
957955
; CHECK-NEXT: ret
958956
entry:
959957
%c = icmp sgt <vscale x 4 x i32> %n, zeroinitializer
@@ -967,9 +965,8 @@ define <vscale x 8 x i16> @mls_nxv8i16_x(<vscale x 8 x i16> %x, <vscale x 8 x i1
967965
; CHECK-LABEL: mls_nxv8i16_x:
968966
; CHECK: // %bb.0: // %entry
969967
; CHECK-NEXT: ptrue p0.h
970-
; CHECK-NEXT: cmpgt p1.h, p0/z, z3.h, #0
971-
; CHECK-NEXT: msb z1.h, p0/m, z0.h, z2.h
972-
; CHECK-NEXT: mov z0.h, p1/m, z1.h
968+
; CHECK-NEXT: cmpgt p0.h, p0/z, z3.h, #0
969+
; CHECK-NEXT: msb z0.h, p0/m, z1.h, z2.h
973970
; CHECK-NEXT: ret
974971
entry:
975972
%c = icmp sgt <vscale x 8 x i16> %n, zeroinitializer
@@ -983,9 +980,8 @@ define <vscale x 16 x i8> @mls_nxv16i8_x(<vscale x 16 x i8> %x, <vscale x 16 x i
983980
; CHECK-LABEL: mls_nxv16i8_x:
984981
; CHECK: // %bb.0: // %entry
985982
; CHECK-NEXT: ptrue p0.b
986-
; CHECK-NEXT: cmpgt p1.b, p0/z, z3.b, #0
987-
; CHECK-NEXT: msb z1.b, p0/m, z0.b, z2.b
988-
; CHECK-NEXT: mov z0.b, p1/m, z1.b
983+
; CHECK-NEXT: cmpgt p0.b, p0/z, z3.b, #0
984+
; CHECK-NEXT: msb z0.b, p0/m, z1.b, z2.b
989985
; CHECK-NEXT: ret
990986
entry:
991987
%c = icmp sgt <vscale x 16 x i8> %n, zeroinitializer
@@ -1812,8 +1808,8 @@ define <vscale x 2 x i64> @srem_nxv2i64_y(<vscale x 2 x i64> %x, <vscale x 2 x i
18121808
; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
18131809
; CHECK-NEXT: movprfx z2, z0
18141810
; CHECK-NEXT: sdiv z2.d, p0/m, z2.d, z1.d
1815-
; CHECK-NEXT: mls z0.d, p0/m, z2.d, z1.d
1816-
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
1811+
; CHECK-NEXT: msb z1.d, p1/m, z2.d, z0.d
1812+
; CHECK-NEXT: mov z0.d, z1.d
18171813
; CHECK-NEXT: ret
18181814
entry:
18191815
%c = icmp sgt <vscale x 2 x i64> %n, zeroinitializer
@@ -1829,8 +1825,8 @@ define <vscale x 4 x i32> @srem_nxv4i32_y(<vscale x 4 x i32> %x, <vscale x 4 x i
18291825
; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
18301826
; CHECK-NEXT: movprfx z2, z0
18311827
; CHECK-NEXT: sdiv z2.s, p0/m, z2.s, z1.s
1832-
; CHECK-NEXT: mls z0.s, p0/m, z2.s, z1.s
1833-
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
1828+
; CHECK-NEXT: msb z1.s, p1/m, z2.s, z0.s
1829+
; CHECK-NEXT: mov z0.d, z1.d
18341830
; CHECK-NEXT: ret
18351831
entry:
18361832
%c = icmp sgt <vscale x 4 x i32> %n, zeroinitializer
@@ -1842,19 +1838,19 @@ entry:
18421838
define <vscale x 8 x i16> @srem_nxv8i16_y(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y, <vscale x 8 x i16> %n) {
18431839
; CHECK-LABEL: srem_nxv8i16_y:
18441840
; CHECK: // %bb.0: // %entry
1845-
; CHECK-NEXT: ptrue p0.s
1841+
; CHECK-NEXT: ptrue p1.s
18461842
; CHECK-NEXT: sunpkhi z3.s, z1.h
18471843
; CHECK-NEXT: sunpkhi z4.s, z0.h
1844+
; CHECK-NEXT: ptrue p0.h
1845+
; CHECK-NEXT: sdivr z3.s, p1/m, z3.s, z4.s
18481846
; CHECK-NEXT: sunpklo z5.s, z1.h
1849-
; CHECK-NEXT: sdivr z3.s, p0/m, z3.s, z4.s
18501847
; CHECK-NEXT: sunpklo z6.s, z0.h
18511848
; CHECK-NEXT: movprfx z4, z6
1852-
; CHECK-NEXT: sdiv z4.s, p0/m, z4.s, z5.s
1853-
; CHECK-NEXT: ptrue p0.h
1854-
; CHECK-NEXT: uzp1 z3.h, z4.h, z3.h
1855-
; CHECK-NEXT: cmpgt p1.h, p0/z, z2.h, #0
1856-
; CHECK-NEXT: mls z0.h, p0/m, z3.h, z1.h
1857-
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
1849+
; CHECK-NEXT: sdiv z4.s, p1/m, z4.s, z5.s
1850+
; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
1851+
; CHECK-NEXT: uzp1 z2.h, z4.h, z3.h
1852+
; CHECK-NEXT: msb z1.h, p0/m, z2.h, z0.h
1853+
; CHECK-NEXT: mov z0.d, z1.d
18581854
; CHECK-NEXT: ret
18591855
entry:
18601856
%c = icmp sgt <vscale x 8 x i16> %n, zeroinitializer
@@ -1871,25 +1867,26 @@ define <vscale x 16 x i8> @srem_nxv16i8_y(<vscale x 16 x i8> %x, <vscale x 16 x
18711867
; CHECK-NEXT: ptrue p0.s
18721868
; CHECK-NEXT: sunpkhi z5.s, z3.h
18731869
; CHECK-NEXT: sunpkhi z6.s, z4.h
1874-
; CHECK-NEXT: sunpklo z3.s, z3.h
1875-
; CHECK-NEXT: sunpklo z4.s, z4.h
1870+
; CHECK-NEXT: sunpklo z7.h, z1.b
18761871
; CHECK-NEXT: sdivr z5.s, p0/m, z5.s, z6.s
1877-
; CHECK-NEXT: sdivr z3.s, p0/m, z3.s, z4.s
1878-
; CHECK-NEXT: sunpklo z4.h, z1.b
18791872
; CHECK-NEXT: sunpklo z6.h, z0.b
1880-
; CHECK-NEXT: sunpkhi z7.s, z4.h
1881-
; CHECK-NEXT: sunpkhi z24.s, z6.h
1873+
; CHECK-NEXT: sunpklo z3.s, z3.h
18821874
; CHECK-NEXT: sunpklo z4.s, z4.h
1875+
; CHECK-NEXT: sunpkhi z24.s, z7.h
1876+
; CHECK-NEXT: sunpkhi z25.s, z6.h
1877+
; CHECK-NEXT: sunpklo z7.s, z7.h
18831878
; CHECK-NEXT: sunpklo z6.s, z6.h
1884-
; CHECK-NEXT: sdivr z7.s, p0/m, z7.s, z24.s
1885-
; CHECK-NEXT: sdivr z4.s, p0/m, z4.s, z6.s
1886-
; CHECK-NEXT: uzp1 z3.h, z3.h, z5.h
1887-
; CHECK-NEXT: uzp1 z4.h, z4.h, z7.h
1879+
; CHECK-NEXT: sdivr z3.s, p0/m, z3.s, z4.s
1880+
; CHECK-NEXT: movprfx z4, z25
1881+
; CHECK-NEXT: sdiv z4.s, p0/m, z4.s, z24.s
1882+
; CHECK-NEXT: sdiv z6.s, p0/m, z6.s, z7.s
18881883
; CHECK-NEXT: ptrue p0.b
1889-
; CHECK-NEXT: uzp1 z3.b, z4.b, z3.b
1890-
; CHECK-NEXT: cmpgt p1.b, p0/z, z2.b, #0
1891-
; CHECK-NEXT: mls z0.b, p0/m, z3.b, z1.b
1892-
; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b
1884+
; CHECK-NEXT: uzp1 z3.h, z3.h, z5.h
1885+
; CHECK-NEXT: uzp1 z4.h, z6.h, z4.h
1886+
; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
1887+
; CHECK-NEXT: uzp1 z2.b, z4.b, z3.b
1888+
; CHECK-NEXT: msb z1.b, p0/m, z2.b, z0.b
1889+
; CHECK-NEXT: mov z0.d, z1.d
18931890
; CHECK-NEXT: ret
18941891
entry:
18951892
%c = icmp sgt <vscale x 16 x i8> %n, zeroinitializer
@@ -1905,8 +1902,8 @@ define <vscale x 2 x i64> @urem_nxv2i64_y(<vscale x 2 x i64> %x, <vscale x 2 x i
19051902
; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
19061903
; CHECK-NEXT: movprfx z2, z0
19071904
; CHECK-NEXT: udiv z2.d, p0/m, z2.d, z1.d
1908-
; CHECK-NEXT: mls z0.d, p0/m, z2.d, z1.d
1909-
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
1905+
; CHECK-NEXT: msb z1.d, p1/m, z2.d, z0.d
1906+
; CHECK-NEXT: mov z0.d, z1.d
19101907
; CHECK-NEXT: ret
19111908
entry:
19121909
%c = icmp sgt <vscale x 2 x i64> %n, zeroinitializer
@@ -1922,8 +1919,8 @@ define <vscale x 4 x i32> @urem_nxv4i32_y(<vscale x 4 x i32> %x, <vscale x 4 x i
19221919
; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
19231920
; CHECK-NEXT: movprfx z2, z0
19241921
; CHECK-NEXT: udiv z2.s, p0/m, z2.s, z1.s
1925-
; CHECK-NEXT: mls z0.s, p0/m, z2.s, z1.s
1926-
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
1922+
; CHECK-NEXT: msb z1.s, p1/m, z2.s, z0.s
1923+
; CHECK-NEXT: mov z0.d, z1.d
19271924
; CHECK-NEXT: ret
19281925
entry:
19291926
%c = icmp sgt <vscale x 4 x i32> %n, zeroinitializer
@@ -1935,19 +1932,19 @@ entry:
19351932
define <vscale x 8 x i16> @urem_nxv8i16_y(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y, <vscale x 8 x i16> %n) {
19361933
; CHECK-LABEL: urem_nxv8i16_y:
19371934
; CHECK: // %bb.0: // %entry
1938-
; CHECK-NEXT: ptrue p0.s
1935+
; CHECK-NEXT: ptrue p1.s
19391936
; CHECK-NEXT: uunpkhi z3.s, z1.h
19401937
; CHECK-NEXT: uunpkhi z4.s, z0.h
1938+
; CHECK-NEXT: ptrue p0.h
1939+
; CHECK-NEXT: udivr z3.s, p1/m, z3.s, z4.s
19411940
; CHECK-NEXT: uunpklo z5.s, z1.h
1942-
; CHECK-NEXT: udivr z3.s, p0/m, z3.s, z4.s
19431941
; CHECK-NEXT: uunpklo z6.s, z0.h
19441942
; CHECK-NEXT: movprfx z4, z6
1945-
; CHECK-NEXT: udiv z4.s, p0/m, z4.s, z5.s
1946-
; CHECK-NEXT: ptrue p0.h
1947-
; CHECK-NEXT: uzp1 z3.h, z4.h, z3.h
1948-
; CHECK-NEXT: cmpgt p1.h, p0/z, z2.h, #0
1949-
; CHECK-NEXT: mls z0.h, p0/m, z3.h, z1.h
1950-
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
1943+
; CHECK-NEXT: udiv z4.s, p1/m, z4.s, z5.s
1944+
; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
1945+
; CHECK-NEXT: uzp1 z2.h, z4.h, z3.h
1946+
; CHECK-NEXT: msb z1.h, p0/m, z2.h, z0.h
1947+
; CHECK-NEXT: mov z0.d, z1.d
19511948
; CHECK-NEXT: ret
19521949
entry:
19531950
%c = icmp sgt <vscale x 8 x i16> %n, zeroinitializer
@@ -1964,25 +1961,26 @@ define <vscale x 16 x i8> @urem_nxv16i8_y(<vscale x 16 x i8> %x, <vscale x 16 x
19641961
; CHECK-NEXT: ptrue p0.s
19651962
; CHECK-NEXT: uunpkhi z5.s, z3.h
19661963
; CHECK-NEXT: uunpkhi z6.s, z4.h
1967-
; CHECK-NEXT: uunpklo z3.s, z3.h
1968-
; CHECK-NEXT: uunpklo z4.s, z4.h
1964+
; CHECK-NEXT: uunpklo z7.h, z1.b
19691965
; CHECK-NEXT: udivr z5.s, p0/m, z5.s, z6.s
1970-
; CHECK-NEXT: udivr z3.s, p0/m, z3.s, z4.s
1971-
; CHECK-NEXT: uunpklo z4.h, z1.b
19721966
; CHECK-NEXT: uunpklo z6.h, z0.b
1973-
; CHECK-NEXT: uunpkhi z7.s, z4.h
1974-
; CHECK-NEXT: uunpkhi z24.s, z6.h
1967+
; CHECK-NEXT: uunpklo z3.s, z3.h
19751968
; CHECK-NEXT: uunpklo z4.s, z4.h
1969+
; CHECK-NEXT: uunpkhi z24.s, z7.h
1970+
; CHECK-NEXT: uunpkhi z25.s, z6.h
1971+
; CHECK-NEXT: uunpklo z7.s, z7.h
19761972
; CHECK-NEXT: uunpklo z6.s, z6.h
1977-
; CHECK-NEXT: udivr z7.s, p0/m, z7.s, z24.s
1978-
; CHECK-NEXT: udivr z4.s, p0/m, z4.s, z6.s
1979-
; CHECK-NEXT: uzp1 z3.h, z3.h, z5.h
1980-
; CHECK-NEXT: uzp1 z4.h, z4.h, z7.h
1973+
; CHECK-NEXT: udivr z3.s, p0/m, z3.s, z4.s
1974+
; CHECK-NEXT: movprfx z4, z25
1975+
; CHECK-NEXT: udiv z4.s, p0/m, z4.s, z24.s
1976+
; CHECK-NEXT: udiv z6.s, p0/m, z6.s, z7.s
19811977
; CHECK-NEXT: ptrue p0.b
1982-
; CHECK-NEXT: uzp1 z3.b, z4.b, z3.b
1983-
; CHECK-NEXT: cmpgt p1.b, p0/z, z2.b, #0
1984-
; CHECK-NEXT: mls z0.b, p0/m, z3.b, z1.b
1985-
; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b
1978+
; CHECK-NEXT: uzp1 z3.h, z3.h, z5.h
1979+
; CHECK-NEXT: uzp1 z4.h, z6.h, z4.h
1980+
; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
1981+
; CHECK-NEXT: uzp1 z2.b, z4.b, z3.b
1982+
; CHECK-NEXT: msb z1.b, p0/m, z2.b, z0.b
1983+
; CHECK-NEXT: mov z0.d, z1.d
19861984
; CHECK-NEXT: ret
19871985
entry:
19881986
%c = icmp sgt <vscale x 16 x i8> %n, zeroinitializer
@@ -2355,9 +2353,9 @@ define <vscale x 2 x i64> @mla_nxv2i64_y(<vscale x 2 x i64> %x, <vscale x 2 x i6
23552353
; CHECK-LABEL: mla_nxv2i64_y:
23562354
; CHECK: // %bb.0: // %entry
23572355
; CHECK-NEXT: ptrue p0.d
2358-
; CHECK-NEXT: cmpgt p1.d, p0/z, z3.d, #0
2359-
; CHECK-NEXT: mla z0.d, p0/m, z1.d, z2.d
2360-
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
2356+
; CHECK-NEXT: cmpgt p0.d, p0/z, z3.d, #0
2357+
; CHECK-NEXT: mad z1.d, p0/m, z2.d, z0.d
2358+
; CHECK-NEXT: mov z0.d, z1.d
23612359
; CHECK-NEXT: ret
23622360
entry:
23632361
%c = icmp sgt <vscale x 2 x i64> %n, zeroinitializer
@@ -2371,9 +2369,9 @@ define <vscale x 4 x i32> @mla_nxv4i32_y(<vscale x 4 x i32> %x, <vscale x 4 x i3
23712369
; CHECK-LABEL: mla_nxv4i32_y:
23722370
; CHECK: // %bb.0: // %entry
23732371
; CHECK-NEXT: ptrue p0.s
2374-
; CHECK-NEXT: cmpgt p1.s, p0/z, z3.s, #0
2375-
; CHECK-NEXT: mla z0.s, p0/m, z1.s, z2.s
2376-
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
2372+
; CHECK-NEXT: cmpgt p0.s, p0/z, z3.s, #0
2373+
; CHECK-NEXT: mad z1.s, p0/m, z2.s, z0.s
2374+
; CHECK-NEXT: mov z0.d, z1.d
23772375
; CHECK-NEXT: ret
23782376
entry:
23792377
%c = icmp sgt <vscale x 4 x i32> %n, zeroinitializer
@@ -2387,9 +2385,9 @@ define <vscale x 8 x i16> @mla_nxv8i16_y(<vscale x 8 x i16> %x, <vscale x 8 x i1
23872385
; CHECK-LABEL: mla_nxv8i16_y:
23882386
; CHECK: // %bb.0: // %entry
23892387
; CHECK-NEXT: ptrue p0.h
2390-
; CHECK-NEXT: cmpgt p1.h, p0/z, z3.h, #0
2391-
; CHECK-NEXT: mla z0.h, p0/m, z1.h, z2.h
2392-
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
2388+
; CHECK-NEXT: cmpgt p0.h, p0/z, z3.h, #0
2389+
; CHECK-NEXT: mad z1.h, p0/m, z2.h, z0.h
2390+
; CHECK-NEXT: mov z0.d, z1.d
23932391
; CHECK-NEXT: ret
23942392
entry:
23952393
%c = icmp sgt <vscale x 8 x i16> %n, zeroinitializer
@@ -2403,9 +2401,9 @@ define <vscale x 16 x i8> @mla_nxv16i8_y(<vscale x 16 x i8> %x, <vscale x 16 x i
24032401
; CHECK-LABEL: mla_nxv16i8_y:
24042402
; CHECK: // %bb.0: // %entry
24052403
; CHECK-NEXT: ptrue p0.b
2406-
; CHECK-NEXT: cmpgt p1.b, p0/z, z3.b, #0
2407-
; CHECK-NEXT: mla z0.b, p0/m, z1.b, z2.b
2408-
; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b
2404+
; CHECK-NEXT: cmpgt p0.b, p0/z, z3.b, #0
2405+
; CHECK-NEXT: mad z1.b, p0/m, z2.b, z0.b
2406+
; CHECK-NEXT: mov z0.d, z1.d
24092407
; CHECK-NEXT: ret
24102408
entry:
24112409
%c = icmp sgt <vscale x 16 x i8> %n, zeroinitializer
@@ -2419,9 +2417,9 @@ define <vscale x 2 x i64> @mls_nxv2i64_y(<vscale x 2 x i64> %x, <vscale x 2 x i6
24192417
; CHECK-LABEL: mls_nxv2i64_y:
24202418
; CHECK: // %bb.0: // %entry
24212419
; CHECK-NEXT: ptrue p0.d
2422-
; CHECK-NEXT: cmpgt p1.d, p0/z, z3.d, #0
2423-
; CHECK-NEXT: msb z0.d, p0/m, z1.d, z2.d
2424-
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
2420+
; CHECK-NEXT: cmpgt p0.d, p0/z, z3.d, #0
2421+
; CHECK-NEXT: msb z1.d, p0/m, z0.d, z2.d
2422+
; CHECK-NEXT: mov z0.d, z1.d
24252423
; CHECK-NEXT: ret
24262424
entry:
24272425
%c = icmp sgt <vscale x 2 x i64> %n, zeroinitializer
@@ -2435,9 +2433,9 @@ define <vscale x 4 x i32> @mls_nxv4i32_y(<vscale x 4 x i32> %x, <vscale x 4 x i3
24352433
; CHECK-LABEL: mls_nxv4i32_y:
24362434
; CHECK: // %bb.0: // %entry
24372435
; CHECK-NEXT: ptrue p0.s
2438-
; CHECK-NEXT: cmpgt p1.s, p0/z, z3.s, #0
2439-
; CHECK-NEXT: msb z0.s, p0/m, z1.s, z2.s
2440-
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
2436+
; CHECK-NEXT: cmpgt p0.s, p0/z, z3.s, #0
2437+
; CHECK-NEXT: msb z1.s, p0/m, z0.s, z2.s
2438+
; CHECK-NEXT: mov z0.d, z1.d
24412439
; CHECK-NEXT: ret
24422440
entry:
24432441
%c = icmp sgt <vscale x 4 x i32> %n, zeroinitializer
@@ -2451,9 +2449,9 @@ define <vscale x 8 x i16> @mls_nxv8i16_y(<vscale x 8 x i16> %x, <vscale x 8 x i1
24512449
; CHECK-LABEL: mls_nxv8i16_y:
24522450
; CHECK: // %bb.0: // %entry
24532451
; CHECK-NEXT: ptrue p0.h
2454-
; CHECK-NEXT: cmpgt p1.h, p0/z, z3.h, #0
2455-
; CHECK-NEXT: msb z0.h, p0/m, z1.h, z2.h
2456-
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
2452+
; CHECK-NEXT: cmpgt p0.h, p0/z, z3.h, #0
2453+
; CHECK-NEXT: msb z1.h, p0/m, z0.h, z2.h
2454+
; CHECK-NEXT: mov z0.d, z1.d
24572455
; CHECK-NEXT: ret
24582456
entry:
24592457
%c = icmp sgt <vscale x 8 x i16> %n, zeroinitializer
@@ -2467,9 +2465,9 @@ define <vscale x 16 x i8> @mls_nxv16i8_y(<vscale x 16 x i8> %x, <vscale x 16 x i
24672465
; CHECK-LABEL: mls_nxv16i8_y:
24682466
; CHECK: // %bb.0: // %entry
24692467
; CHECK-NEXT: ptrue p0.b
2470-
; CHECK-NEXT: cmpgt p1.b, p0/z, z3.b, #0
2471-
; CHECK-NEXT: msb z0.b, p0/m, z1.b, z2.b
2472-
; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b
2468+
; CHECK-NEXT: cmpgt p0.b, p0/z, z3.b, #0
2469+
; CHECK-NEXT: msb z1.b, p0/m, z0.b, z2.b
2470+
; CHECK-NEXT: mov z0.d, z1.d
24732471
; CHECK-NEXT: ret
24742472
entry:
24752473
%c = icmp sgt <vscale x 16 x i8> %n, zeroinitializer

0 commit comments

Comments
 (0)