Skip to content

Commit cdacb32

Browse files
committed
aarch64: Add support for SME_B16B16
This patch adds support for the SME_B16B16 extension. It follows similar lines to the SME_F16F16 extension added earlier. gcc/ * config/aarch64/aarch64-option-extensions.def (sme-b16b16): New extension. * doc/invoke.texi: Document it. * config/aarch64/aarch64.h (TARGET_STREAMING_SME_B16B16): New macro. * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Conditionally define __ARM_FEATURE_SME_B16B16. * config/aarch64/aarch64-sve-builtins-sme.def: Add SME_B16B16 forms of existing intrinsics. * config/aarch64/aarch64-sme.md (@aarch64_sme_<SME_BINARY_SLICE_HSDF:optab><mode>) (*aarch64_sme_<SME_BINARY_SLICE_HSDF:optab><mode>_plus) (@aarch64_sme_<SME_FP_TERNARY_SLICE:optab><mode><mode>) (*aarch64_sme_<SME_FP_TERNARY_SLICE:optab><mode><mode>_plus) (@aarch64_sme_single_<SME_FP_TERNARY_SLICE:optab><mode><mode>) (*aarch64_sme_single_<SME_FP_TERNARY_SLICE:optab><mode><mode>_plus) (@aarch64_sme_lane_<SME_FP_TERNARY_SLICE:optab><mode><mode>) (*aarch64_sme_lane_<SME_FP_TERNARY_SLICE:optab><mode><mode>) (@aarch64_sme_<SME_FP_MOP:optab><mode><mode>): Extend to BF16 modes. * config/aarch64/aarch64-sve-builtins.cc (TYPES_za_h_bfloat): New type macro. * config/aarch64/iterators.md (SME_ZA_HSDFx24): Add BF16 modes. (SME_MOP_HSDF): Likewise. gcc/testsuite/ * lib/target-supports.exp: Test the assembler for sme-b16b16 support. * gcc.target/aarch64/pragma_cpp_predefs_4.c: Add tests for __ARM_FEATURE_SME_B16B16. * gcc.target/aarch64/sme2/acle-asm/add_za16_bf16_vg1x2.c: New test. * gcc.target/aarch64/sme2/acle-asm/add_za16_bf16_vg1x4.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/mla_lane_za16_bf16_vg1x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/mla_lane_za16_bf16_vg1x4.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/mla_za16_bf16_vg1x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/mla_za16_bf16_vg1x4.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/mls_lane_za16_bf16_vg1x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/mls_lane_za16_bf16_vg1x4.c: Likewise. 
* gcc.target/aarch64/sme2/acle-asm/mls_za16_bf16_vg1x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/mls_za16_bf16_vg1x4.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/mopa_za16_bf16.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/mops_za16_bf16.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/sub_za16_bf16_vg1x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/sub_za16_bf16_vg1x4.c: Likewise.
1 parent e6751e1 commit cdacb32

24 files changed

+1824
-11
lines changed

gcc/config/aarch64/aarch64-c.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,8 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
271271

272272
aarch64_def_or_undef (TARGET_SME, "__ARM_FEATURE_SME", pfile);
273273
aarch64_def_or_undef (TARGET_SME_I16I64, "__ARM_FEATURE_SME_I16I64", pfile);
274+
aarch64_def_or_undef (AARCH64_HAVE_ISA (SME_B16B16),
275+
"__ARM_FEATURE_SME_B16B16", pfile);
274276
aarch64_def_or_undef (AARCH64_HAVE_ISA (SME_F16F16),
275277
"__ARM_FEATURE_SME_F16F16", pfile);
276278
aarch64_def_or_undef (TARGET_SME_F64F64, "__ARM_FEATURE_SME_F64F64", pfile);

gcc/config/aarch64/aarch64-option-extensions.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,8 @@ AARCH64_FMV_FEATURE("sme-i16i64", SME_I64, (SME_I16I64))
225225

226226
AARCH64_OPT_FMV_EXTENSION("sme2", SME2, (SME), (), (), "sme2")
227227

228+
AARCH64_OPT_EXTENSION("sme-b16b16", SME_B16B16, (SME2, SVE_B16B16), (), (), "")
229+
228230
AARCH64_OPT_EXTENSION("sme-f16f16", SME_F16F16, (SME2), (), (), "")
229231

230232
AARCH64_OPT_EXTENSION("mops", MOPS, (), (), (), "")

gcc/config/aarch64/aarch64-sme.md

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -926,6 +926,8 @@
926926
;; -------------------------------------------------------------------------
927927
;; Includes:
928928
;; - ADD
929+
;; - BFADD
930+
;; - BFSUB
929931
;; - FADD
930932
;; - FSUB
931933
;; - SUB
@@ -965,7 +967,7 @@
965967
(match_operand:SME_ZA_HSDFx24 1 "aligned_register_operand" "Uw<vector_count>")]
966968
SME_BINARY_SLICE_HSDF))]
967969
"TARGET_STREAMING_SME2"
968-
"<optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1"
970+
"<b><optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1"
969971
)
970972

971973
(define_insn "*aarch64_sme_<optab><mode>_plus"
@@ -978,7 +980,7 @@
978980
(match_operand:SME_ZA_HSDFx24 2 "aligned_register_operand" "Uw<vector_count>")]
979981
SME_BINARY_SLICE_HSDF))]
980982
"TARGET_STREAMING_SME2"
981-
"<optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2"
983+
"<b><optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2"
982984
)
983985

984986
;; -------------------------------------------------------------------------
@@ -1632,6 +1634,8 @@
16321634
;; ---- [FP] Ternary arithmetic on ZA slice
16331635
;; -------------------------------------------------------------------------
16341636
;; Includes:
1637+
;; - BFMLA
1638+
;; - BFMLS
16351639
;; - FMLA
16361640
;; - FMLS
16371641
;; -------------------------------------------------------------------------
@@ -1646,7 +1650,7 @@
16461650
(match_operand:SME_ZA_HSDFx24 2 "aligned_register_operand" "Uw<vector_count>")]
16471651
SME_FP_TERNARY_SLICE))]
16481652
"TARGET_STREAMING_SME2"
1649-
"<optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2"
1653+
"<b><optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2"
16501654
)
16511655

16521656
(define_insn "*aarch64_sme_<optab><mode><mode>_plus"
@@ -1660,7 +1664,7 @@
16601664
(match_operand:SME_ZA_HSDFx24 3 "aligned_register_operand" "Uw<vector_count>")]
16611665
SME_FP_TERNARY_SLICE))]
16621666
"TARGET_STREAMING_SME2"
1663-
"<optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3"
1667+
"<b><optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3"
16641668
)
16651669

16661670
(define_insn "@aarch64_sme_single_<optab><mode><mode>"
@@ -1674,7 +1678,7 @@
16741678
(match_operand:<SME_ZA_HSDFx24:VSINGLE> 2 "register_operand" "x"))]
16751679
SME_FP_TERNARY_SLICE))]
16761680
"TARGET_STREAMING_SME2"
1677-
"<optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<Vetype>"
1681+
"<b><optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<Vetype>"
16781682
)
16791683

16801684
(define_insn "*aarch64_sme_single_<optab><mode><mode>_plus"
@@ -1689,7 +1693,7 @@
16891693
(match_operand:<SME_ZA_HSDFx24:VSINGLE> 3 "register_operand" "x"))]
16901694
SME_FP_TERNARY_SLICE))]
16911695
"TARGET_STREAMING_SME2"
1692-
"<optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<Vetype>"
1696+
"<b><optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<Vetype>"
16931697
)
16941698

16951699
(define_insn "@aarch64_sme_lane_<optab><mode><mode>"
@@ -1705,7 +1709,7 @@
17051709
UNSPEC_SVE_LANE_SELECT)]
17061710
SME_FP_TERNARY_SLICE))]
17071711
"TARGET_STREAMING_SME2"
1708-
"<optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<Vetype>[%3]"
1712+
"<b><optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<Vetype>[%3]"
17091713
)
17101714

17111715
(define_insn "*aarch64_sme_lane_<optab><mode><mode>"
@@ -1722,7 +1726,7 @@
17221726
UNSPEC_SVE_LANE_SELECT)]
17231727
SME_FP_TERNARY_SLICE))]
17241728
"TARGET_STREAMING_SME2"
1725-
"<optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<Vetype>[%4]"
1729+
"<b><optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<Vetype>[%4]"
17261730
)
17271731

17281732
;; -------------------------------------------------------------------------

gcc/config/aarch64/aarch64-sve-builtins-sme.def

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,21 @@ DEF_SME_ZA_FUNCTION (svmops, binary_za_m, za_h_float, za_m)
221221
DEF_SME_ZA_FUNCTION_GS (svsub, unary_za_slice, za_h_float, vg1x24, none)
222222
#undef REQUIRED_EXTENSIONS
223223

224+
#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME_B16B16)
225+
DEF_SME_ZA_FUNCTION_GS (svadd, unary_za_slice, za_h_bfloat, vg1x24, none)
226+
DEF_SME_ZA_FUNCTION_GS (svmla, binary_za_slice_opt_single, za_h_bfloat,
227+
vg1x24, none)
228+
DEF_SME_ZA_FUNCTION_GS (svmla_lane, binary_za_slice_lane, za_h_bfloat,
229+
vg1x24, none)
230+
DEF_SME_ZA_FUNCTION_GS (svmls, binary_za_slice_opt_single, za_h_bfloat,
231+
vg1x24, none)
232+
DEF_SME_ZA_FUNCTION_GS (svmls_lane, binary_za_slice_lane, za_h_bfloat,
233+
vg1x24, none)
234+
DEF_SME_ZA_FUNCTION (svmopa, binary_za_m, za_h_bfloat, za_m)
235+
DEF_SME_ZA_FUNCTION (svmops, binary_za_m, za_h_bfloat, za_m)
236+
DEF_SME_ZA_FUNCTION_GS (svsub, unary_za_slice, za_h_bfloat, vg1x24, none)
237+
#undef REQUIRED_EXTENSIONS
238+
224239
#undef DEF_SME_ZA_FUNCTION
225240
#undef DEF_SME_ZA_FUNCTION_GS
226241
#undef DEF_SME_FUNCTION

gcc/config/aarch64/aarch64-sve-builtins.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -633,6 +633,10 @@ CONSTEXPR const group_suffix_info group_suffixes[] = {
633633
TYPES_za_bhsd_data (S, D), \
634634
TYPES_reinterpret1 (D, za128)
635635

636+
/* _za16_bf16. */
637+
#define TYPES_za_h_bfloat(S, D) \
638+
D (za16, bf16)
639+
636640
/* _za16_f16. */
637641
#define TYPES_za_h_float(S, D) \
638642
D (za16, f16)
@@ -807,6 +811,7 @@ DEF_SVE_TYPES_ARRAY (all_za);
807811
DEF_SVE_TYPES_ARRAY (d_za);
808812
DEF_SVE_TYPES_ARRAY (za_bhsd_data);
809813
DEF_SVE_TYPES_ARRAY (za_all_data);
814+
DEF_SVE_TYPES_ARRAY (za_h_bfloat);
810815
DEF_SVE_TYPES_ARRAY (za_h_float);
811816
DEF_SVE_TYPES_ARRAY (za_s_b_signed);
812817
DEF_SVE_TYPES_ARRAY (za_s_b_unsigned);

gcc/config/aarch64/aarch64.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,10 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
349349
/* The FEAT_SME_I16I64 extension to SME, enabled through +sme-i16i64. */
350350
#define TARGET_SME_I16I64 AARCH64_HAVE_ISA (SME_I16I64)
351351

352+
/* The FEAT_SME_B16B16 extension to SME, enabled through +sme-b16b16. */
353+
#define TARGET_STREAMING_SME_B16B16 \
354+
(AARCH64_HAVE_ISA (SME_B16B16) && TARGET_STREAMING)
355+
352356
/* The FEAT_SME_F16F16 extension to SME, enabled through +sme-f16f16. */
353357
#define TARGET_STREAMING_SME_F16F16 \
354358
(AARCH64_HAVE_ISA (SME_F16F16) && TARGET_STREAMING)

gcc/config/aarch64/iterators.md

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -686,13 +686,16 @@
686686
(VNx4DF "TARGET_SME_F64F64")
687687
(VNx8DF "TARGET_SME_F64F64")
688688
(VNx16HF "TARGET_STREAMING_SME_F16F16")
689-
(VNx32HF "TARGET_STREAMING_SME_F16F16")])
689+
(VNx32HF "TARGET_STREAMING_SME_F16F16")
690+
(VNx16BF "TARGET_STREAMING_SME_B16B16")
691+
(VNx32BF "TARGET_STREAMING_SME_B16B16")])
690692

691693
;; The modes for which outer product instructions are supported.
692694
(define_mode_iterator SME_MOP_BHI [VNx16QI (VNx8HI "TARGET_SME_I16I64")])
693695
(define_mode_iterator SME_MOP_HSDF [VNx4SF
694696
(VNx2DF "TARGET_SME_F64F64")
695-
(VNx8HF "TARGET_STREAMING_SME_F16F16")])
697+
(VNx8HF "TARGET_STREAMING_SME_F16F16")
698+
(VNx8BF "TARGET_STREAMING_SME_B16B16")])
696699

697700
;; ------------------------------------------------------------------
698701
;; Unspec enumerations for Advanced SIMD. These could well go into

gcc/doc/invoke.texi

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21854,6 +21854,9 @@ Enable the FEAT_SME_F64F64 extension to SME. This also enables SME
2185421854
instructions.
2185521855
@item sme2
2185621856
Enable the Scalable Matrix Extension 2. This also enables SME instructions.
21857+
@item sme-b16b16
21858+
Enable the FEAT_SME_B16B16 extension to SME. This also enables SME2
21859+
and SVE_B16B16 instructions.
2185721860
@item sme-f16f16
2185821861
Enable the FEAT_SME_F16F16 extension to SME. This also enables SME2
2185921862
instructions.

gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,9 @@
7070
#ifdef __ARM_FEATURE_SME_I16I64
7171
#error Foo
7272
#endif
73+
#ifdef __ARM_FEATURE_SME_B16B16
74+
#error Foo
75+
#endif
7376
#ifdef __ARM_FEATURE_SME_F16F16
7477
#error Foo
7578
#endif
@@ -88,6 +91,20 @@
8891
#error Foo
8992
#endif
9093

94+
#pragma GCC target "+nothing+sme-b16b16"
95+
#ifndef __ARM_FEATURE_SME_B16B16
96+
#error Foo
97+
#endif
98+
#ifndef __ARM_FEATURE_SME
99+
#error Foo
100+
#endif
101+
#ifndef __ARM_FEATURE_SME2
102+
#error Foo
103+
#endif
104+
#ifndef __ARM_FEATURE_SVE_B16B16
105+
#error Foo
106+
#endif
107+
91108
#pragma GCC target "+nothing+sme-f16f16"
92109
#ifndef __ARM_FEATURE_SME_F16F16
93110
#error Foo
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
/* { dg-do assemble { target aarch64_asm_sme-b16b16_ok } } */
2+
/* { dg-do compile { target { ! aarch64_asm_sme-b16b16_ok } } } */
3+
/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
4+
5+
#include "test_sme2_acle.h"
6+
7+
#pragma GCC target "+sme-b16b16"
8+
9+
/*
10+
** add_0_z0:
11+
** mov (w8|w9|w10|w11), #?0
12+
** bfadd za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}
13+
** ret
14+
*/
15+
TEST_ZA_XN (add_0_z0, svbfloat16x2_t,
16+
svadd_za16_bf16_vg1x2 (0, z0),
17+
svadd_za16_vg1x2 (0, z0))
18+
19+
/*
20+
** add_w0_z0:
21+
** mov (w8|w9|w10|w11), w0
22+
** bfadd za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}
23+
** ret
24+
*/
25+
TEST_ZA_XN (add_w0_z0, svbfloat16x2_t,
26+
svadd_za16_bf16_vg1x2 (w0, z0),
27+
svadd_za16_vg1x2 (w0, z0))
28+
29+
/*
30+
** add_w7_z0:
31+
** mov (w8|w9|w10|w11), w7
32+
** bfadd za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}
33+
** ret
34+
*/
35+
TEST_ZA_XN (add_w7_z0, svbfloat16x2_t,
36+
svadd_za16_bf16_vg1x2 (w7, z0),
37+
svadd_za16_vg1x2 (w7, z0))
38+
39+
/*
40+
** add_w8_z0:
41+
** bfadd za\.h\[w8, 0, vgx2\], {z0\.h - z1\.h}
42+
** ret
43+
*/
44+
TEST_ZA_XN (add_w8_z0, svbfloat16x2_t,
45+
svadd_za16_bf16_vg1x2 (w8, z0),
46+
svadd_za16_vg1x2 (w8, z0))
47+
48+
/*
49+
** add_w11_z0:
50+
** bfadd za\.h\[w11, 0, vgx2\], {z0\.h - z1\.h}
51+
** ret
52+
*/
53+
TEST_ZA_XN (add_w11_z0, svbfloat16x2_t,
54+
svadd_za16_bf16_vg1x2 (w11, z0),
55+
svadd_za16_vg1x2 (w11, z0))
56+
57+
58+
/*
59+
** add_w12_z0:
60+
** mov (w8|w9|w10|w11), w12
61+
** bfadd za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}
62+
** ret
63+
*/
64+
TEST_ZA_XN (add_w12_z0, svbfloat16x2_t,
65+
svadd_za16_bf16_vg1x2 (w12, z0),
66+
svadd_za16_vg1x2 (w12, z0))
67+
68+
/*
69+
** add_w8p7_z0:
70+
** bfadd za\.h\[w8, 7, vgx2\], {z0\.h - z1\.h}
71+
** ret
72+
*/
73+
TEST_ZA_XN (add_w8p7_z0, svbfloat16x2_t,
74+
svadd_za16_bf16_vg1x2 (w8 + 7, z0),
75+
svadd_za16_vg1x2 (w8 + 7, z0))
76+
77+
/*
78+
** add_w8p8_z0:
79+
** add (w8|w9|w10|w11), w8, #?8
80+
** bfadd za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}
81+
** ret
82+
*/
83+
TEST_ZA_XN (add_w8p8_z0, svbfloat16x2_t,
84+
svadd_za16_bf16_vg1x2 (w8 + 8, z0),
85+
svadd_za16_vg1x2 (w8 + 8, z0))
86+
87+
/*
88+
** add_w8m1_z0:
89+
** sub (w8|w9|w10|w11), w8, #?1
90+
** bfadd za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}
91+
** ret
92+
*/
93+
TEST_ZA_XN (add_w8m1_z0, svbfloat16x2_t,
94+
svadd_za16_bf16_vg1x2 (w8 - 1, z0),
95+
svadd_za16_vg1x2 (w8 - 1, z0))
96+
97+
/*
98+
** add_w8_z18:
99+
** bfadd za\.h\[w8, 0, vgx2\], {z18\.h - z19\.h}
100+
** ret
101+
*/
102+
TEST_ZA_XN (add_w8_z18, svbfloat16x2_t,
103+
svadd_za16_bf16_vg1x2 (w8, z18),
104+
svadd_za16_vg1x2 (w8, z18))
105+
106+
/* Leave the assembler to check for correctness for misaligned registers. */
107+
108+
/*
109+
** add_w8_z23:
110+
** mov [^\n]+
111+
** mov [^\n]+
112+
** bfadd za\.h\[w8, 0, vgx2\], [^\n]+
113+
** ret
114+
*/
115+
TEST_ZA_XN (add_w8_z23, svbfloat16x2_t,
116+
svadd_za16_bf16_vg1x2 (w8, z23),
117+
svadd_za16_vg1x2 (w8, z23))
118+
119+
/*
120+
** add_w8_z28:
121+
** bfadd za\.h\[w8, 0, vgx2\], {z28\.h - z29\.h}
122+
** ret
123+
*/
124+
TEST_ZA_XN (add_w8_z28, svbfloat16x2_t,
125+
svadd_za16_bf16_vg1x2 (w8, z28),
126+
svadd_za16_vg1x2 (w8, z28))

0 commit comments

Comments
 (0)