sifive
diff --git a/‎gcc/config/aarch64/aarch64-c.cc‎
Lines changed: 2 additions & 0 deletions b/‎gcc/config/aarch64/aarch64-c.cc‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎gcc/config/aarch64/aarch64-option-extensions.def‎
Lines changed: 2 additions & 0 deletions b/‎gcc/config/aarch64/aarch64-option-extensions.def‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎gcc/config/aarch64/aarch64-sme.md‎
Lines changed: 12 additions & 8 deletions b/‎gcc/config/aarch64/aarch64-sme.md‎
Lines changed: 12 additions & 8 deletions
diff --git a/‎gcc/config/aarch64/aarch64-sve-builtins-sme.def‎
Lines changed: 15 additions & 0 deletions b/‎gcc/config/aarch64/aarch64-sve-builtins-sme.def‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎gcc/config/aarch64/aarch64-sve-builtins.cc‎
Lines changed: 5 additions & 0 deletions b/‎gcc/config/aarch64/aarch64-sve-builtins.cc‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎gcc/config/aarch64/aarch64.h‎
Lines changed: 4 additions & 0 deletions b/‎gcc/config/aarch64/aarch64.h‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎gcc/config/aarch64/iterators.md‎
Lines changed: 5 additions & 2 deletions b/‎gcc/config/aarch64/iterators.md‎
Lines changed: 5 additions & 2 deletions
diff --git a/‎gcc/doc/invoke.texi‎
Lines changed: 3 additions & 0 deletions b/‎gcc/doc/invoke.texi‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c‎
Lines changed: 17 additions & 0 deletions b/‎gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c‎
Lines changed: 17 additions & 0 deletions
diff --git a/‎gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za16_bf16_vg1x2.c‎
Lines changed: 126 additions & 0 deletions b/‎gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za16_bf16_vg1x2.c‎
Lines changed: 126 additions & 0 deletions
@@ -271,6 +271,8 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
 
   aarch64_def_or_undef (TARGET_SME, "__ARM_FEATURE_SME", pfile);
   aarch64_def_or_undef (TARGET_SME_I16I64, "__ARM_FEATURE_SME_I16I64", pfile);
+  aarch64_def_or_undef (AARCH64_HAVE_ISA (SME_B16B16),
+			"__ARM_FEATURE_SME_B16B16", pfile);
   aarch64_def_or_undef (AARCH64_HAVE_ISA (SME_F16F16),
 			"__ARM_FEATURE_SME_F16F16", pfile);
   aarch64_def_or_undef (TARGET_SME_F64F64, "__ARM_FEATURE_SME_F64F64", pfile);
 
@@ -225,6 +225,8 @@ AARCH64_FMV_FEATURE("sme-i16i64", SME_I64, (SME_I16I64))
 
 AARCH64_OPT_FMV_EXTENSION("sme2", SME2, (SME), (), (), "sme2")
 
+AARCH64_OPT_EXTENSION("sme-b16b16", SME_B16B16, (SME2, SVE_B16B16), (), (), "")
+
 AARCH64_OPT_EXTENSION("sme-f16f16", SME_F16F16, (SME2), (), (), "")
 
 AARCH64_OPT_EXTENSION("mops", MOPS, (), (), (), "")
 
@@ -926,6 +926,8 @@
 ;; -------------------------------------------------------------------------
 ;; Includes:
 ;; - ADD
+;; - BFADD
+;; - BFSUB
 ;; - FADD
 ;; - FSUB
 ;; - SUB
@@ -965,7 +967,7 @@
 	   (match_operand:SME_ZA_HSDFx24 1 "aligned_register_operand" "Uw<vector_count>")]
 	  SME_BINARY_SLICE_HSDF))]
   "TARGET_STREAMING_SME2"
-  "<optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1"
+  "<b><optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1"
 )
 
 (define_insn "*aarch64_sme_<optab><mode>_plus"
@@ -978,7 +980,7 @@
 	   (match_operand:SME_ZA_HSDFx24 2 "aligned_register_operand" "Uw<vector_count>")]
 	  SME_BINARY_SLICE_HSDF))]
   "TARGET_STREAMING_SME2"
-  "<optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2"
+  "<b><optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2"
 )
 
 ;; -------------------------------------------------------------------------
@@ -1632,6 +1634,8 @@
 ;; ---- [FP] Ternary arithmetic on ZA slice
 ;; -------------------------------------------------------------------------
 ;; Includes:
+;; - BFMLA
+;; - BFMLS
 ;; - FMLA
 ;; - FMLS
 ;; -------------------------------------------------------------------------
@@ -1646,7 +1650,7 @@
 	   (match_operand:SME_ZA_HSDFx24 2 "aligned_register_operand" "Uw<vector_count>")]
 	  SME_FP_TERNARY_SLICE))]
   "TARGET_STREAMING_SME2"
-  "<optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2"
+  "<b><optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2"
 )
 
 (define_insn "*aarch64_sme_<optab><mode><mode>_plus"
@@ -1660,7 +1664,7 @@
 	   (match_operand:SME_ZA_HSDFx24 3 "aligned_register_operand" "Uw<vector_count>")]
 	  SME_FP_TERNARY_SLICE))]
   "TARGET_STREAMING_SME2"
-  "<optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3"
+  "<b><optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3"
 )
 
 (define_insn "@aarch64_sme_single_<optab><mode><mode>"
@@ -1674,7 +1678,7 @@
 	     (match_operand:<SME_ZA_HSDFx24:VSINGLE> 2 "register_operand" "x"))]
 	  SME_FP_TERNARY_SLICE))]
   "TARGET_STREAMING_SME2"
-  "<optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<Vetype>"
+  "<b><optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<Vetype>"
 )
 
 (define_insn "*aarch64_sme_single_<optab><mode><mode>_plus"
@@ -1689,7 +1693,7 @@
 	     (match_operand:<SME_ZA_HSDFx24:VSINGLE> 3 "register_operand" "x"))]
 	  SME_FP_TERNARY_SLICE))]
   "TARGET_STREAMING_SME2"
-  "<optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<Vetype>"
+  "<b><optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<Vetype>"
 )
 
 (define_insn "@aarch64_sme_lane_<optab><mode><mode>"
@@ -1705,7 +1709,7 @@
 	     UNSPEC_SVE_LANE_SELECT)]
 	  SME_FP_TERNARY_SLICE))]
   "TARGET_STREAMING_SME2"
-  "<optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<Vetype>[%3]"
+  "<b><optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<Vetype>[%3]"
 )
 
 (define_insn "*aarch64_sme_lane_<optab><mode><mode>"
@@ -1722,7 +1726,7 @@
 	     UNSPEC_SVE_LANE_SELECT)]
 	  SME_FP_TERNARY_SLICE))]
   "TARGET_STREAMING_SME2"
-  "<optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<Vetype>[%4]"
+  "<b><optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<Vetype>[%4]"
 )
 
 ;; -------------------------------------------------------------------------
 
@@ -221,6 +221,21 @@ DEF_SME_ZA_FUNCTION (svmops, binary_za_m, za_h_float, za_m)
 DEF_SME_ZA_FUNCTION_GS (svsub, unary_za_slice, za_h_float, vg1x24, none)
 #undef REQUIRED_EXTENSIONS
 
+#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME_B16B16)
+DEF_SME_ZA_FUNCTION_GS (svadd, unary_za_slice, za_h_bfloat, vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svmla, binary_za_slice_opt_single, za_h_bfloat,
+			vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svmla_lane, binary_za_slice_lane, za_h_bfloat,
+			vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svmls, binary_za_slice_opt_single, za_h_bfloat,
+			vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svmls_lane, binary_za_slice_lane, za_h_bfloat,
+			vg1x24, none)
+DEF_SME_ZA_FUNCTION (svmopa, binary_za_m, za_h_bfloat, za_m)
+DEF_SME_ZA_FUNCTION (svmops, binary_za_m, za_h_bfloat, za_m)
+DEF_SME_ZA_FUNCTION_GS (svsub, unary_za_slice, za_h_bfloat, vg1x24, none)
+#undef REQUIRED_EXTENSIONS
+
 #undef DEF_SME_ZA_FUNCTION
 #undef DEF_SME_ZA_FUNCTION_GS
 #undef DEF_SME_FUNCTION
@@ -633,6 +633,10 @@ CONSTEXPR const group_suffix_info group_suffixes[] = {
   TYPES_za_bhsd_data (S, D), \
   TYPES_reinterpret1 (D, za128)
 
+/* _za16_bf16 (bfloat16 counterpart of TYPES_za_h_float).  */
+#define TYPES_za_h_bfloat(S, D) \
+  D (za16, bf16)
+
 /* _za16_f16.  */
 #define TYPES_za_h_float(S, D) \
   D (za16, f16)
@@ -807,6 +811,7 @@ DEF_SVE_TYPES_ARRAY (all_za);
 DEF_SVE_TYPES_ARRAY (d_za);
 DEF_SVE_TYPES_ARRAY (za_bhsd_data);
 DEF_SVE_TYPES_ARRAY (za_all_data);
+DEF_SVE_TYPES_ARRAY (za_h_bfloat);
 DEF_SVE_TYPES_ARRAY (za_h_float);
 DEF_SVE_TYPES_ARRAY (za_s_b_signed);
 DEF_SVE_TYPES_ARRAY (za_s_b_unsigned);
 
@@ -349,6 +349,10 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
 /* The FEAT_SME_I16I64 extension to SME, enabled through +sme-i16i64.  */
 #define TARGET_SME_I16I64 AARCH64_HAVE_ISA (SME_I16I64)
 
+/* FEAT_SME_B16B16 (+sme-b16b16) is enabled and TARGET_STREAMING is true.  */
+#define TARGET_STREAMING_SME_B16B16 \
+  (AARCH64_HAVE_ISA (SME_B16B16) && TARGET_STREAMING)
+
 /* The FEAT_SME_F16F16 extension to SME, enabled through +sme-f16f16.  */
 #define TARGET_STREAMING_SME_F16F16 \
   (AARCH64_HAVE_ISA (SME_F16F16) && TARGET_STREAMING)
 
@@ -686,13 +686,16 @@
 				      (VNx4DF "TARGET_SME_F64F64")
 				      (VNx8DF "TARGET_SME_F64F64")
 				      (VNx16HF "TARGET_STREAMING_SME_F16F16")
-				      (VNx32HF "TARGET_STREAMING_SME_F16F16")])
+				      (VNx32HF "TARGET_STREAMING_SME_F16F16")
+				      (VNx16BF "TARGET_STREAMING_SME_B16B16")
+				      (VNx32BF "TARGET_STREAMING_SME_B16B16")])
 
 ;; The modes for which outer product instructions are supported.
 (define_mode_iterator SME_MOP_BHI [VNx16QI (VNx8HI "TARGET_SME_I16I64")])
 (define_mode_iterator SME_MOP_HSDF [VNx4SF
 				    (VNx2DF "TARGET_SME_F64F64")
-				    (VNx8HF "TARGET_STREAMING_SME_F16F16")])
+				    (VNx8HF "TARGET_STREAMING_SME_F16F16")
+				    (VNx8BF "TARGET_STREAMING_SME_B16B16")])
 
 ;; ------------------------------------------------------------------
 ;; Unspec enumerations for Advance SIMD. These could well go into
 
@@ -21854,6 +21854,9 @@ Enable the FEAT_SME_F64F64 extension to SME.  This also enables SME
 instructions.
 @item sme2
 Enable the Scalable Matrix Extension 2.  This also enables SME instructions.
+@item sme-b16b16
+Enable the FEAT_SME_B16B16 extension to SME.  This also enables SME2
+and SVE_B16B16 instructions.
 @item sme-f16f16
 Enable the FEAT_SME_F16F16 extension to SME.  This also enables SME2
 instructions.
 
@@ -70,6 +70,9 @@
 #ifdef __ARM_FEATURE_SME_I16I64
 #error Foo
 #endif
+#ifdef __ARM_FEATURE_SME_B16B16
+#error Foo
+#endif
 #ifdef __ARM_FEATURE_SME_F16F16
 #error Foo
 #endif
@@ -88,6 +91,20 @@
 #error Foo
 #endif
 
+#pragma GCC target "+nothing+sme-b16b16"
+#ifndef __ARM_FEATURE_SME_B16B16
+#error Foo
+#endif
+#ifndef __ARM_FEATURE_SME
+#error Foo
+#endif
+#ifndef __ARM_FEATURE_SME2
+#error Foo
+#endif
+#ifndef __ARM_FEATURE_SVE_B16B16
+#error Foo
+#endif
+
 #pragma GCC target "+nothing+sme-f16f16"
 #ifndef __ARM_FEATURE_SME_F16F16
 #error Foo
 
@@ -0,0 +1,126 @@
+/* { dg-do assemble { target aarch64_asm_sme-b16b16_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme-b16b16_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+#pragma GCC target "+sme-b16b16"
+
+/*
+** add_0_z0:
+**	mov	(w8|w9|w10|w11), #?0
+**	bfadd	za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (add_0_z0, svbfloat16x2_t,
+	    svadd_za16_bf16_vg1x2 (0, z0),
+	    svadd_za16_vg1x2 (0, z0))
+
+/*
+** add_w0_z0:
+**	mov	(w8|w9|w10|w11), w0
+**	bfadd	za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (add_w0_z0, svbfloat16x2_t,
+	    svadd_za16_bf16_vg1x2 (w0, z0),
+	    svadd_za16_vg1x2 (w0, z0))
+
+/*
+** add_w7_z0:
+**	mov	(w8|w9|w10|w11), w7
+**	bfadd	za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (add_w7_z0, svbfloat16x2_t,
+	    svadd_za16_bf16_vg1x2 (w7, z0),
+	    svadd_za16_vg1x2 (w7, z0))
+
+/*
+** add_w8_z0:
+**	bfadd	za\.h\[w8, 0, vgx2\], {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (add_w8_z0, svbfloat16x2_t,
+	    svadd_za16_bf16_vg1x2 (w8, z0),
+	    svadd_za16_vg1x2 (w8, z0))
+
+/*
+** add_w11_z0:
+**	bfadd	za\.h\[w11, 0, vgx2\], {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (add_w11_z0, svbfloat16x2_t,
+	    svadd_za16_bf16_vg1x2 (w11, z0),
+	    svadd_za16_vg1x2 (w11, z0))
+
+
+/*
+** add_w12_z0:
+**	mov	(w8|w9|w10|w11), w12
+**	bfadd	za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (add_w12_z0, svbfloat16x2_t,
+	    svadd_za16_bf16_vg1x2 (w12, z0),
+	    svadd_za16_vg1x2 (w12, z0))
+
+/*
+** add_w8p7_z0:
+**	bfadd	za\.h\[w8, 7, vgx2\], {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (add_w8p7_z0, svbfloat16x2_t,
+	    svadd_za16_bf16_vg1x2 (w8 + 7, z0),
+	    svadd_za16_vg1x2 (w8 + 7, z0))
+
+/*
+** add_w8p8_z0:
+**	add	(w8|w9|w10|w11), w8, #?8
+**	bfadd	za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (add_w8p8_z0, svbfloat16x2_t,
+	    svadd_za16_bf16_vg1x2 (w8 + 8, z0),
+	    svadd_za16_vg1x2 (w8 + 8, z0))
+
+/*
+** add_w8m1_z0:
+**	sub	(w8|w9|w10|w11), w8, #?1
+**	bfadd	za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (add_w8m1_z0, svbfloat16x2_t,
+	    svadd_za16_bf16_vg1x2 (w8 - 1, z0),
+	    svadd_za16_vg1x2 (w8 - 1, z0))
+
+/*
+** add_w8_z18:
+**	bfadd	za\.h\[w8, 0, vgx2\], {z18\.h - z19\.h}
+**	ret
+*/
+TEST_ZA_XN (add_w8_z18, svbfloat16x2_t,
+	    svadd_za16_bf16_vg1x2 (w8, z18),
+	    svadd_za16_vg1x2 (w8, z18))
+
+/* Leave the assembler to check for correctness for misaligned registers.  */
+
+/*
+** add_w8_z23:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	bfadd	za\.h\[w8, 0, vgx2\], [^\n]+
+**	ret
+*/
+TEST_ZA_XN (add_w8_z23, svbfloat16x2_t,
+	    svadd_za16_bf16_vg1x2 (w8, z23),
+	    svadd_za16_vg1x2 (w8, z23))
+
+/*
+** add_w8_z28:
+**	bfadd	za\.h\[w8, 0, vgx2\], {z28\.h - z29\.h}
+**	ret
+*/
+TEST_ZA_XN (add_w8_z28, svbfloat16x2_t,
+	    svadd_za16_bf16_vg1x2 (w8, z28),
+	    svadd_za16_vg1x2 (w8, z28))