Skip to content

Commit 70ec3d2

Browse files
committed
aarch64: Add support for SME2p1
This patch adds support for FEAT_SME2p1. There are two sets of new instructions: MOVAZ to read from ZA and zero the source data, and new forms of ZERO. All of them require streaming mode. MOVAZ can't reuse the existing UNSPEC_SME_READ* patterns because of the write to ZA. I did wonder about trying to use a define_subst, but it seemed a bit too awkward. gcc/ * config/aarch64/aarch64-option-extensions.def (sme2p1): New extension. * doc/invoke.texi: Document it. * config/aarch64/aarch64.h (TARGET_STREAMING_SME2p1): New macro. * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Conditionally define __ARM_FEATURE_SME2p1. * config/aarch64/iterators.md (UNSPEC_SME_READZ, UNSPEC_SME_READZ_HOR) (UNSPEC_SME_READZ_VER): New unspecs. (optab, hv): Handle them. (SME_READZ_HV): New int iterator. * config/aarch64/aarch64-sme.md (UNSPEC_SME_ZERO_SLICES): New unspec. (@aarch64_sme_<SME_READZ_HV:optab><v_int_container><mode>) (*aarch64_sme_<SME_READZ_HV:optab><v_int_container><mode>_plus) (@aarch64_sme_<SME_READZ_HV:optab><VNx1TI_ONLY:mode><SVE_FULL:mode>) (@aarch64_sme_<SME_READZ_HV:optab><SVE_FULLx24:mode><mode>) (*aarch64_sme_<SME_READZ_HV:optab><SVE_FULLx24:mode><mode>_plus) (@aarch64_sme_readz<mode>, *aarch64_sme_readz<mode>_plus) (@aarch64_sme_zero_za_slices<mode>): New patterns. (*aarch64_sme_zero_za_slices<mode>_plus): Likewise. * config/aarch64/aarch64-sve-builtins-shapes.h (inherent_za_slice): Declare. * config/aarch64/aarch64-sve-builtins-shapes.cc (inherent_za_slice_def, inherent_za_slice): New shape. * config/aarch64/aarch64-sve-builtins-sme.h (svreadz_za) (svreadz_hor_za, svreadz_ver_za): Declare. * config/aarch64/aarch64-sve-builtins-sme.cc (svread_za_slice_base): New class, split out from... (svread_za_impl): ...here. (svreadz_za_impl, svreadz_za_tile_impl): New type aliases. (zero_slices_mode): New function. (svzero_za_impl::expand): Handle the slice forms. (svreadz_za, svreadz_hor_za, svreadz_ver_za): New functions. 
* config/aarch64/aarch64-sve-builtins-sme.def: Add the SME2p1 instructions. gcc/testsuite/ * lib/target-supports.exp: Test the assembler for sve-b16b16 support. * gcc.target/aarch64/pragma_cpp_predefs_4.c: Add tests for __ARM_FEATURE_SME2p1. * gcc.target/aarch64/sme2/acle-asm/readz_hor_za128.c: New test. * gcc.target/aarch64/sme2/acle-asm/readz_hor_za16.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/readz_hor_za16_vg2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/readz_hor_za16_vg4.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/readz_hor_za32.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/readz_hor_za32_vg2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/readz_hor_za32_vg4.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/readz_hor_za64.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/readz_hor_za64_vg2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/readz_hor_za64_vg4.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/readz_hor_za8.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/readz_hor_za8_vg2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/readz_hor_za8_vg4.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/readz_ver_za16.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/readz_ver_za16_vg2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/readz_ver_za16_vg4.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/readz_ver_za32.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/readz_ver_za32_vg2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/readz_ver_za32_vg4.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/readz_ver_za64.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/readz_ver_za64_vg2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/readz_ver_za64_vg4.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/readz_ver_za8.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/readz_ver_za8_vg2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/readz_ver_za8_vg4.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/readz_za16_vg1x2.c: Likewise. 
* gcc.target/aarch64/sme2/acle-asm/readz_za16_vg1x4.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/readz_za32_vg1x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/readz_za32_vg1x4.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/readz_za64_vg1x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/readz_za64_vg1x4.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/readz_za8_vg1x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/readz_za8_vg1x4.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/zero_za64_vg1x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/zero_za64_vg1x4.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/zero_za64_vg2x1.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/zero_za64_vg2x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/zero_za64_vg2x4.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/zero_za64_vg4x1.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/zero_za64_vg4x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/zero_za64_vg4x4.c: Likewise.
1 parent cdacb32 commit 70ec3d2

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

54 files changed

+5538
-11
lines changed

gcc/config/aarch64/aarch64-c.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,8 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
277277
"__ARM_FEATURE_SME_F16F16", pfile);
278278
aarch64_def_or_undef (TARGET_SME_F64F64, "__ARM_FEATURE_SME_F64F64", pfile);
279279
aarch64_def_or_undef (TARGET_SME2, "__ARM_FEATURE_SME2", pfile);
280+
aarch64_def_or_undef (AARCH64_HAVE_ISA (SME2p1),
281+
"__ARM_FEATURE_SME2p1", pfile);
280282

281283
/* Not for ACLE, but required to keep "float.h" correct if we switch
282284
target between implementations that do or do not support ARMv8.2-A

gcc/config/aarch64/aarch64-option-extensions.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,8 @@ AARCH64_FMV_FEATURE("sme-i16i64", SME_I64, (SME_I16I64))
225225

226226
AARCH64_OPT_FMV_EXTENSION("sme2", SME2, (SME), (), (), "sme2")
227227

228+
AARCH64_OPT_EXTENSION("sme2p1", SME2p1, (SME2), (), (), "sme2p1")
229+
228230
AARCH64_OPT_EXTENSION("sme-b16b16", SME_B16B16, (SME2, SVE_B16B16), (), (), "")
229231

230232
AARCH64_OPT_EXTENSION("sme-f16f16", SME_F16F16, (SME2), (), (), "")

gcc/config/aarch64/aarch64-sme.md

Lines changed: 216 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -651,6 +651,7 @@
651651
;; -------------------------------------------------------------------------
652652
;; Includes:
653653
;; - MOVA
654+
;; - MOVAZ
654655
;; -------------------------------------------------------------------------
655656

656657
(define_insn "@aarch64_sme_<optab><v_int_container><mode>"
@@ -697,6 +698,72 @@
697698
"mova\t%0.q, %2/m, za%3<hv>.q[%w4, 0]"
698699
)
699700

701+
;; MOVAZ, single-vector form: read one ZA slice into vector register
;; operand 0 and record the accompanying write to ZA (per the commit
;; description, MOVAZ zeroes the data it reads).
;;
;; The pattern is a two-set parallel.  The first set produces the value
;; returned in operand 0; the second set models the update of ZA itself.
;; Both unspecs take the same inputs and are told apart by their mode and
;; by the trailing tag: (const_int 0) for the register result,
;; (const_int 1) for the ZA update.
;;
;; Operand 1 is a constant printed straight after "za" in the template
;; (presumably the tile number -- confirm against the callers).
;; Operand 2 is the SImode register that supplies the slice index; the
;; immediate offset is fixed at 0 here.
(define_insn "@aarch64_sme_<optab><v_int_container><mode>"
702+
[(set (match_operand:SVE_FULL 0 "register_operand" "=w")
703+
(unspec:SVE_FULL
704+
[(reg:<V_INT_CONTAINER> ZA_REGNUM)
705+
(reg:DI SME_STATE_REGNUM)
706+
(match_operand:DI 1 "const_int_operand")
707+
(match_operand:SI 2 "register_operand" "Ucj")
708+
(const_int 0)]
709+
SME_READZ_HV))
710+
(set (reg:<V_INT_CONTAINER> ZA_REGNUM)
711+
(unspec:<V_INT_CONTAINER>
712+
[(reg:<V_INT_CONTAINER> ZA_REGNUM)
713+
(reg:DI SME_STATE_REGNUM)
714+
(match_dup 1)
715+
(match_dup 2)
716+
(const_int 1)]
717+
SME_READZ_HV))]
718+
"TARGET_STREAMING_SME2p1"
719+
"movaz\t%0.<Vetype>, za%1<hv>.<Vetype>[%w2, 0]"
720+
)
721+
722+
;; MOVAZ, single-vector form with a constant slice offset.  Same shape as
;; the zero-offset pattern, except that the slice index is
;; (plus operand 2, operand 3) and operand 3 is printed as the immediate
;; offset in the template.
;;
;; As in the zero-offset form, the first set is the register result
;; (tag (const_int 0)) and the second set is the write to ZA
;; (tag (const_int 1)); the second set repeats the same plus expression
;; via match_dup.
;;
;; The insn condition restricts operand 3 to the range
;; [0, 128 / <elem_bits>); UINTVAL makes negative offsets fail the test.
(define_insn "*aarch64_sme_<optab><v_int_container><mode>_plus"
723+
[(set (match_operand:SVE_FULL 0 "register_operand" "=w")
724+
(unspec:SVE_FULL
725+
[(reg:<V_INT_CONTAINER> ZA_REGNUM)
726+
(reg:DI SME_STATE_REGNUM)
727+
(match_operand:DI 1 "const_int_operand")
728+
(plus:SI (match_operand:SI 2 "register_operand" "Ucj")
729+
(match_operand:SI 3 "const_int_operand"))
730+
(const_int 0)]
731+
SME_READZ_HV))
732+
(set (reg:<V_INT_CONTAINER> ZA_REGNUM)
733+
(unspec:<V_INT_CONTAINER>
734+
[(reg:<V_INT_CONTAINER> ZA_REGNUM)
735+
(reg:DI SME_STATE_REGNUM)
736+
(match_dup 1)
737+
(plus:SI (match_dup 2)
738+
(match_dup 3))
739+
(const_int 1)]
740+
SME_READZ_HV))]
741+
"TARGET_STREAMING_SME2p1
742+
&& UINTVAL (operands[3]) < 128 / <elem_bits>"
743+
"movaz\t%0.<Vetype>, za%1<hv>.<Vetype>[%w2, %3]"
744+
)
745+
746+
;; MOVAZ, 128-bit (.q) form: read one slice of a .q ZA tile into vector
;; register operand 0 (any SVE_FULL mode) and record the accompanying
;; write to ZA.
;;
;; The first set is the register result and the second set models the
;; update of ZA.  To stay consistent with the other MOVAZ/READZ patterns
;; in this file, the register result is tagged (const_int 0) and the ZA
;; update is tagged (const_int 1).
;;
;; Operand 1 is a constant printed straight after "za" in the template
;; (presumably the tile number -- confirm against the callers).
;; Operand 2 is the SImode register that supplies the slice index; the
;; only offset printed for this form is 0.
(define_insn "@aarch64_sme_<optab><VNx1TI_ONLY:mode><SVE_FULL:mode>"
747+
[(set (match_operand:SVE_FULL 0 "register_operand" "=w")
748+
(unspec:SVE_FULL
749+
[(reg:VNx1TI_ONLY ZA_REGNUM)
750+
(reg:DI SME_STATE_REGNUM)
751+
(match_operand:DI 1 "const_int_operand")
752+
(match_operand:SI 2 "register_operand" "Ucj")
753+
(const_int 0)]
754+
SME_READZ_HV))
755+
(set (reg:VNx1TI_ONLY ZA_REGNUM)
756+
(unspec:VNx1TI_ONLY
757+
[(reg:VNx1TI_ONLY ZA_REGNUM)
758+
(reg:DI SME_STATE_REGNUM)
759+
(match_dup 1)
760+
(match_dup 2)
761+
(const_int 1)]
762+
SME_READZ_HV))]
763+
"TARGET_STREAMING_SME2p1"
764+
"movaz\t%0.q, za%1<hv>.q[%w2, 0]"
765+
)
766+
700767
(define_insn "@aarch64_sme_<optab><v_int_container><mode>"
701768
[(set (reg:<V_INT_CONTAINER> ZA_REGNUM)
702769
(unspec:<V_INT_CONTAINER>
@@ -746,6 +813,7 @@
746813
;; -------------------------------------------------------------------------
747814
;; Includes:
748815
;; - MOVA
816+
;; - MOVAZ
749817
;; -------------------------------------------------------------------------
750818

751819
(define_insn "@aarch64_sme_<optab><mode><mode>"
@@ -782,6 +850,60 @@
782850
}
783851
)
784852

853+
;; MOVAZ, multi-vector form: read <vector_count> consecutive ZA slices
;; into the aligned register group operand 0 and record the accompanying
;; write to ZA.
;;
;; The first set is the register result (tag (const_int 0)); the second
;; set models the update of ZA (tag (const_int 1)).
;;
;; Operand 1 is a constant printed straight after "za" in the template
;; (presumably the tile number -- confirm against the callers).
;; Operand 2 is the SImode register that supplies the slice index.
;; Operand 3 is not matched by the pattern: the output code fills it in
;; with <vector_count> - 1 so that the template prints the slice range
;; "0:<vector_count> - 1".
(define_insn "@aarch64_sme_<optab><mode><mode>"
854+
[(set (match_operand:SVE_FULLx24 0 "aligned_register_operand" "=Uw<vector_count>")
855+
(unspec:SVE_FULLx24
856+
[(reg:SVE_FULLx24 ZA_REGNUM)
857+
(reg:DI SME_STATE_REGNUM)
858+
(match_operand:DI 1 "const_int_operand")
859+
(match_operand:SI 2 "register_operand" "Ucj")
860+
(const_int 0)]
861+
SME_READZ_HV))
862+
(set (reg:SVE_FULLx24 ZA_REGNUM)
863+
(unspec:SVE_FULLx24
864+
[(reg:SVE_FULLx24 ZA_REGNUM)
865+
(reg:DI SME_STATE_REGNUM)
866+
(match_dup 1)
867+
(match_dup 2)
868+
(const_int 1)]
869+
SME_READZ_HV))]
870+
"TARGET_STREAMING_SME2p1"
871+
{
872+
operands[3] = GEN_INT (<vector_count> - 1);
873+
return "movaz\t%0, za%1<hv>.<Vetype>[%w2, 0:%3]";
874+
}
875+
)
876+
877+
;; MOVAZ, multi-vector form with a constant slice offset.  The slice
;; index is (plus operand 2, operand 3); the second set repeats the same
;; expression via match_dup and carries the (const_int 1) tag that marks
;; the write to ZA.
;;
;; The insn condition requires operand 3 to be a multiple of
;; <vector_count> and to lie in [0, 128 / <elem_bits>); UINTVAL makes
;; negative offsets fail the range test.
;;
;; Operand 4 is not matched by the pattern: the output code sets it to
;; operand 3 + <vector_count> - 1, the last slice of the range printed
;; as "%3:%4".
(define_insn "*aarch64_sme_<optab><mode><mode>_plus"
878+
[(set (match_operand:SVE_FULLx24 0 "aligned_register_operand" "=Uw<vector_count>")
879+
(unspec:SVE_FULLx24
880+
[(reg:SVE_FULLx24 ZA_REGNUM)
881+
(reg:DI SME_STATE_REGNUM)
882+
(match_operand:DI 1 "const_int_operand")
883+
(plus:SI
884+
(match_operand:SI 2 "register_operand" "Ucj")
885+
(match_operand:SI 3 "const_int_operand"))
886+
(const_int 0)]
887+
SME_READZ_HV))
888+
(set (reg:SVE_FULLx24 ZA_REGNUM)
889+
(unspec:SVE_FULLx24
890+
[(reg:SVE_FULLx24 ZA_REGNUM)
891+
(reg:DI SME_STATE_REGNUM)
892+
(match_dup 1)
893+
(plus:SI
894+
(match_dup 2)
895+
(match_dup 3))
896+
(const_int 1)]
897+
SME_READZ_HV))]
898+
"TARGET_STREAMING_SME2p1
899+
&& UINTVAL (operands[3]) % <vector_count> == 0
900+
&& UINTVAL (operands[3]) < 128 / <elem_bits>"
901+
{
902+
operands[4] = GEN_INT (INTVAL (operands[3]) + <vector_count> - 1);
903+
return "movaz\t%0, za%1<hv>.<Vetype>[%w2, %3:%4]";
904+
}
905+
)
906+
785907
(define_insn "@aarch64_sme_read<mode>"
786908
[(set (match_operand:SVE_DIx24 0 "aligned_register_operand" "=Uw<vector_count>")
787909
(unspec:SVE_DIx24
@@ -805,6 +927,46 @@
805927
"mova\t%0, za.d[%w1, %2, vgx<vector_count>]"
806928
)
807929

930+
;; MOVAZ, "za.d[..., vgx<vector_count>]" form: read a group of
;; <vector_count> vectors from ZA into the aligned register group
;; operand 0 and record the accompanying write to ZA.
;;
;; No tile-number operand is involved here; operand 1 is the SImode
;; register that supplies the index and the immediate offset is fixed
;; at 0.
;;
;; The first set is the register result (tag (const_int 0)); the second
;; set models the update of ZA (tag (const_int 1)).
(define_insn "@aarch64_sme_readz<mode>"
931+
[(set (match_operand:SVE_DIx24 0 "aligned_register_operand" "=Uw<vector_count>")
932+
(unspec:SVE_DIx24
933+
[(reg:SVE_DIx24 ZA_REGNUM)
934+
(reg:DI SME_STATE_REGNUM)
935+
(match_operand:SI 1 "register_operand" "Uci")
936+
(const_int 0)]
937+
UNSPEC_SME_READZ))
938+
(set (reg:SVE_DIx24 ZA_REGNUM)
939+
(unspec:SVE_DIx24
940+
[(reg:SVE_DIx24 ZA_REGNUM)
941+
(reg:DI SME_STATE_REGNUM)
942+
(match_dup 1)
943+
(const_int 1)]
944+
UNSPEC_SME_READZ))]
945+
"TARGET_STREAMING_SME2p1"
946+
"movaz\t%0, za.d[%w1, 0, vgx<vector_count>]"
947+
)
948+
949+
;; MOVAZ, "za.d[..., vgx<vector_count>]" form with a constant offset.
;; The index is (plus operand 1, operand 2), where operand 2 must satisfy
;; const_0_to_7_operand and is printed as the immediate offset.
;;
;; The first set is the register result (tag (const_int 0)); the second
;; set repeats the same index expression via match_dup and models the
;; update of ZA (tag (const_int 1)).
(define_insn "*aarch64_sme_readz<mode>_plus"
950+
[(set (match_operand:SVE_DIx24 0 "aligned_register_operand" "=Uw<vector_count>")
951+
(unspec:SVE_DIx24
952+
[(reg:SVE_DIx24 ZA_REGNUM)
953+
(reg:DI SME_STATE_REGNUM)
954+
(plus:SI (match_operand:SI 1 "register_operand" "Uci")
955+
(match_operand:SI 2 "const_0_to_7_operand"))
956+
(const_int 0)]
957+
UNSPEC_SME_READZ))
958+
(set (reg:SVE_DIx24 ZA_REGNUM)
959+
(unspec:SVE_DIx24
960+
[(reg:SVE_DIx24 ZA_REGNUM)
961+
(reg:DI SME_STATE_REGNUM)
962+
(plus:SI (match_dup 1)
963+
(match_dup 2))
964+
(const_int 1)]
965+
UNSPEC_SME_READZ))]
966+
"TARGET_STREAMING_SME2p1"
967+
"movaz\t%0, za.d[%w1, %2, vgx<vector_count>]"
968+
)
969+
808970
(define_insn "@aarch64_sme_<optab><mode><mode>"
809971
[(set (reg:SVE_FULLx24 ZA_REGNUM)
810972
(unspec:SVE_FULLx24
@@ -873,7 +1035,7 @@
8731035
;; - ZERO
8741036
;; -------------------------------------------------------------------------
8751037

876-
(define_c_enum "unspec" [UNSPEC_SME_ZERO])
1038+
(define_c_enum "unspec" [UNSPEC_SME_ZERO UNSPEC_SME_ZERO_SLICES])
8771039

8781040
(define_insn "aarch64_sme_zero_za"
8791041
[(set (reg:VNx16QI ZA_REGNUM)
@@ -887,6 +1049,59 @@
8871049
}
8881050
)
8891051

1052+
;; ZERO, slice form "za.d[..., vgx<vector_count>]" with a zero offset.
;; ZA is both an input and the output of the unspec.  Operand 0 is the
;; SImode register that supplies the index.
;;
;; The (scratch:SME_ZA_SDIx24) element has no register of its own; it
;; appears to exist only to give the pattern a mode for the <mode> and
;; <vector_count> substitutions -- NOTE(review): confirm.
(define_insn "@aarch64_sme_zero_za_slices<mode>"
1053+
[(set (reg:VNx16QI ZA_REGNUM)
1054+
(unspec:VNx16QI
1055+
[(reg:VNx16QI ZA_REGNUM)
1056+
(reg:DI SME_STATE_REGNUM)
1057+
(scratch:SME_ZA_SDIx24)
1058+
(match_operand:SI 0 "register_operand" "Uci")]
1059+
UNSPEC_SME_ZERO_SLICES))]
1060+
"TARGET_STREAMING_SME2p1"
1061+
"zero\tza.d[%w0, 0, vgx<vector_count>]"
1062+
)
1063+
1064+
;; ZERO, slice form "za.d[..., vgx<vector_count>]" with a constant
;; offset.  The index is (plus operand 0, operand 1), where operand 1
;; must satisfy const_0_to_7_operand and is printed as the immediate
;; offset.
;;
;; As in the zero-offset form, (scratch:SME_ZA_SDIx24) appears to exist
;; only to carry the iterator mode -- NOTE(review): confirm.
(define_insn "*aarch64_sme_zero_za_slices<mode>_plus"
1065+
[(set (reg:VNx16QI ZA_REGNUM)
1066+
(unspec:VNx16QI
1067+
[(reg:VNx16QI ZA_REGNUM)
1068+
(reg:DI SME_STATE_REGNUM)
1069+
(scratch:SME_ZA_SDIx24)
1070+
(plus:SI (match_operand:SI 0 "register_operand" "Uci")
1071+
(match_operand:SI 1 "const_0_to_7_operand"))]
1072+
UNSPEC_SME_ZERO_SLICES))]
1073+
"TARGET_STREAMING_SME2p1"
1074+
"zero\tza.d[%w0, %1, vgx<vector_count>]"
1075+
)
1076+
1077+
;; ZERO, slice-range form for the SME_ZA_BHIx124 modes, with the range
;; starting at offset 0.  The template prints the range
;; "0:<za32_last_offset>" followed by <vg_modifier>.  Operand 0 is the
;; SImode register that supplies the index.
;;
;; The (scratch:SME_ZA_BHIx124) element appears to exist only to give
;; the pattern a mode for the iterator substitutions
;; -- NOTE(review): confirm.
(define_insn "@aarch64_sme_zero_za_slices<mode>"
1078+
[(set (reg:VNx16QI ZA_REGNUM)
1079+
(unspec:VNx16QI
1080+
[(reg:VNx16QI ZA_REGNUM)
1081+
(reg:DI SME_STATE_REGNUM)
1082+
(scratch:SME_ZA_BHIx124)
1083+
(match_operand:SI 0 "register_operand" "Uci")]
1084+
UNSPEC_SME_ZERO_SLICES))]
1085+
"TARGET_STREAMING_SME2p1"
1086+
"zero\tza.d[%w0, 0:<za32_last_offset><vg_modifier>]"
1087+
)
1088+
1089+
;; ZERO, slice-range form for the SME_ZA_BHIx124 modes, with a constant
;; starting offset.  The index is (plus operand 0, operand 1); operand 1
;; must satisfy the const_<za32_offset_range>_operand predicate chosen
;; by the mode iterator.
;;
;; Operand 2 is not matched by the pattern: the output code sets it to
;; operand 1 + <za32_last_offset>, the last slice of the range printed
;; as "%1:%2".
(define_insn "*aarch64_sme_zero_za_slices<mode>_plus"
1090+
[(set (reg:VNx16QI ZA_REGNUM)
1091+
(unspec:VNx16QI
1092+
[(reg:VNx16QI ZA_REGNUM)
1093+
(reg:DI SME_STATE_REGNUM)
1094+
(scratch:SME_ZA_BHIx124)
1095+
(plus:SI (match_operand:SI 0 "register_operand" "Uci")
1096+
(match_operand:SI 1 "const_<za32_offset_range>_operand"))]
1097+
UNSPEC_SME_ZERO_SLICES))]
1098+
"TARGET_STREAMING_SME2p1"
1099+
{
1100+
operands[2] = GEN_INT (INTVAL (operands[1]) + <za32_last_offset>);
1101+
return "zero\tza.d[%w0, %1:%2<vg_modifier>]";
1102+
}
1103+
)
1104+
8901105
(define_insn "aarch64_sme_zero_zt0"
8911106
[(set (reg:V8DI ZT0_REGNUM)
8921107
(const_int 0))

gcc/config/aarch64/aarch64-sve-builtins-shapes.cc

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2765,6 +2765,17 @@ struct inherent_za_def : public nonoverloaded_base
27652765
};
27662766
SHAPE (inherent_za)
27672767

2768+
/* void svfoo_t0(uint32_t).

   Non-overloaded shape for functions that return void and take one
   scalar argument.  The argument type follows from the "su32" entry in
   the signature string passed to build_all (a 32-bit unsigned scalar,
   going by the suffix).  */
struct inherent_za_slice_def : public nonoverloaded_base
2769+
{
2770+
void
2771+
build (function_builder &b, const function_group_info &group) const override
2772+
{
2773+
build_all (b, "_,su32", group, MODE_none);
2774+
}
2775+
};
2776+
SHAPE (inherent_za_slice)
2778+
27682779
/* void svfoo_zt(uint64_t)
27692780
27702781
where the argument must be zero. */

gcc/config/aarch64/aarch64-sve-builtins-shapes.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ namespace aarch64_sve
140140
extern const function_shape *const inherent;
141141
extern const function_shape *const inherent_b;
142142
extern const function_shape *const inherent_za;
143+
extern const function_shape *const inherent_za_slice;
143144
extern const function_shape *const inherent_zt;
144145
extern const function_shape *const inherent_mask_za;
145146
extern const function_shape *const ldr_zt;

0 commit comments

Comments
 (0)