@@ -376,15 +376,20 @@ let SMETargetGuard = "sme2" in {
376376// Outer product and accumulate/subtract
377377//
378378
379- multiclass MOP4SingleSingle <string name, string n, string t, string i, string wide> {
380- def NAME : Inst<"svmop4" # name # "_1x1_" # n # "[_{d}_{d}]", "vidd", t, MergeNone, i # wide # "_1x1", [IsInOutZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>;
379+ multiclass MOP4<string name, string n, string t, string i, string wide> { // name: text appended to "svmop4"; n: ZA name placed in the builtin name; t: type spec; i # wide # "_1x1": LLVM intrinsic
380+ def _1x1 : Inst<"svmop4" # name # "_1x1_" # n # "[_{d}_{d}]", "vidd", t, MergeNone, i # wide # "_1x1", [IsInOutZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; // defm name is prepended implicitly; NOTE(review): tile immediate is fixed to 0..3 for every ZA width - confirm for za16/za64 users
381381}
382382
383- multiclass MOP4MixedSignsSingleSingle<string n_suffix1, string n_suffix2, string za, string t> {
384- def NAME : SInst<"sv" # n_suffix2 # "_1x1_" # za # "[_{2}_{3}]",
385- "vid" # !cond(!eq(n_suffix1, "su") : "u", true: "x"),
386- !cond(!eq(n_suffix1, "su") : "", true: "U") # t,
387- MergeNone, "aarch64_sme_" # n_suffix1 # n_suffix2 # "_wide_1x1",
383+ multiclass SUMOP4<string s, string za, string t> { // s: text appended to "svmop4" and to "aarch64_sme_sumop4"; za: ZA name; t: type spec
384+ def _1x1 : SInst<"svmop4" # s # "[_1x1]_" # za # "[_{2}_{3}]", // only "_1x1" is optional; the "_" before the ZA name stays outside the brackets so the short spelling is svmop4<s>_<za>
385+ "vidu", t, MergeNone, "aarch64_sme_sumop4" # s # "_wide_1x1",
386+ [IsStreaming, IsInOutZA],
387+ [ImmCheck<0, ImmCheck0_3>]>; // operand 0 is range-checked with ImmCheck0_3
388+ }
389+
390+ multiclass USMOP4<string s, string za, string t> { // s: text appended to "svmop4" and to "aarch64_sme_usmop4"; za: ZA name; t: type spec
391+ def _1x1 : SInst<"svmop4" # s # "[_1x1]_" # za # "[_{2}_{3}]", // only "_1x1" is optional; the "_" before the ZA name stays outside the brackets so the short spelling is svmop4<s>_<za>
392+ "vidx", t, MergeNone, "aarch64_sme_usmop4" # s # "_wide_1x1",
388393 [IsStreaming, IsInOutZA],
389394 [ImmCheck<0, ImmCheck0_3>]>;
390395}
@@ -400,28 +405,24 @@ let SMETargetGuard = "sme2" in {
400405
401406 def SVBMOPS : Inst<"svbmops_za32[_{d}]_m", "viPPdd", "iUi", MergeNone, "aarch64_sme_bmops_za32", [IsInOutZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>;
402407
403- defm SVSMOP4A_MZZ_HtoS : MOP4SingleSingle<"a", "za32", "s", "aarch64_sme_smop4a", "_wide">;
404- defm SVSMOP4S_MZZ_HtoS : MOP4SingleSingle<"s", "za32", "s", "aarch64_sme_smop4s", "_wide">;
405- defm SVSMOP4A_MZZ_BToS : MOP4SingleSingle<"a", "za32", "c", "aarch64_sme_smop4a", "_wide">;
406- defm SVSMOP4S_MZZ_BToS : MOP4SingleSingle<"s", "za32", "c", "aarch64_sme_smop4s", "_wide">;
408+ defm SVSMOP4A_H : MOP4<"a", "za32", "cs", "aarch64_sme_smop4a", "_wide">; // type spec "cs": one defm replaces the former separate "s" and "c" instantiations
409+ defm SVSMOP4S_H : MOP4<"s", "za32", "cs", "aarch64_sme_smop4s", "_wide">; // same shape, expands to intrinsic aarch64_sme_smop4s_wide_1x1
407410
408- defm SVUMOP4A_MZZ_HtoS : MOP4SingleSingle<"a", "za32", "Us", "aarch64_sme_umop4a", "_wide">;
409- defm SVUMOP4S_MZZ_HtoS : MOP4SingleSingle<"s", "za32", "Us", "aarch64_sme_umop4s", "_wide">;
410- defm SVUMOP4A_MZZ_BToS : MOP4SingleSingle<"a", "za32", "Uc", "aarch64_sme_umop4a", "_wide">;
411- defm SVUMOP4S_MZZ_BToS : MOP4SingleSingle<"s", "za32", "Uc", "aarch64_sme_umop4s", "_wide">;
411+ defm SVUMOP4A_H : MOP4<"a", "za32", "UcUs", "aarch64_sme_umop4a", "_wide">; // type spec "UcUs": one defm replaces the former separate "Us" and "Uc" instantiations
412+ defm SVUMOP4S_H : MOP4<"s", "za32", "UcUs", "aarch64_sme_umop4s", "_wide">; // same shape, expands to intrinsic aarch64_sme_umop4s_wide_1x1
412413
413- defm SVFMOP4A_MZZ_HtoS : MOP4SingleSingle <"a", "za32", "h", "aarch64_sme_mop4a", "_wide">;
414- defm SVFMOP4S_MZZ_HtoS : MOP4SingleSingle <"s", "za32", "h", "aarch64_sme_mop4s", "_wide">;
415- defm SVFMOP4A_MZZ_S : MOP4SingleSingle <"a", "za32", "f", "aarch64_sme_mop4a", "">;
416- defm SVFMOP4S_MZZ_S : MOP4SingleSingle <"s", "za32", "f", "aarch64_sme_mop4s", "">;
414+ defm SVFMOP4A_HtoS : MOP4 <"a", "za32", "h", "aarch64_sme_mop4a", "_wide">; // type "h" into za32 uses the "_wide" intrinsic (aarch64_sme_mop4a_wide_1x1)
415+ defm SVFMOP4S_HtoS : MOP4 <"s", "za32", "h", "aarch64_sme_mop4s", "_wide">;
416+ defm SVFMOP4A_S : MOP4 <"a", "za32", "f", "aarch64_sme_mop4a", "">; // empty wide suffix: intrinsic is aarch64_sme_mop4a_1x1
417+ defm SVFMOP4S_S : MOP4 <"s", "za32", "f", "aarch64_sme_mop4s", "">;
417418
418- defm SVBMOP4A_MZZ_S : MOP4SingleSingle <"a", "za32", "b", "aarch64_sme_mop4a", "_wide">;
419- defm SVBMOP4S_MZZ_S : MOP4SingleSingle <"s", "za32", "b", "aarch64_sme_mop4s", "_wide">;
419+ defm SVBMOP4A_S : MOP4 <"a", "za32", "b", "aarch64_sme_mop4a", "_wide">; // type "b" into za32 uses the "_wide" intrinsic (aarch64_sme_mop4a_wide_1x1)
420+ defm SVBMOP4S_S : MOP4 <"s", "za32", "b", "aarch64_sme_mop4s", "_wide">;
420421
421- defm SVSUMOP4A_MZZ_BtoS : MOP4MixedSignsSingleSingle<"su ", "mop4a", " za32", "c ">;
422- defm SVUSMOP4A_MZZ_BtoS : MOP4MixedSignsSingleSingle<"us ", "mop4a", " za32", "c ">;
423- defm SVSUMOP4S_MZZ_BtoS : MOP4MixedSignsSingleSingle<"su ", "mop4s", " za32", "c ">;
424- defm SVUSMOP4S_MZZ_BtoS : MOP4MixedSignsSingleSingle<"us ", "mop4s", " za32", "c ">;
422+ defm SVSUMOP4A_S : SUMOP4<"a", "za32", "cs">; // no padding inside the literals: they are spliced verbatim into the builtin and intrinsic names
423+ defm SVSUMOP4S_S : SUMOP4<"s", "za32", "cs">;
424+ defm SVUSMOP4A_S : USMOP4<"a", "za32", "UcUs">;
425+ defm SVUSMOP4S_S : USMOP4<"s", "za32", "UcUs">;
425426
426427 // VERTICAL DOT-PRODUCT
427428 def SVVDOT_LANE_ZA32_VG1x2_S : Inst<"svvdot_lane_za32[_{d}]_vg1x2", "vm2di", "s", MergeNone, "aarch64_sme_svdot_lane_za32_vg1x2", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_3>]>;
@@ -473,14 +474,14 @@ let SMETargetGuard = "sme2" in {
473474}
474475
475476let SMETargetGuard = "sme2,sme-i16i64" in {
476- defm SVSMOP4A_MZZ_HtoD : MOP4SingleSingle <"a", "za64", "s", "aarch64_sme_smop4a", "_wide">;
477- defm SVSMOP4S_MZZ_HtoD : MOP4SingleSingle <"s", "za64", "s", "aarch64_sme_smop4s", "_wide">;
478- defm SVUMOP4A_MZZ_HtoD : MOP4SingleSingle <"a", "za64", "Us", "aarch64_sme_umop4a", "_wide">;
479- defm SVUMOP4S_MZZ_HtoD : MOP4SingleSingle <"s", "za64", "Us", "aarch64_sme_umop4s", "_wide">;
480- defm SVSUMOP4A_MZZ_HtoD : MOP4MixedSignsSingleSingle<"su", "mop4a ", "za64", "s">;
481- defm SVUSMOP4A_MZZ_HtoD : MOP4MixedSignsSingleSingle<"us", "mop4a ", "za64", "s">;
482- defm SVSUMOP4S_MZZ_HtoD : MOP4MixedSignsSingleSingle<"su ", "mop4s", " za64", "s ">;
483- defm SVUSMOP4S_MZZ_HtoD : MOP4MixedSignsSingleSingle<"us ", "mop4s", " za64", "s ">;
477+ defm SVSMOP4A_HtoD : MOP4 <"a", "za64", "s", "aarch64_sme_smop4a", "_wide">; // za64 forms; same "_wide" intrinsics as the za32 signed/unsigned defms
478+ defm SVSMOP4S_HtoD : MOP4 <"s", "za64", "s", "aarch64_sme_smop4s", "_wide">;
479+ defm SVUMOP4A_HtoD : MOP4 <"a", "za64", "Us", "aarch64_sme_umop4a", "_wide">;
480+ defm SVUMOP4S_HtoD : MOP4 <"s", "za64", "Us", "aarch64_sme_umop4s", "_wide">;
481+ defm SVSUMOP4A_D : SUMOP4<"a", "za64", "s">; // no padding inside the literals: they are spliced verbatim into the builtin and intrinsic names
482+ defm SVSUMOP4S_D : SUMOP4<"s", "za64", "s">;
483+ defm SVUSMOP4A_D : USMOP4<"a", "za64", "Us">;
484+ defm SVUSMOP4S_D : USMOP4<"s", "za64", "Us">;
484485
485486 def SVVDOT_LANE_ZA64_VG1x4_S : Inst<"svvdot_lane_za64[_{d}]_vg1x4", "vm4di", "s", MergeNone, "aarch64_sme_svdot_lane_za64_vg1x4", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_1>]>;
486487 def SVVDOT_LANE_ZA64_VG1x4_U : Inst<"svvdot_lane_za64[_{d}]_vg1x4", "vm4di", "Us", MergeNone, "aarch64_sme_uvdot_lane_za64_vg1x4", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_1>]>;
@@ -518,8 +519,8 @@ let SMETargetGuard = "sme2" in {
518519}
519520
520521let SMETargetGuard = "sme2,sme-f64f64" in {
521- defm SVFMOP4A_MZZ_D : MOP4SingleSingle <"a", "za64", "d", "aarch64_sme_mop4a", "">;
522- defm SVFMOP4S_MZZ_D : MOP4SingleSingle <"s", "za64", "d", "aarch64_sme_mop4s", "">;
522+ defm SVFMOP4A_D : MOP4 <"a", "za64", "d", "aarch64_sme_mop4a", "">; // empty wide suffix: intrinsic is aarch64_sme_mop4a_1x1
523+ defm SVFMOP4S_D : MOP4 <"s", "za64", "d", "aarch64_sme_mop4s", "">;
523524
524525 def SVMLA_MULTI_VG1x2_F64 : Inst<"svmla_za64[_{d}]_vg1x2", "vm22", "d", MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsInOutZA], []>;
525526 def SVMLA_MULTI_VG1x4_F64 : Inst<"svmla_za64[_{d}]_vg1x4", "vm44", "d", MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsInOutZA], []>;
@@ -538,8 +539,8 @@ let SMETargetGuard = "sme2,sme-f64f64" in {
538539}
539540
540541let SMETargetGuard = "sme-f16f16" in {
541- defm SVFMOP4A_MZZ_H : MOP4SingleSingle <"a", "za16", "h", "aarch64_sme_mop4a", "">;
542- defm SVFMOP4S_MZZ_H : MOP4SingleSingle <"s", "za16", "h", "aarch64_sme_mop4s", "">;
542+ defm SVFMOP4A_H : MOP4 <"a", "za16", "h", "aarch64_sme_mop4a", "">; // type "h" into za16: non-wide intrinsic, unlike the "h" into za32 form which passes "_wide"
543+ defm SVFMOP4S_H : MOP4 <"s", "za16", "h", "aarch64_sme_mop4s", "">;
543544
544545 def SVMLA_MULTI_VG1x2_F16 : Inst<"svmla_za16[_f16]_vg1x2", "vm22", "h", MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsInOutZA], []>;
545546 def SVMLA_MULTI_VG1x4_F16 : Inst<"svmla_za16[_f16]_vg1x4", "vm44", "h", MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsInOutZA], []>;
@@ -558,8 +559,8 @@ let SMETargetGuard = "sme-f16f16" in {
558559}
559560
560561let SMETargetGuard = "sme-b16b16" in {
561- defm SVBMOP4A_MZZ_H : MOP4SingleSingle <"a", "za16", "bf", "aarch64_sme_mop4a", "">;
562- defm SVBMOP4S_MZZ_H : MOP4SingleSingle <"s", "za16", "bf", "aarch64_sme_mop4s", "">;
562+ defm SVBMOP4A_H : MOP4 <"a", "za16", "b", "aarch64_sme_mop4a", "">; // "b" only: a type spec of "bf" would also instantiate an "f" variant on za16
563+ defm SVBMOP4S_H : MOP4 <"s", "za16", "b", "aarch64_sme_mop4s", "">;
563564
564565 def SVMLA_MULTI_VG1x2_BF16 : Inst<"svmla_za16[_bf16]_vg1x2", "vm22", "b", MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsInOutZA], []>;
565566 def SVMLA_MULTI_VG1x4_BF16 : Inst<"svmla_za16[_bf16]_vg1x4", "vm44", "b", MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsInOutZA], []>;
0 commit comments