@@ -707,16 +707,14 @@ let Predicates = [HasSVE_or_SME] in {
707707 defm SDOT_ZZZ : sve_intx_dot<0b0, "sdot", AArch64sdot>;
708708 defm UDOT_ZZZ : sve_intx_dot<0b1, "udot", AArch64udot>;
709709
710- let Predicates = [HasSVE_or_SME] in {
711- def : Pat<(nxv4i32 (partial_reduce_umla nxv4i32:$Acc, nxv16i8:$MulLHS, nxv16i8:$MulRHS)),
712- (UDOT_ZZZ_BtoS $Acc, $MulLHS, $MulRHS)>;
713- def : Pat<(nxv4i32 (partial_reduce_smla nxv4i32:$Acc, nxv16i8:$MulLHS, nxv16i8:$MulRHS)),
714- (SDOT_ZZZ_BtoS $Acc, $MulLHS, $MulRHS)>;
715- def : Pat<(nxv2i64 (partial_reduce_umla nxv2i64:$Acc, nxv8i16:$MulLHS, nxv8i16:$MulRHS)),
716- (UDOT_ZZZ_HtoD $Acc, $MulLHS, $MulRHS)>;
717- def : Pat<(nxv2i64 (partial_reduce_smla nxv2i64:$Acc, nxv8i16:$MulLHS, nxv8i16:$MulRHS)),
718- (SDOT_ZZZ_HtoD $Acc, $MulLHS, $MulRHS)>;
719- } // End HasSVE_or_SME
710+ def : Pat<(nxv4i32 (partial_reduce_umla nxv4i32:$Acc, nxv16i8:$MulLHS, nxv16i8:$MulRHS)),
711+ (UDOT_ZZZ_BtoS $Acc, $MulLHS, $MulRHS)>;
712+ def : Pat<(nxv4i32 (partial_reduce_smla nxv4i32:$Acc, nxv16i8:$MulLHS, nxv16i8:$MulRHS)),
713+ (SDOT_ZZZ_BtoS $Acc, $MulLHS, $MulRHS)>;
714+ def : Pat<(nxv2i64 (partial_reduce_umla nxv2i64:$Acc, nxv8i16:$MulLHS, nxv8i16:$MulRHS)),
715+ (UDOT_ZZZ_HtoD $Acc, $MulLHS, $MulRHS)>;
716+ def : Pat<(nxv2i64 (partial_reduce_smla nxv2i64:$Acc, nxv8i16:$MulLHS, nxv8i16:$MulRHS)),
717+ (SDOT_ZZZ_HtoD $Acc, $MulLHS, $MulRHS)>;
720718
721719 defm SDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b0, "sdot", int_aarch64_sve_sdot_lane>;
722720 defm UDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b1, "udot", int_aarch64_sve_udot_lane>;
@@ -3646,6 +3644,9 @@ let Predicates = [HasSVE_or_SME, HasMatMulInt8] in {
36463644 defm USDOT_ZZZ : sve_int_dot_mixed<"usdot", AArch64usdot>;
36473645 defm USDOT_ZZZI : sve_int_dot_mixed_indexed<0, "usdot", int_aarch64_sve_usdot_lane>;
36483646 defm SUDOT_ZZZI : sve_int_dot_mixed_indexed<1, "sudot", int_aarch64_sve_sudot_lane>;
3647+
3648+ def : Pat<(nxv4i32 (partial_reduce_sumla nxv4i32:$Acc, nxv16i8:$LHS, nxv16i8:$RHS)),
3649+ (USDOT_ZZZ $Acc, $RHS, $LHS)>;
36493650} // End HasSVE_or_SME, HasMatMulInt8
36503651
36513652let Predicates = [HasSVE, HasMatMulFP32] in {
@@ -3752,6 +3753,19 @@ let Predicates = [HasSVE2_or_SME] in {
37523753 defm UMLSLB_ZZZ : sve2_int_mla_long<0b10110, "umlslb", int_aarch64_sve_umlslb>;
37533754 defm UMLSLT_ZZZ : sve2_int_mla_long<0b10111, "umlslt", int_aarch64_sve_umlslt>;
37543755
3756+ def : Pat<(nxv2i64 (partial_reduce_umla nxv2i64:$Acc, nxv4i32:$LHS, nxv4i32:$RHS)),
3757+ (UMLALT_ZZZ_D (UMLALB_ZZZ_D $Acc, $LHS, $RHS), $LHS, $RHS)>;
3758+ def : Pat<(nxv2i64 (partial_reduce_smla nxv2i64:$Acc, nxv4i32:$LHS, nxv4i32:$RHS)),
3759+ (SMLALT_ZZZ_D (SMLALB_ZZZ_D $Acc, $LHS, $RHS), $LHS, $RHS)>;
3760+ def : Pat<(nxv4i32 (partial_reduce_umla nxv4i32:$Acc, nxv8i16:$LHS, nxv8i16:$RHS)),
3761+ (UMLALT_ZZZ_S (UMLALB_ZZZ_S $Acc, $LHS, $RHS), $LHS, $RHS)>;
3762+ def : Pat<(nxv4i32 (partial_reduce_smla nxv4i32:$Acc, nxv8i16:$LHS, nxv8i16:$RHS)),
3763+ (SMLALT_ZZZ_S (SMLALB_ZZZ_S $Acc, $LHS, $RHS), $LHS, $RHS)>;
3764+ def : Pat<(nxv8i16 (partial_reduce_umla nxv8i16:$Acc, nxv16i8:$LHS, nxv16i8:$RHS)),
3765+ (UMLALT_ZZZ_H (UMLALB_ZZZ_H $Acc, $LHS, $RHS), $LHS, $RHS)>;
3766+ def : Pat<(nxv8i16 (partial_reduce_smla nxv8i16:$Acc, nxv16i8:$LHS, nxv16i8:$RHS)),
3767+ (SMLALT_ZZZ_H (SMLALB_ZZZ_H $Acc, $LHS, $RHS), $LHS, $RHS)>;
3768+
37553769 // SVE2 saturating multiply-add long (indexed)
37563770 defm SQDMLALB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b0100, "sqdmlalb", int_aarch64_sve_sqdmlalb_lane>;
37573771 defm SQDMLALT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b0101, "sqdmlalt", int_aarch64_sve_sqdmlalt_lane>;
@@ -3880,19 +3894,6 @@ let Predicates = [HasSVE2_or_SME] in {
38803894 def : Pat<(nxv8i16 (partial_reduce_smla nxv8i16:$Acc, nxv16i8:$Input, (nxv16i8 (splat_vector (i32 1))))),
38813895 (SADDWT_ZZZ_H (SADDWB_ZZZ_H $Acc, $Input), $Input)>;
38823896
3883- def : Pat<(nxv2i64 (partial_reduce_umla nxv2i64:$Acc, nxv4i32:$LHS, nxv4i32:$RHS)),
3884- (UMLALT_ZZZ_D (UMLALB_ZZZ_D $Acc, $LHS, $RHS), $LHS, $RHS)>;
3885- def : Pat<(nxv2i64 (partial_reduce_smla nxv2i64:$Acc, nxv4i32:$LHS, nxv4i32:$RHS)),
3886- (SMLALT_ZZZ_D (SMLALB_ZZZ_D $Acc, $LHS, $RHS), $LHS, $RHS)>;
3887- def : Pat<(nxv4i32 (partial_reduce_umla nxv4i32:$Acc, nxv8i16:$LHS, nxv8i16:$RHS)),
3888- (UMLALT_ZZZ_S (UMLALB_ZZZ_S $Acc, $LHS, $RHS), $LHS, $RHS)>;
3889- def : Pat<(nxv4i32 (partial_reduce_smla nxv4i32:$Acc, nxv8i16:$LHS, nxv8i16:$RHS)),
3890- (SMLALT_ZZZ_S (SMLALB_ZZZ_S $Acc, $LHS, $RHS), $LHS, $RHS)>;
3891- def : Pat<(nxv8i16 (partial_reduce_umla nxv8i16:$Acc, nxv16i8:$LHS, nxv16i8:$RHS)),
3892- (UMLALT_ZZZ_H (UMLALB_ZZZ_H $Acc, $LHS, $RHS), $LHS, $RHS)>;
3893- def : Pat<(nxv8i16 (partial_reduce_smla nxv8i16:$Acc, nxv16i8:$LHS, nxv16i8:$RHS)),
3894- (SMLALT_ZZZ_H (SMLALB_ZZZ_H $Acc, $LHS, $RHS), $LHS, $RHS)>;
3895-
38963897 // SVE2 integer multiply long
38973898 defm SQDMULLB_ZZZ : sve2_wide_int_arith_long<0b11000, "sqdmullb", int_aarch64_sve_sqdmullb>;
38983899 defm SQDMULLT_ZZZ : sve2_wide_int_arith_long<0b11001, "sqdmullt", int_aarch64_sve_sqdmullt>;
@@ -4200,11 +4201,6 @@ let Predicates = [HasSVEAES2, HasNonStreamingSVE_or_SSVE_AES] in {
42004201 def PMULL_2ZZZ_Q : sve_crypto_pmull_multi<"pmull">;
42014202}
42024203
4203- let Predicates = [HasSVE_or_SME, HasMatMulInt8] in {
4204- def : Pat<(nxv4i32 (partial_reduce_sumla nxv4i32:$Acc, nxv16i8:$LHS, nxv16i8:$RHS)),
4205- (USDOT_ZZZ $Acc, $RHS, $LHS)>;
4206- } // End HasSVE_or_SME, HasMatMulInt8
4207-
42084204//===----------------------------------------------------------------------===//
42094205// SME or SVE2.1 instructions
42104206//===----------------------------------------------------------------------===//
@@ -4238,12 +4234,10 @@ defm UDOT_ZZZ_HtoS : sve2p1_two_way_dot_vv<"udot", 0b1, int_aarch64_sve_udot_x2
42384234defm SDOT_ZZZI_HtoS : sve2p1_two_way_dot_vvi<"sdot", 0b0, int_aarch64_sve_sdot_lane_x2>;
42394235defm UDOT_ZZZI_HtoS : sve2p1_two_way_dot_vvi<"udot", 0b1, int_aarch64_sve_udot_lane_x2>;
42404236
4241- let Predicates = [HasSVE2p1_or_SME2] in {
4242- def : Pat<(nxv4i32 (partial_reduce_umla nxv4i32:$Acc, nxv8i16:$MulLHS, nxv8i16:$MulRHS)),
4243- (UDOT_ZZZ_HtoS $Acc, $MulLHS, $MulRHS)>;
4244- def : Pat<(nxv4i32 (partial_reduce_smla nxv4i32:$Acc, nxv8i16:$MulLHS, nxv8i16:$MulRHS)),
4245- (SDOT_ZZZ_HtoS $Acc, $MulLHS, $MulRHS)>;
4246- } // End HasSVE2p1_or_SME2
4237+ def : Pat<(nxv4i32 (partial_reduce_umla nxv4i32:$Acc, nxv8i16:$MulLHS, nxv8i16:$MulRHS)),
4238+ (UDOT_ZZZ_HtoS $Acc, $MulLHS, $MulRHS)>;
4239+ def : Pat<(nxv4i32 (partial_reduce_smla nxv4i32:$Acc, nxv8i16:$MulLHS, nxv8i16:$MulRHS)),
4240+ (SDOT_ZZZ_HtoS $Acc, $MulLHS, $MulRHS)>;
42474241
42484242defm SQCVTN_Z2Z_StoH : sve2p1_multi_vec_extract_narrow<"sqcvtn", 0b00, int_aarch64_sve_sqcvtn_x2>;
42494243defm UQCVTN_Z2Z_StoH : sve2p1_multi_vec_extract_narrow<"uqcvtn", 0b01, int_aarch64_sve_uqcvtn_x2>;
0 commit comments