@@ -553,6 +553,107 @@ def N3Write_16c_16V0 : SchedWriteRes<[N3UnitV0, N3UnitV0, N3UnitV0, N3UnitV0,
553553 let NumMicroOps = 16;
554554}
555555
556+ //===----------------------------------------------------------------------===//
557+ // Define forwarded types
558+ // NOTE (SOG, p. 19, n. 2): accumulator forwarding may not be supported for
559+ // consumers of 64-bit multiply-high operations; TODO confirm against the SOG.
560+
561+ def N3Wr_FMA : SchedWriteRes<[N3UnitV]> { let Latency = 4; }
562+ def N3Rd_FMA : SchedReadAdvance<2, [WriteFMul, N3Wr_FMA]>;
563+
564+ def N3Wr_VMA : SchedWriteRes<[N3UnitV0]> { let Latency = 4; }
565+ def N3Rd_VMA : SchedReadAdvance<3, [N3Wr_VMA]>;
566+
567+ def N3Wr_VMAL : SchedWriteRes<[N3UnitV0]> { let Latency = 4; }
568+ def N3Rd_VMAL : SchedReadAdvance<3, [N3Wr_VMAL]>;
569+
570+ def N3Wr_VMAH : SchedWriteRes<[N3UnitV0]> { let Latency = 4; }
571+ def N3Rd_VMAH : SchedReadAdvance<2, [N3Wr_VMAH]>;
572+
573+ def N3Wr_VMASL : SchedWriteRes<[N3UnitV0]> { let Latency = 4; }
574+ def N3Rd_VMASL : SchedReadAdvance<2, [N3Wr_VMASL]>;
575+
576+ def N3Wr_ADA : SchedWriteRes<[N3UnitV1]> { let Latency = 4; }
577+ def N3Rd_ADA : SchedReadAdvance<3, [N3Wr_ADA]>;
578+
579+ def N3Wr_VDOT : SchedWriteRes<[N3UnitV]> { let Latency = 3; }
580+ def N3Rd_VDOT : SchedReadAdvance<2, [N3Wr_VDOT]>;
581+
582+ def N3Wr_VMMA : SchedWriteRes<[N3UnitV]> { let Latency = 3; }
583+ def N3Rd_VMMA : SchedReadAdvance<2, [N3Wr_VMMA]>;
584+
585+ def N3Wr_FCMA : SchedWriteRes<[N3UnitV]> { let Latency = 4; }
586+ def N3Rd_FCMA : SchedReadAdvance<2, [N3Wr_FCMA]>;
587+
588+ def N3Wr_FPM : SchedWriteRes<[N3UnitV]> { let Latency = 3; }
589+ def N3Wr_FPMA : SchedWriteRes<[N3UnitV]> { let Latency = 4; }
590+ def N3Rd_FPMA : SchedReadAdvance<2, [N3Wr_FPM, N3Wr_FPMA]>;
591+
592+ def N3Wr_FPMAL : SchedWriteRes<[N3UnitV]> { let Latency = 4; }
593+ def N3Rd_FPMAL : SchedReadAdvance<2, [N3Wr_FPMAL]>;
594+
595+ def N3Wr_BFD : SchedWriteRes<[N3UnitV]> { let Latency = 4; }
596+ def N3Rd_BFD : SchedReadAdvance<2, [N3Wr_BFD]>;
597+
598+ def N3Wr_BFMMA : SchedWriteRes<[N3UnitV]> { let Latency = 5; }
599+ def N3Rd_BFMMA : SchedReadAdvance<2, [N3Wr_BFMMA]>;
600+
601+ def N3Wr_BFMLA : SchedWriteRes<[N3UnitV]> { let Latency = 4; }
602+ def N3Rd_BFMLA : SchedReadAdvance<2, [N3Wr_BFMLA]>;
603+
604+ def N3Wr_CRC : SchedWriteRes<[N3UnitM0]> { let Latency = 2; }
605+ def N3Rd_CRC : SchedReadAdvance<1, [N3Wr_CRC]>;
606+
607+ def N3Wr_ZA : SchedWriteRes<[N3UnitV1]> { let Latency = 4; }
608+ def N3Rd_ZA : SchedReadAdvance<3, [N3Wr_ZA]>;
609+ def N3Wr_ZPA : SchedWriteRes<[N3UnitV1]> { let Latency = 4; }
610+ def N3Rd_ZPA : SchedReadAdvance<3, [N3Wr_ZPA]>;
611+ def N3Wr_ZSA : SchedWriteRes<[N3UnitV1]> { let Latency = 4; }
612+ def N3Rd_ZSA : SchedReadAdvance<3, [N3Wr_ZSA]>;
613+
614+ def N3Wr_ZDOTB : SchedWriteRes<[N3UnitV]> { let Latency = 3; }
615+ def N3Rd_ZDOTB : SchedReadAdvance<2, [N3Wr_ZDOTB]>;
616+ def N3Wr_ZDOTH : SchedWriteRes<[N3UnitV0]> { let Latency = 4; }
617+ def N3Rd_ZDOTH : SchedReadAdvance<3, [N3Wr_ZDOTH]>;
618+
619+ def N3Wr_ZCMABHS : SchedWriteRes<[N3UnitV0]> { let Latency = 4; }
620+ def N3Rd_ZCMABHS : SchedReadAdvance<3, [N3Wr_ZCMABHS]>;
621+ def N3Wr_ZCMAD : SchedWriteRes<[N3UnitV0, N3UnitV0]> { let Latency = 5; }
622+ def N3Rd_ZCMAD : SchedReadAdvance<2, [N3Wr_ZCMAD]>;
623+
624+ def N3Wr_ZMMA : SchedWriteRes<[N3UnitV]> { let Latency = 3; }
625+ def N3Rd_ZMMA : SchedReadAdvance<2, [N3Wr_ZMMA]>;
626+
627+ def N3Wr_ZMABHS : SchedWriteRes<[N3UnitV0]> { let Latency = 4; }
628+ def N3Rd_ZMABHS : SchedReadAdvance<3, [N3Wr_ZMABHS]>;
629+ def N3Wr_ZMAD : SchedWriteRes<[N3UnitV0, N3UnitV0]> { let Latency = 5; }
630+ def N3Rd_ZMAD : SchedReadAdvance<2, [N3Wr_ZMAD]>;
631+
632+ def N3Wr_ZMAL : SchedWriteRes<[N3UnitV0]> { let Latency = 4; }
633+ def N3Rd_ZMAL : SchedReadAdvance<3, [N3Wr_ZMAL]>;
634+
635+ def N3Wr_ZMASQL : SchedWriteRes<[N3UnitV0]> { let Latency = 4; }
636+ def N3Wr_ZMASQBHS : SchedWriteRes<[N3UnitV0]> { let Latency = 4; }
637+ def N3Wr_ZMASQD : SchedWriteRes<[N3UnitV0, N3UnitV0]> { let Latency = 5; }
638+ def N3Rd_ZMASQ : SchedReadAdvance<2, [N3Wr_ZMASQL, N3Wr_ZMASQBHS,
639+ N3Wr_ZMASQD]>;
640+
641+ def N3Wr_ZFCMA : SchedWriteRes<[N3UnitV]> { let Latency = 4; }
642+ def N3Rd_ZFCMA : SchedReadAdvance<2, [N3Wr_ZFCMA]>;
643+
644+ def N3Wr_ZFMA : SchedWriteRes<[N3UnitV]> { let Latency = 4; }
645+ def N3Rd_ZFMA : SchedReadAdvance<2, [N3Wr_ZFMA]>;
646+
647+ def N3Wr_ZFMAL : SchedWriteRes<[N3UnitV]> { let Latency = 4; }
648+ def N3Rd_ZFMAL : SchedReadAdvance<2, [N3Wr_ZFMAL]>;
649+
650+ def N3Wr_ZBFDOT : SchedWriteRes<[N3UnitV]> { let Latency = 4; }
651+ def N3Rd_ZBFDOT : SchedReadAdvance<2, [N3Wr_ZBFDOT]>;
652+ def N3Wr_ZBFMMA : SchedWriteRes<[N3UnitV]> { let Latency = 5; }
653+ def N3Rd_ZBFMMA : SchedReadAdvance<2, [N3Wr_ZBFMMA]>;
654+ def N3Wr_ZBFMAL : SchedWriteRes<[N3UnitV]> { let Latency = 4; }
655+ def N3Rd_ZBFMAL : SchedReadAdvance<2, [N3Wr_ZBFMAL]>;
656+
556657// Miscellaneous
557658// -----------------------------------------------------------------------------
558659
@@ -832,10 +933,11 @@ def : SchedAlias<WriteFDiv , N3Write_7c_1V0>;
832933def : InstRW<[N3Write_12c_1V0], (instrs FDIVDrr, FSQRTDr)>;
833934
834935// FP multiply
835- def : SchedAlias <WriteFMul, N3Write_3c_1V>;
936+ def : WriteRes <WriteFMul, [N3UnitV]> { let Latency = 3; }
836937
837938// FP multiply accumulate
838- def : InstRW<[N3Write_4c_1V], (instregex "^(FMADD|FMSUB|FNMADD|FNMSUB)[DHS]rrr$")>;
939+ def : InstRW<[N3Wr_FMA, ReadDefault, ReadDefault, N3Rd_FMA],
940+ (instregex "^(FMADD|FMSUB|FNMADD|FNMSUB)[DHS]rrr$")>;
839941
840942// FP round to integral
841943def : InstRW<[N3Write_3c_1V0], (instregex "^FRINT([AIMNPXZ]|32X|64X|32Z|64Z)[DHS]r$")>;
@@ -969,7 +1071,7 @@ def : SchedAlias<WriteVq, N3Write_2c_1V>;
9691071// ASIMD absolute diff accum long
9701072// ASIMD pairwise add and accumulate long
9711073// ASIMD shift accumulate
972- def : InstRW<[N3Write_4c_1V1 ], (instregex "^[SU]ABAL?v",
1074+ def : InstRW<[N3Wr_ADA, N3Rd_ADA ], (instregex "^[SU]ABAL?v",
9731075 "^[SU]ADALPv",
9741076 "^[SU]R?SRAv")>;
9751077
@@ -984,10 +1086,11 @@ def : InstRW<[N3Write_6c_2V1], (instregex "^[SU]?ADDL?Vv16i8v$")>;
9841086
9851087// ASIMD dot product
9861088// ASIMD dot product using signed and unsigned integers
987- def : InstRW<[N3Write_3c_1V], (instregex "^([SU]|SU|US)DOT(lane)?(v8|v16)i8$")>;
1089+ def : InstRW<[N3Wr_VDOT, N3Rd_VDOT],
1090+ (instregex "^([SU]|SU|US)DOT(lane)?(v8|v16)i8$")>;
9881091
9891092// ASIMD matrix multiply-accumulate
990- def : InstRW<[N3Write_3c_1V ], (instrs SMMLA, UMMLA, USMMLA)>;
1093+ def : InstRW<[N3Wr_VMMA, N3Rd_VMMA ], (instrs SMMLA, UMMLA, USMMLA)>;
9911094
9921095// ASIMD max/min, reduce, 4H/4S
9931096def : InstRW<[N3Write_3c_1V1], (instregex "^[SU](MAX|MIN)Vv4i(16|32)v$")>;
@@ -1002,16 +1105,16 @@ def : InstRW<[N3Write_6c_2V1], (instregex "[SU](MAX|MIN)Vv16i8v$")>;
10021105def : InstRW<[N3Write_4c_1V0], (instregex "^MULv", "^SQ(R)?DMULHv")>;
10031106
10041107// ASIMD multiply accumulate
1005- def : InstRW<[N3Write_4c_1V0 ], (instregex "^MLAv", "^MLSv")>;
1108+ def : InstRW<[N3Wr_VMA, N3Rd_VMA ], (instregex "^MLAv", "^MLSv")>;
10061109
10071110// ASIMD multiply accumulate high
1008- def : InstRW<[N3Write_4c_1V0 ], (instregex "^SQRDMLAHv", "^SQRDMLSHv")>;
1111+ def : InstRW<[N3Wr_VMAH, N3Rd_VMAH ], (instregex "^SQRDMLAHv", "^SQRDMLSHv")>;
10091112
10101113// ASIMD multiply accumulate long
1011- def : InstRW<[N3Write_4c_1V0 ], (instregex "^[SU]MLALv", "^[SU]MLSLv")>;
1114+ def : InstRW<[N3Wr_VMAL, N3Rd_VMAL ], (instregex "^[SU]MLALv", "^[SU]MLSLv")>;
10121115
10131116// ASIMD multiply accumulate saturating long
1014- def : InstRW<[N3Write_4c_1V0 ], (instregex "^SQDMLALv", "^SQDMLSLv")>;
1117+ def : InstRW<[N3Wr_VMASL, N3Rd_VMASL ], (instregex "^SQDMLALv", "^SQDMLSLv")>;
10151118
10161119// ASIMD multiply/multiply long (8x8) polynomial, D-form
10171120// ASIMD multiply/multiply long (8x8) polynomial, Q-form
@@ -1058,7 +1161,7 @@ def : InstRW<[N3Write_4c_1V1],
10581161def : InstRW<[N3Write_3c_1V], (instregex "^FCADDv")>;
10591162
10601163// ASIMD FP complex multiply add
1061- def : InstRW<[N3Write_4c_1V ], (instregex "^FCMLAv")>;
1164+ def : InstRW<[N3Wr_FCMA, N3Rd_FCMA ], (instregex "^FCMLAv")>;
10621165
10631166// ASIMD FP convert, long (F16 to F32)
10641167def : InstRW<[N3Write_4c_2V0], (instregex "^FCVTL(v4|v8)i16")>;
@@ -1114,13 +1217,13 @@ def : InstRW<[N3Write_4c_2V], (instregex "^(FMAX|FMIN)(NM)?Vv4(i16|i32)v$")>;
11141217def : InstRW<[N3Write_6c_3V], (instregex "^(FMAX|FMIN)(NM)?Vv8i16v$")>;
11151218
11161219// ASIMD FP multiply
1117- def : InstRW<[N3Write_3c_1V ], (instregex "^FMULv", "^FMULXv")>;
1220+ def : InstRW<[N3Wr_FPM ], (instregex "^FMULv", "^FMULXv")>;
11181221
11191222// ASIMD FP multiply accumulate
1120- def : InstRW<[N3Write_4c_1V ], (instregex "^FMLAv", "^FMLSv")>;
1223+ def : InstRW<[N3Wr_FPMA, N3Rd_FPMA ], (instregex "^FMLAv", "^FMLSv")>;
11211224
11221225// ASIMD FP multiply accumulate long
1123- def : InstRW<[N3Write_4c_1V ], (instregex "^FMLALv", "^FMLSLv")>;
1226+ def : InstRW<[N3Wr_FPMAL, N3Rd_FPMAL ], (instregex "^FMLALv", "^FMLSLv")>;
11241227
11251228// ASIMD FP round, D-form F32 and Q-form F64
11261229def : InstRW<[N3Write_3c_1V0],
@@ -1157,13 +1260,14 @@ def : InstRW<[N3Write_13c_2V0], (instrs FSQRTv2f64)>;
11571260def : InstRW<[N3Write_4c_2V0], (instrs BFCVTN, BFCVTN2)>;
11581261
11591262// ASIMD dot product
1160- def : InstRW<[N3Write_4c_1V ], (instrs BFDOTv4bf16, BFDOTv8bf16)>;
1263+ def : InstRW<[N3Wr_BFD, N3Rd_BFD ], (instrs BFDOTv4bf16, BFDOTv8bf16)>;
11611264
11621265// ASIMD matrix multiply accumulate
1163- def : InstRW<[N3Write_5c_1V ], (instrs BFMMLA)>;
1266+ def : InstRW<[N3Wr_BFMMA, N3Rd_BFMMA ], (instrs BFMMLA)>;
11641267
11651268// ASIMD multiply accumulate long
1166- def : InstRW<[N3Write_4c_1V], (instrs BFMLALB, BFMLALBIdx, BFMLALT, BFMLALTIdx)>;
1269+ def : InstRW<[N3Wr_BFMLA, N3Rd_BFMLA],
1270+ (instrs BFMLALB, BFMLALBIdx, BFMLALT, BFMLALTIdx)>;
11671271
11681272// Scalar convert, F32 to BF16
11691273def : InstRW<[N3Write_3c_1V0], (instrs BFCVT)>;
@@ -1502,7 +1606,7 @@ def : InstRW<[N3Write_4c_1V0], (instrs SM4E, SM4ENCKEY)>;
15021606// -----------------------------------------------------------------------------
15031607
15041608// CRC checksum ops
1505- def : InstRW<[N3Write_2c_1M0 ], (instregex "^CRC32")>;
1609+ def : InstRW<[N3Wr_CRC, N3Rd_CRC ], (instregex "^CRC32")>;
15061610
15071611// SVE Predicate instructions
15081612// -----------------------------------------------------------------------------
@@ -1592,10 +1696,10 @@ def : InstRW<[N3Write_2c_1V], (instregex "^[SU]ABD_ZPmZ_[BHSD]",
15921696 "^[SU]ABD_ZPZZ_[BHSD]")>;
15931697
15941698// Arithmetic, absolute diff accum
1595- def : InstRW<[N3Write_4c_1V1 ], (instregex "^[SU]ABA_ZZZ_[BHSD]$")>;
1699+ def : InstRW<[N3Wr_ZA, N3Rd_ZA ], (instregex "^[SU]ABA_ZZZ_[BHSD]$")>;
15961700
15971701// Arithmetic, absolute diff accum long
1598- def : InstRW<[N3Write_4c_1V1 ], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]$")>;
1702+ def : InstRW<[N3Wr_ZA, N3Rd_ZA ], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]$")>;
15991703
16001704// Arithmetic, absolute diff long
16011705def : InstRW<[N3Write_2c_1V], (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]$")>;
@@ -1629,7 +1733,8 @@ def : InstRW<[N3Write_2c_1V], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]$")>;
16291733def : InstRW<[N3Write_2c_1V], (instregex "^ADDP_ZPmZ_[BHSD]$")>;
16301734
16311735// Arithmetic, pairwise add and accum long
1632- def : InstRW<[N3Write_4c_1V1], (instregex "^[SU]ADALP_ZPmZ_[HSD]$")>;
1736+ def : InstRW<[N3Wr_ZPA, ReadDefault, N3Rd_ZPA],
1737+ (instregex "^[SU]ADALP_ZPmZ_[HSD]$")>;
16331738
16341739// Arithmetic, shift
16351740def : InstRW<[N3Write_2c_1V1],
@@ -1642,7 +1747,7 @@ def : InstRW<[N3Write_2c_1V1],
16421747 "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]")>;
16431748
16441749// Arithmetic, shift and accumulate
1645- def : InstRW<[N3Write_4c_1V1 ],
1750+ def : InstRW<[N3Wr_ZSA, N3Rd_ZSA ],
16461751 (instregex "^(SRSRA|SSRA|URSRA|USRA)_ZZI_[BHSD]$")>;
16471752
16481753// Arithmetic, shift by immediate
@@ -1688,16 +1793,17 @@ def : InstRW<[N3Write_2c_1V],
16881793def : InstRW<[N3Write_2c_1V], (instregex "^(SQ)?CADD_ZZI_[BHSD]$")>;
16891794
16901795// Complex dot product 8-bit element
1691- def : InstRW<[N3Write_3c_1V ], (instrs CDOT_ZZZ_S, CDOT_ZZZI_S)>;
1796+ def : InstRW<[N3Wr_ZDOTB, N3Rd_ZDOTB ], (instrs CDOT_ZZZ_S, CDOT_ZZZI_S)>;
16921797
16931798// Complex dot product 16-bit element
1694- def : InstRW<[N3Write_4c_1V0 ], (instrs CDOT_ZZZ_D, CDOT_ZZZI_D)>;
1799+ def : InstRW<[N3Wr_ZDOTH, N3Rd_ZDOTH ], (instrs CDOT_ZZZ_D, CDOT_ZZZI_D)>;
16951800
16961801// Complex multiply-add B, H, S element size
1697- def : InstRW<[N3Write_4c_1V0], (instregex "^CMLA_ZZZ_[BHS]$", "^CMLA_ZZZI_[HS]$")>;
1802+ def : InstRW<[N3Wr_ZCMABHS, N3Rd_ZCMABHS],
1803+ (instregex "^CMLA_ZZZ_[BHS]$", "^CMLA_ZZZI_[HS]$")>;
16981804
16991805// Complex multiply-add D element size
1700- def : InstRW<[N3Write_5c_2V0 ], (instrs CMLA_ZZZ_D)>;
1806+ def : InstRW<[N3Wr_ZCMAD, N3Rd_ZCMAD ], (instrs CMLA_ZZZ_D)>;
17011807
17021808// Conditional extract operations, scalar form
17031809def : InstRW<[N3Write_8c_1M0_1V], (instregex "^CLAST[AB]_RPZ_[BHSD]$")>;
@@ -1736,13 +1842,14 @@ def : InstRW<[N3Write_16c_16V0], (instregex "^[SU]DIVR?_ZPmZ_D",
17361842 "^[SU]DIV_ZPZZ_D")>;
17371843
17381844// Dot product, 8 bit
1739- def : InstRW<[N3Write_3c_1V ], (instregex "^[SU]DOT_ZZZI?_BtoS$")>;
1845+ def : InstRW<[N3Wr_ZDOTB, N3Rd_ZDOTB ], (instregex "^[SU]DOT_ZZZI?_BtoS$")>;
17401846
17411847// Dot product, 8 bit, using signed and unsigned integers
1742- def : InstRW<[N3Write_3c_1V], (instrs SUDOT_ZZZI, USDOT_ZZZI, USDOT_ZZZ)>;
1848+ def : InstRW<[N3Wr_ZDOTB, N3Rd_ZDOTB],
1849+ (instrs SUDOT_ZZZI, USDOT_ZZZI, USDOT_ZZZ)>;
17431850
17441851// Dot product, 16 bit
1745- def : InstRW<[N3Write_4c_1V0 ], (instregex "^[SU]DOT_ZZZI?_HtoD$")>;
1852+ def : InstRW<[N3Wr_ZDOTH, N3Rd_ZDOTH ], (instregex "^[SU]DOT_ZZZI?_HtoD$")>;
17461853
17471854// Duplicate, immediate and indexed form
17481855def : InstRW<[N3Write_2c_1V], (instregex "^DUP_ZI_[BHSD]$",
@@ -1804,7 +1911,7 @@ def : InstRW<[N3Write_2c_1V], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]",
18041911def : InstRW<[N3Write_2c_1V], (instregex "^N?MATCH_PPzZZ_[BH]$")>;
18051912
18061913// Matrix multiply-accumulate
1807- def : InstRW<[N3Write_3c_1V ], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>;
1914+ def : InstRW<[N3Wr_ZMMA, N3Rd_ZMMA ], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>;
18081915
18091916// Move prefix
18101917def : InstRW<[N3Write_2c_1V], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]$",
@@ -1827,20 +1934,22 @@ def : InstRW<[N3Write_4c_1V0], (instregex "^[SU]MULL[BT]_ZZZI_[SD]$",
18271934 "^[SU]MULL[BT]_ZZZ_[HSD]$")>;
18281935
18291936// Multiply accumulate, B, H, S element size
1830- def : InstRW<[N3Write_4c_1V0], (instregex "^ML[AS]_ZZZI_[BHS]$",
1831- "^(ML[AS]|MAD|MSB)_(ZPmZZ|ZPZZZ)_[BHS]")>;
1937+ // Indexed forms read the accumulator first; predicated forms read Pg first.
1938+ def : InstRW<[N3Wr_ZMABHS, N3Rd_ZMABHS], (instregex "^ML[AS]_ZZZI_[BHS]$")>;
1939+ def : InstRW<[N3Wr_ZMABHS, ReadDefault, N3Rd_ZMABHS], (instregex "^(ML[AS]|MAD|MSB)_(ZPmZZ|ZPZZZ)_[BHS]")>;
18321940
18331941// Multiply accumulate, D element size
1834- def : InstRW<[N3Write_5c_2V0 ], (instregex "^ML[AS]_ZZZI_D$",
1942+ def : InstRW<[N3Wr_ZMAD, N3Rd_ZMAD ], (instregex "^ML[AS]_ZZZI_D$")>; // indexed: accumulator is the first source
1943+ def : InstRW<[N3Wr_ZMAD, ReadDefault, N3Rd_ZMAD ], (instregex "^(ML[AS]|MAD|MSB)_(ZPmZZ|ZPZZZ)_D")>; // predicated: Pg is read first
18361944
18371945// Multiply accumulate long
1838- def : InstRW<[N3Write_4c_1V0 ], (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]$",
1946+ def : InstRW<[N3Wr_ZMAL, N3Rd_ZMAL ], (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]$",
18391947 "^[SU]ML[AS]L[BT]_ZZZI_[SD]$")>;
18401948
18411949// Multiply accumulate saturating doubling long regular
1842- def : InstRW<[N3Write_4c_1V0], (instregex "^SQDML[AS](LB|LT|LBT)_ZZZ_[HSD]$",
1843- "^SQDML[AS](LB|LT)_ZZZI_[SD]$")>;
1950+ def : InstRW<[N3Wr_ZMASQL, N3Rd_ZMASQ],
1951+ (instregex "^SQDML[AS](LB|LT|LBT)_ZZZ_[HSD]$",
1952+ "^SQDML[AS](LB|LT)_ZZZI_[SD]$")>;
18441953
18451954// Multiply saturating doubling high, B, H, S element size
18461955def : InstRW<[N3Write_4c_1V0], (instregex "^SQDMULH_ZZZ_[BHS]$",
@@ -1854,13 +1963,13 @@ def : InstRW<[N3Write_4c_1V0], (instregex "^SQDMULL[BT]_ZZZ_[HSD]$",
18541963 "^SQDMULL[BT]_ZZZI_[SD]$")>;
18551964
18561965// Multiply saturating rounding doubling regular/complex accumulate, B, H, S element size
1857- def : InstRW<[N3Write_4c_1V0 ], (instregex "^SQRDML[AS]H_ZZZ_[BHS]$",
1966+ def : InstRW<[N3Wr_ZMASQBHS, N3Rd_ZMASQ ], (instregex "^SQRDML[AS]H_ZZZ_[BHS]$",
18581967 "^SQRDCMLAH_ZZZ_[BHS]$",
18591968 "^SQRDML[AS]H_ZZZI_[HS]$",
18601969 "^SQRDCMLAH_ZZZI_[HS]$")>;
18611970
18621971// Multiply saturating rounding doubling regular/complex accumulate, D element size
1863- def : InstRW<[N3Write_5c_2V0 ], (instregex "^SQRDML[AS]H_ZZZI?_D$",
1972+ def : InstRW<[N3Wr_ZMASQD, N3Rd_ZMASQ ], (instregex "^SQRDML[AS]H_ZZZI?_D$",
18641973 "^SQRDCMLAH_ZZZ_D$")>;
18651974
18661975// Multiply saturating rounding doubling regular/complex, B, H, S element size
@@ -1948,8 +2057,9 @@ def : InstRW<[N3Write_2c_1V], (instregex "^FAC(GE|GT)_PPzZZ_[HSD]$",
19482057def : InstRW<[N3Write_3c_1V], (instregex "^FCADD_ZPmZ_[HSD]$")>;
19492058
19502059// Floating point complex multiply add
1951- def : InstRW<[N3Write_4c_1V], (instregex "^FCMLA_ZPmZZ_[HSD]$",
1952- "^FCMLA_ZZZI_[HS]$")>;
2060+ def : InstRW<[N3Wr_ZFCMA, ReadDefault, N3Rd_ZFCMA],
2061+ (instregex "^FCMLA_ZPmZZ_[HSD]")>;
2062+ def : InstRW<[N3Wr_ZFCMA, N3Rd_ZFCMA], (instregex "^FCMLA_ZZZI_[HS]")>;
19532063
19542064// Floating point convert, long or narrow (F16 to F32 or F32 to F16)
19552065def : InstRW<[N3Write_4c_2V0], (instregex "^FCVT_ZPmZ_(HtoS|StoH)",
@@ -2014,12 +2124,15 @@ def : InstRW<[N3Write_3c_1V], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]",
20142124 "^FMUL_ZPZ[IZ]_[HSD]")>;
20152125
20162126// Floating point multiply accumulate
2017- def : InstRW<[N3Write_4c_1V], (instregex "^F(N?M(AD|SB)|N?ML[AS])_ZPmZZ_[HSD]$",
2018- "^FN?ML[AS]_ZPZZZ_[HSD]",
2019- "^FML[AS]_ZZZI_[HSD]$")>;
2127+ def : InstRW<[N3Wr_ZFMA, ReadDefault, N3Rd_ZFMA],
2128+ (instregex "^FN?ML[AS]_ZPmZZ_[HSD]",
2129+ "^FN?(MAD|MSB)_ZPmZZ_[HSD]")>;
2130+ def : InstRW<[N3Wr_ZFMA, N3Rd_ZFMA],
2131+ (instregex "^FML[AS]_ZZZI_[HSD]",
2132+ "^FN?ML[AS]_ZPZZZ_[HSD]")>;
20202133
20212134// Floating point multiply add/sub accumulate long
2022- def : InstRW<[N3Write_4c_1V ], (instregex "^FML[AS]L[BT]_ZZZI?_SHH$")>;
2135+ def : InstRW<[N3Wr_ZFMAL, N3Rd_ZFMAL ], (instregex "^FML[AS]L[BT]_ZZZI?_SHH$")>;
20232136
20242137// Floating point reciprocal estimate, F16
20252138def : InstRW<[N3Write_6c_4V0], (instregex "^FR(ECP|SQRT)E_ZZ_H", "^FRECPX_ZPmZ_H")>;
@@ -2079,13 +2192,13 @@ def : InstRW<[N3Write_3c_1V], (instregex "^FTS(MUL|SEL)_ZZZ_[HSD]$")>;
20792192def : InstRW<[N3Write_4c_2V0], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>;
20802193
20812194// Dot product
2082- def : InstRW<[N3Write_4c_1V ], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;
2195+ def : InstRW<[N3Wr_ZBFDOT, N3Rd_ZBFDOT ], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;
20832196
20842197// Matrix multiply accumulate
2085- def : InstRW<[N3Write_5c_1V ], (instrs BFMMLA_ZZZ_HtoS)>;
2198+ def : InstRW<[N3Wr_ZBFMMA, N3Rd_ZBFMMA ], (instrs BFMMLA_ZZZ_HtoS)>;
20862199
20872200// Multiply accumulate long
2088- def : InstRW<[N3Write_4c_1V ], (instregex "^BFMLAL[BT]_ZZZ(I)?$")>;
2201+ def : InstRW<[N3Wr_ZBFMAL, N3Rd_ZBFMAL ], (instregex "^BFMLAL[BT]_ZZZ(I)?$")>;
20892202
20902203// SVE Load instructions
20912204// -----------------------------------------------------------------------------
0 commit comments