Skip to content

Commit 340cf21

Browse files
[AArch64]SME2 Multi-vector - Index/Single/Multi Array Vectors LONG INT MLA sources
This patch adds the assembly/disassembly for the following instructions: SMLALL: (multiple and indexed vector): Multi-vector signed integer multiply-add long long by indexed element. (multiple and single vector): Multi-vector signed integer multiply-add long long by vector. (multiple vectors): Multi-vector signed integer multiply-add long long. SMLSLL: (multiple and indexed vector): Multi-vector signed integer multiply-subtract long long by indexed element. (multiple and single vector): Multi-vector signed integer multiply-subtract long long by vector. (multiple vectors): Multi-vector signed integer multiply-subtract long long. SUMLALL: (multiple and indexed vector): Multi-vector signed by unsigned integer multiply-add long long by indexed element. (multiple and single vector): Multi-vector signed by unsigned integer multiply-add long long by vector. UMLALL: (multiple and indexed vector): Multi-vector unsigned integer multiply-add long long by indexed element. (multiple and single vector): Multi-vector unsigned integer multiply-add long long by vector. (multiple vectors): Multi-vector unsigned integer multiply-add long long. UMLSLL: (multiple and indexed vector): Multi-vector unsigned integer multiply-subtract long long by indexed element. (multiple and single vector): Multi-vector unsigned integer multiply-subtract long long by vector. (multiple vectors): Multi-vector unsigned integer multiply-subtract long long. USMLALL: (multiple and indexed vector): Multi-vector unsigned by signed integer multiply-add long long by indexed element. (multiple and single vector): Multi-vector unsigned by signed integer multiply-add long long by vector. (multiple vectors): Multi-vector unsigned by signed integer multiply-add long long. The reference can be found here: https://developer.arm.com/documentation/ddi0602/2022-09 It also adds two new immediates, uimm2s4range (for off2) and uimm1s4range (for o1), to represent the vector select offset.
The new operands have the range between the first and the last vector position. Depends on: D135785 Differential Revision: https://reviews.llvm.org/D136075
1 parent 69d117e commit 340cf21

17 files changed

+11109
-1
lines changed

llvm/lib/Target/AArch64/AArch64InstrFormats.td

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1381,15 +1381,28 @@ class UImmScaledMemoryIndexedRange<int Width, int Scale, int OffsetVal> : AsmOpe
13811381
let ParserMethod = "tryParseImmRange";
13821382
}
13831383

1384+
// Assembler operand classes for immediate ranges. Each one instantiates
// UImmScaledMemoryIndexedRange, whose template arguments are, in order,
// <Width, Scale, OffsetVal>.
def UImm1s4RangeOperand : UImmScaledMemoryIndexedRange<1, 4, 3>;
13841385
def UImm2s2RangeOperand : UImmScaledMemoryIndexedRange<2, 2, 1>;
1386+
def UImm2s4RangeOperand : UImmScaledMemoryIndexedRange<2, 4, 3>;
13851387
def UImm3s2RangeOperand : UImmScaledMemoryIndexedRange<3, 2, 1>;
13861388

1389+
// uimm1s4range: i64 immediate-range operand used for a vector select offset.
// The ImmLeaf predicate accepts only multiples of 4 in [0, 4], i.e. 0 or 4.
// The operand is printed with printImmRangeScale<4, 3> and matched by the
// assembler through UImm1s4RangeOperand.
def uimm1s4range : Operand<i64>, ImmLeaf<i64,
1390+
[{ return Imm >= 0 && Imm <= 4 && ((Imm % 4) == 0); }], UImmS4XForm> {
1391+
let PrintMethod = "printImmRangeScale<4, 3>";
1392+
let ParserMatchClass = UImm1s4RangeOperand;
1393+
}
1394+
13871395
// uimm2s2range: i64 immediate-range operand.
// The ImmLeaf predicate accepts only multiples of 2 in [0, 6].
// The operand is printed with printImmRangeScale<2, 1> and matched by the
// assembler through UImm2s2RangeOperand.
def uimm2s2range : Operand<i64>, ImmLeaf<i64,
13881396
[{ return Imm >= 0 && Imm <= 6 && ((Imm % 2) == 0); }], UImmS2XForm> {
13891397
let PrintMethod = "printImmRangeScale<2, 1>";
13901398
let ParserMatchClass = UImm2s2RangeOperand;
13911399
}
13921400

1401+
// uimm2s4range: i64 immediate-range operand used for a vector select offset.
// The ImmLeaf predicate accepts only multiples of 4 in [0, 12].
// The operand is printed with printImmRangeScale<4, 3> and matched by the
// assembler through UImm2s4RangeOperand.
def uimm2s4range : Operand<i64>, ImmLeaf<i64,
1402+
[{ return Imm >= 0 && Imm <= 12 && ((Imm % 4) == 0); }], UImmS4XForm> {
1403+
let PrintMethod = "printImmRangeScale<4, 3>";
1404+
let ParserMatchClass = UImm2s4RangeOperand;
1405+
}
13931406

13941407
def uimm3s2range : Operand<i64>, ImmLeaf<i64,
13951408
[{ return Imm >= 0 && Imm <= 14 && ((Imm % 2) == 0); }], UImmS2XForm> {

llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -499,6 +499,57 @@ defm USVDOT_VG4_M4ZZI_BToS : sme2_multi_vec_array_vg4_index_32b<"usvdot", 0b0101
499499

500500
defm UVDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"uvdot", 0b0110, ZZ_h_mul_r, ZPR4b16>;
501501
defm UVDOT_VG4_M4ZZI_BtoS : sme2_multi_vec_array_vg4_index_32b<"uvdot", 0b0110, ZZZZ_b_mul_r, ZPR4b8>;
502+
503+
def SMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"smlall", 0b000>;
504+
defm SMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"smlall", 0b000>;
505+
defm SMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"smlall", 0b000>;
506+
def SMLALL_MZZ_BtoS : sme2_mla_ll_array_single<"smlall", 0b0000, MatrixOp32, ZPR8, ZPR4b8>;
507+
defm SMLALL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg24_single<"smlall", 0b00000, MatrixOp32, ZZ_b, ZPR4b8>;
508+
defm SMLALL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg24_single<"smlall", 0b01000, MatrixOp32, ZZZZ_b, ZPR4b8>;
509+
defm SMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"smlall", 0b0000, MatrixOp32, ZZ_b_mul_r>;
510+
defm SMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"smlall", 0b0000, MatrixOp32, ZZZZ_b_mul_r>;
511+
512+
def USMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"usmlall", 0b001>;
513+
defm USMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"usmlall", 0b100>;
514+
defm USMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"usmlall", 0b100>;
515+
def USMLALL_MZZ_BtoS : sme2_mla_ll_array_single<"usmlall", 0b0001, MatrixOp32, ZPR8, ZPR4b8>;
516+
defm USMLALL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg24_single<"usmlall", 0b00001, MatrixOp32, ZZ_b, ZPR4b8>;
517+
defm USMLALL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg24_single<"usmlall", 0b01001, MatrixOp32, ZZZZ_b, ZPR4b8>;
518+
defm USMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"usmlall", 0b0001, MatrixOp32, ZZ_b_mul_r>;
519+
defm USMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"usmlall", 0b0001, MatrixOp32, ZZZZ_b_mul_r>;
520+
521+
def SMLSLL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"smlsll", 0b010>;
522+
defm SMLSLL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"smlsll", 0b001>;
523+
defm SMLSLL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"smlsll", 0b001>;
524+
def SMLSLL_MZZ_BtoS : sme2_mla_ll_array_single<"smlsll", 0b0010, MatrixOp32, ZPR8, ZPR4b8>;
525+
defm SMLSLL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg24_single<"smlsll", 0b00010, MatrixOp32, ZZ_b, ZPR4b8>;
526+
defm SMLSLL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg24_single<"smlsll", 0b01010, MatrixOp32, ZZZZ_b, ZPR4b8>;
527+
defm SMLSLL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"smlsll", 0b0010, MatrixOp32, ZZ_b_mul_r>;
528+
defm SMLSLL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"smlsll", 0b0010, MatrixOp32, ZZZZ_b_mul_r>;
529+
530+
def UMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"umlall", 0b100>;
531+
defm UMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"umlall", 0b010>;
532+
defm UMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"umlall", 0b010>;
533+
def UMLALL_MZZ_BtoS : sme2_mla_ll_array_single<"umlall", 0b0100, MatrixOp32, ZPR8, ZPR4b8>;
534+
defm UMLALL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg24_single<"umlall", 0b00100, MatrixOp32, ZZ_b, ZPR4b8>;
535+
defm UMLALL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg24_single<"umlall", 0b01100, MatrixOp32, ZZZZ_b, ZPR4b8>;
536+
defm UMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"umlall", 0b0100, MatrixOp32, ZZ_b_mul_r>;
537+
defm UMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"umlall", 0b0100, MatrixOp32, ZZZZ_b_mul_r>;
538+
539+
def SUMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"sumlall", 0b101>;
540+
defm SUMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"sumlall", 0b110>;
541+
defm SUMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"sumlall", 0b110>;
542+
defm SUMLALL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg24_single<"sumlall", 0b00101, MatrixOp32, ZZ_b, ZPR4b8>;
543+
defm SUMLALL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg24_single<"sumlall", 0b01101, MatrixOp32, ZZZZ_b, ZPR4b8>;
544+
545+
def UMLSLL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"umlsll", 0b110>;
546+
defm UMLSLL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"umlsll", 0b011>;
547+
defm UMLSLL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"umlsll", 0b011>;
548+
def UMLSLL_MZZ_BtoS : sme2_mla_ll_array_single<"umlsll", 0b0110, MatrixOp32, ZPR8, ZPR4b8>;
549+
defm UMLSLL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg24_single<"umlsll", 0b00110, MatrixOp32, ZZ_b, ZPR4b8>;
550+
defm UMLSLL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg24_single<"umlsll", 0b01110, MatrixOp32, ZZZZ_b, ZPR4b8>;
551+
defm UMLSLL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"umlsll", 0b0110, MatrixOp32, ZZ_b_mul_r>;
552+
defm UMLSLL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"umlsll", 0b0110, MatrixOp32, ZZZZ_b_mul_r>;
502553
}
503554

504555

@@ -536,6 +587,42 @@ defm UDOT_VG2_M2Z2Z_HtoD : sme2_dot_mla_add_sub_array_vg2_multi<"udot", 0b110110
536587
defm UDOT_VG4_M4Z4Z_HtoD : sme2_dot_mla_add_sub_array_vg4_multi<"udot", 0b110110, MatrixOp64, ZZZZ_h_mul_r>;
537588

538589
defm UVDOT_VG4_M4ZZI_HtoD : sme2_multi_vec_array_vg4_index_64b<"uvdot", 0b111, ZZZZ_h_mul_r, ZPR4b16>;
590+
591+
def SMLALL_MZZI_HtoD : sme2_mla_ll_array_index_64b<"smlall", 0b00>;
592+
defm SMLALL_VG2_M2ZZI_HtoD : sme2_mla_ll_array_vg2_index_64b<"smlall", 0b00>;
593+
defm SMLALL_VG4_M4ZZI_HtoD : sme2_mla_ll_array_vg4_index_64b<"smlall", 0b00>;
594+
def SMLALL_MZZ_HtoD : sme2_mla_ll_array_single<"smlall", 0b1000, MatrixOp64, ZPR16, ZPR4b16>;
595+
defm SMLALL_VG2_M2ZZ_HtoD : sme2_mla_ll_array_vg24_single<"smlall", 0b10000, MatrixOp64, ZZ_h, ZPR4b16>;
596+
defm SMLALL_VG4_M4ZZ_HtoD : sme2_mla_ll_array_vg24_single<"smlall", 0b11000, MatrixOp64, ZZZZ_h, ZPR4b16>;
597+
defm SMLALL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"smlall", 0b1000, MatrixOp64, ZZ_h_mul_r>;
598+
defm SMLALL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"smlall", 0b1000, MatrixOp64, ZZZZ_h_mul_r>;
599+
600+
def SMLSLL_MZZI_HtoD : sme2_mla_ll_array_index_64b<"smlsll", 0b01>;
601+
defm SMLSLL_VG2_M2ZZI_HtoD : sme2_mla_ll_array_vg2_index_64b<"smlsll", 0b01>;
602+
defm SMLSLL_VG4_M4ZZI_HtoD : sme2_mla_ll_array_vg4_index_64b<"smlsll", 0b01>;
603+
def SMLSLL_MZZ_HtoD : sme2_mla_ll_array_single<"smlsll", 0b1010, MatrixOp64, ZPR16, ZPR4b16>;
604+
defm SMLSLL_VG2_M2ZZ_HtoD : sme2_mla_ll_array_vg24_single<"smlsll", 0b10010, MatrixOp64, ZZ_h, ZPR4b16>;
605+
defm SMLSLL_VG4_M4ZZ_HtoD : sme2_mla_ll_array_vg24_single<"smlsll", 0b11010, MatrixOp64, ZZZZ_h, ZPR4b16>;
606+
defm SMLSLL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"smlsll", 0b1010, MatrixOp64, ZZ_h_mul_r>;
607+
defm SMLSLL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"smlsll", 0b1010, MatrixOp64, ZZZZ_h_mul_r>;
608+
609+
def UMLALL_MZZI_HtoD : sme2_mla_ll_array_index_64b<"umlall", 0b10>;
610+
defm UMLALL_VG2_M2ZZI_HtoD : sme2_mla_ll_array_vg2_index_64b<"umlall", 0b10>;
611+
defm UMLALL_VG4_M4ZZI_HtoD : sme2_mla_ll_array_vg4_index_64b<"umlall", 0b10>;
612+
def UMLALL_MZZ_HtoD : sme2_mla_ll_array_single<"umlall", 0b1100, MatrixOp64, ZPR16, ZPR4b16>;
613+
defm UMLALL_VG2_M2ZZ_HtoD : sme2_mla_ll_array_vg24_single<"umlall", 0b10100, MatrixOp64, ZZ_h, ZPR4b16>;
614+
defm UMLALL_VG4_M4ZZ_HtoD : sme2_mla_ll_array_vg24_single<"umlall", 0b11100, MatrixOp64, ZZZZ_h, ZPR4b16>;
615+
defm UMLALL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"umlall", 0b1100, MatrixOp64, ZZ_h_mul_r>;
616+
defm UMLALL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"umlall", 0b1100, MatrixOp64, ZZZZ_h_mul_r>;
617+
618+
def UMLSLL_MZZI_HtoD : sme2_mla_ll_array_index_64b<"umlsll", 0b11>;
619+
defm UMLSLL_VG2_M2ZZI_HtoD : sme2_mla_ll_array_vg2_index_64b<"umlsll", 0b11>;
620+
defm UMLSLL_VG4_M4ZZI_HtoD : sme2_mla_ll_array_vg4_index_64b<"umlsll", 0b11>;
621+
def UMLSLL_MZZ_HtoD : sme2_mla_ll_array_single<"umlsll", 0b1110, MatrixOp64, ZPR16, ZPR4b16>;
622+
defm UMLSLL_VG2_M2ZZ_HtoD : sme2_mla_ll_array_vg24_single<"umlsll", 0b10110, MatrixOp64, ZZ_h, ZPR4b16>;
623+
defm UMLSLL_VG4_M4ZZ_HtoD : sme2_mla_ll_array_vg24_single<"umlsll", 0b11110, MatrixOp64, ZZZZ_h, ZPR4b16>;
624+
defm UMLSLL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"umlsll", 0b1110, MatrixOp64, ZZ_h_mul_r>;
625+
defm UMLSLL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"umlsll", 0b1110, MatrixOp64, ZZZZ_h_mul_r>;
539626
}
540627

541628
let Predicates = [HasSME2, HasSMEF64F64] in {

llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5503,6 +5503,15 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
55035503
"where the first immediate is a multiple of 2 in the range [0, 6] or "
55045504
"[0, 14] "
55055505
"depending on the instruction, and the second immediate is immf + 1.");
5506+
case Match_InvalidMemoryIndexedRange4UImm1:
5507+
case Match_InvalidMemoryIndexedRange4UImm2:
5508+
return Error(
5509+
Loc,
5510+
"vector select offset must be an immediate range of the form "
5511+
"<immf>:<imml>, "
5512+
"where the first immediate is a multiple of 4 in the range [0, 4] or "
5513+
"[0, 12] "
5514+
"depending on the instruction, and the second immediate is immf + 3.");
55065515
case Match_InvalidSVEAddSubImm8:
55075516
return Error(Loc, "immediate must be an integer in range [0, 255]"
55085517
" with a shift amount of 0");
@@ -6159,6 +6168,8 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
61596168
case Match_InvalidImm1_64:
61606169
case Match_InvalidMemoryIndexedRange2UImm2:
61616170
case Match_InvalidMemoryIndexedRange2UImm3:
6171+
case Match_InvalidMemoryIndexedRange4UImm1:
6172+
case Match_InvalidMemoryIndexedRange4UImm2:
61626173
case Match_InvalidSVEAddSubImm8:
61636174
case Match_InvalidSVEAddSubImm16:
61646175
case Match_InvalidSVEAddSubImm32:

0 commit comments

Comments
 (0)