Skip to content

Commit 3c2d771

Browse files
authored
[AARCH64] Add assembly/disassembly for FMMLA instructions (llvm#113313)
This patch adds assembly/disassembly for the following instructions: FMMLA (widening, FP16 to FP32), FMMLA (widening, FP8 to FP16), and FMMLA (widening, FP8 to FP32), according to [1]. [1] https://developer.arm.com/documentation/ddi0602
1 parent d48c849 commit 3c2d771

15 files changed

+331
-46
lines changed

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2427,7 +2427,7 @@ let Predicates = [HasBF16, HasSVEorSME] in {
24272427
} // End HasBF16, HasSVEorSME
24282428

24292429
let Predicates = [HasBF16, HasSVE] in {
2430-
defm BFMMLA_ZZZ : sve_bfloat_matmul<"bfmmla", int_aarch64_sve_bfmmla>;
2430+
defm BFMMLA_ZZZ : sve_fp_matrix_mla<0b01, "bfmmla", ZPR32, ZPR16, int_aarch64_sve_bfmmla, nxv4f32, nxv8bf16>;
24312431
} // End HasBF16, HasSVE
24322432

24332433
let Predicates = [HasBF16, HasSVEorSME] in {
@@ -3449,11 +3449,15 @@ let Predicates = [HasSVEorSME, HasMatMulInt8] in {
34493449
} // End HasSVEorSME, HasMatMulInt8
34503450

34513451
let Predicates = [HasSVE, HasMatMulFP32] in {
3452-
defm FMMLA_ZZZ_S : sve_fp_matrix_mla<0, "fmmla", ZPR32, int_aarch64_sve_fmmla, nxv4f32>;
3452+
defm FMMLA_ZZZ_S : sve_fp_matrix_mla<0b10, "fmmla", ZPR32, ZPR32, int_aarch64_sve_fmmla, nxv4f32, nxv4f32>;
34533453
} // End HasSVE, HasMatMulFP32
34543454

3455+
let Predicates = [HasSVE_F16F32MM] in {
3456+
def FMMLA_ZZZ_HtoS : sve_fp_matrix_mla<0b00, "fmmla", ZPR32, ZPR16>;
3457+
} // End HasSVE_F16F32MM
3458+
34553459
let Predicates = [HasSVE, HasMatMulFP64] in {
3456-
defm FMMLA_ZZZ_D : sve_fp_matrix_mla<1, "fmmla", ZPR64, int_aarch64_sve_fmmla, nxv2f64>;
3460+
defm FMMLA_ZZZ_D : sve_fp_matrix_mla<0b11, "fmmla", ZPR64, ZPR64, int_aarch64_sve_fmmla, nxv2f64, nxv2f64>;
34573461
defm LD1RO_B_IMM : sve_mem_ldor_si<0b00, "ld1rob", Z_b, ZPR8, nxv16i8, nxv16i1, AArch64ld1ro_z>;
34583462
defm LD1RO_H_IMM : sve_mem_ldor_si<0b01, "ld1roh", Z_h, ZPR16, nxv8i16, nxv8i1, AArch64ld1ro_z>;
34593463
defm LD1RO_W_IMM : sve_mem_ldor_si<0b10, "ld1row", Z_s, ZPR32, nxv4i32, nxv4i1, AArch64ld1ro_z>;
@@ -4342,6 +4346,14 @@ def FMLALLTB_ZZZ : sve2_fp8_mla<0b010, ZPR32, "fmlalltb">;
43424346
def FMLALLTT_ZZZ : sve2_fp8_mla<0b011, ZPR32, "fmlalltt">;
43434347
} // End HasSSVE_FP8FMA
43444348

4349+
let Predicates = [HasSVE2, HasF8F32MM] in {
4350+
def FMMLA_ZZZ_BtoS : sve2_fp8_mmla<0b0, ZPR32, "fmmla">;
4351+
} // End HasSVE2, HasF8F32MM
4352+
4353+
let Predicates = [HasSVE2, HasF8F16MM] in {
4354+
def FMMLA_ZZZ_BtoH : sve2_fp8_mmla<0b1, ZPR16, "fmmla">;
4355+
} // End HasSVE2, HasF8F16MM
4356+
43454357
let Predicates = [HasSSVE_FP8DOT2] in {
43464358
// FP8 Widening Dot-Product - Indexed Group
43474359
defm FDOT_ZZZI_BtoH : sve2_fp8_dot_indexed_h<"fdot">;

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 29 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -9042,30 +9042,6 @@ multiclass sve_float_dot_indexed<bit bf, bits<2> opc, ZPRRegOp src1_ty,
90429042
def : SVE_4_Op_Imm_Pat<nxv4f32, op, nxv4f32, InVT, InVT, i32, VectorIndexS32b_timm, !cast<Instruction>(NAME)>;
90439043
}
90449044

9045-
class sve_bfloat_matmul<string asm>
9046-
: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR16:$Zm),
9047-
asm, "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> {
9048-
bits<5> Zm;
9049-
bits<5> Zda;
9050-
bits<5> Zn;
9051-
let Inst{31-21} = 0b01100100011;
9052-
let Inst{20-16} = Zm;
9053-
let Inst{15-10} = 0b111001;
9054-
let Inst{9-5} = Zn;
9055-
let Inst{4-0} = Zda;
9056-
9057-
let Constraints = "$Zda = $_Zda";
9058-
let DestructiveInstType = DestructiveOther;
9059-
let ElementSize = ElementSizeH;
9060-
let hasSideEffects = 0;
9061-
let mayRaiseFPException = 1;
9062-
}
9063-
9064-
multiclass sve_bfloat_matmul<string asm, SDPatternOperator op> {
9065-
def NAME : sve_bfloat_matmul<asm>;
9066-
def : SVE_3_Op_Pat<nxv4f32, op, nxv4f32, nxv8bf16, nxv8bf16 ,!cast<Instruction>(NAME)>;
9067-
}
9068-
90699045
class sve_bfloat_convert<bit N, string asm>
90709046
: I<(outs ZPR16:$Zd), (ins ZPR16:$_Zd, PPR3bAny:$Pg, ZPR32:$Zn),
90719047
asm, "\t$Zd, $Pg/m, $Zn", "", []>, Sched<[]> {
@@ -9188,14 +9164,14 @@ multiclass sve_int_dot_mixed_indexed<bit U, string asm, SDPatternOperator op> {
91889164
// SVE Floating Point Matrix Multiply Accumulate Group
91899165
//===----------------------------------------------------------------------===//
91909166

9191-
class sve_fp_matrix_mla<bit sz, string asm, ZPRRegOp zprty>
9192-
: I<(outs zprty:$Zda), (ins zprty:$_Zda, zprty:$Zn, zprty:$Zm),
9167+
class sve_fp_matrix_mla<bits<2> opc, string asm, ZPRRegOp zda_ty, ZPRRegOp reg_ty>
9168+
: I<(outs zda_ty:$Zda), (ins zda_ty:$_Zda, reg_ty:$Zn, reg_ty:$Zm),
91939169
asm, "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> {
91949170
bits<5> Zda;
91959171
bits<5> Zn;
91969172
bits<5> Zm;
9197-
let Inst{31-23} = 0b011001001;
9198-
let Inst{22} = sz;
9173+
let Inst{31-24} = 0b01100100;
9174+
let Inst{23-22} = opc;
91999175
let Inst{21} = 1;
92009176
let Inst{20-16} = Zm;
92019177
let Inst{15-10} = 0b111001;
@@ -9204,15 +9180,14 @@ class sve_fp_matrix_mla<bit sz, string asm, ZPRRegOp zprty>
92049180

92059181
let Constraints = "$Zda = $_Zda";
92069182
let DestructiveInstType = DestructiveOther;
9207-
let ElementSize = zprty.ElementSize;
92089183
let hasSideEffects = 0;
92099184
let mayRaiseFPException = 1;
92109185
}
92119186

9212-
multiclass sve_fp_matrix_mla<bit sz, string asm, ZPRRegOp zprty, SDPatternOperator op, ValueType vt> {
9213-
def NAME : sve_fp_matrix_mla<sz, asm, zprty>;
9187+
multiclass sve_fp_matrix_mla<bits<2> opc, string asm, ZPRRegOp zda_ty, ZPRRegOp reg_ty, SDPatternOperator op, ValueType zda_vt, ValueType reg_vt> {
9188+
def NAME : sve_fp_matrix_mla<opc, asm, zda_ty, reg_ty>;
92149189

9215-
def : SVE_3_Op_Pat<vt, op , vt, vt, vt, !cast<Instruction>(NAME)>;
9190+
def : SVE_3_Op_Pat<zda_vt, op , zda_vt, reg_vt, reg_vt, !cast<Instruction>(NAME)>;
92169191
}
92179192

92189193
//===----------------------------------------------------------------------===//
@@ -10632,6 +10607,28 @@ class sve2_fp8_mla_long_long_by_indexed_elem<bits<2> TT, string mnemonic>
1063210607
let Uses = [FPMR, FPCR];
1063310608
}
1063410609

10610+
// FP8 Matrix Multiply-accumulate Group
10611+
class sve2_fp8_mmla<bit opc, ZPRRegOp dst_ty, string mnemonic>
10612+
: I<(outs dst_ty:$Zda),
10613+
(ins dst_ty:$_Zda, ZPR8:$Zn, ZPR8:$Zm),
10614+
mnemonic, "\t$Zda, $Zn, $Zm",
10615+
"", []>, Sched<[]>{
10616+
bits<5> Zda;
10617+
bits<5> Zn;
10618+
bits<5> Zm;
10619+
let Inst{31-23} = 0b011001000;
10620+
let Inst{22} = opc;
10621+
let Inst{21} = 0b1;
10622+
let Inst{20-16} = Zm;
10623+
let Inst{15-10} = 0b111000;
10624+
let Inst{9-5} = Zn;
10625+
let Inst{4-0} = Zda;
10626+
let Constraints = "$Zda = $_Zda";
10627+
let DestructiveInstType = DestructiveOther;
10628+
let ElementSize = dst_ty.ElementSize;
10629+
let Uses = [FPMR, FPCR];
10630+
}
10631+
1063510632
class sve_fp8_dot_indexed<bits<4> opc, ZPRRegOp dst_ty, Operand iop_ty, string mnemonic>
1063610633
: I<(outs dst_ty:$Zda), (ins dst_ty:$_Zda, ZPR8:$Zn, ZPR3b8:$Zm, iop_ty:$iop),
1063710634
mnemonic, "\t$Zda, $Zn, $Zm$iop", "", []>, Sched<[]> {

llvm/test/MC/AArch64/SVE/matrix-multiply-fp-diagnostics.s

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,6 @@
33
// --------------------------------------------------------------------------//
44
// FMMLA (SVE)
55

6-
// Invalid element size
7-
8-
fmmla z0.h, z1.h, z2.h
9-
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
10-
116
// Mis-matched element size
127

138
fmmla z0.d, z1.s, z2.s

llvm/test/MC/AArch64/SVE2/directive-arch-negative.s

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,26 @@ bgrp z21.s, z10.s, z21.s
3030
// CHECK: error: instruction requires: sve2-bitperm
3131
// CHECK-NEXT: bgrp z21.s, z10.s, z21.s
3232

33+
.arch armv9-a+f8f16mm
34+
.arch armv9-a+nof8f16mm
35+
fmmla z23.h, z13.b, z8.b
36+
// CHECK: error: instruction requires: f8f16mm
37+
// CHECK-NEXT: fmmla z23.h, z13.b, z8.b
38+
39+
.arch armv9-a+f8f32mm
40+
.arch armv9-a+nof8f32mm
41+
fmmla z23.s, z13.b, z8.b
42+
// CHECK: error: instruction requires: f8f32mm
43+
// CHECK-NEXT: fmmla z23.s, z13.b, z8.b
44+
45+
.arch armv9-a+sve-f16f32mm
46+
.arch armv9-a+nosve-f16f32mm
47+
fmmla z23.s, z13.h, z8.h
48+
// CHECK: error: instruction requires: sve-f16f32mm
49+
// CHECK-NEXT: fmmla z23.s, z13.h, z8.h
50+
3351
.arch armv9-a+sve-bfscale
3452
.arch armv9-a+nosve-bfscale
3553
bfscale z0.h, p0/m, z0.h, z0.h
3654
// CHECK: error: instruction requires: sve-bfscale
37-
// CHECK-NEXT: bfscale z0.h, p0/m, z0.h, z0.h
55+
// CHECK-NEXT: bfscale z0.h, p0/m, z0.h, z0.h

llvm/test/MC/AArch64/SVE2/directive-arch.s

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,18 @@ rax1 z0.d, z0.d, z0.d
2020
bgrp z21.s, z10.s, z21.s
2121
// CHECK: bgrp z21.s, z10.s, z21.s
2222

23+
.arch armv9-a+f8f16mm
24+
fmmla z23.h, z13.b, z8.b
25+
// CHECK: fmmla z23.h, z13.b, z8.b
26+
27+
.arch armv9-a+f8f32mm
28+
fmmla z23.s, z13.b, z8.b
29+
// CHECK: fmmla z23.s, z13.b, z8.b
30+
31+
.arch armv9-a+sve-f16f32mm
32+
fmmla z23.s, z13.h, z8.h
33+
// CHECK: fmmla z23.s, z13.h, z8.h
34+
2335
.arch armv9-a+sve-bfscale
2436
bfscale z0.h, p0/m, z0.h, z0.h
25-
// CHECK: bfscale z0.h, p0/m, z0.h, z0.h
37+
// CHECK: bfscale z0.h, p0/m, z0.h, z0.h

llvm/test/MC/AArch64/SVE2/directive-arch_extension-negative.s

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,26 @@ bgrp z21.s, z10.s, z21.s
3030
// CHECK: error: instruction requires: sve2-bitperm
3131
// CHECK-NEXT: bgrp z21.s, z10.s, z21.s
3232

33+
.arch_extension f8f16mm
34+
.arch_extension nof8f16mm
35+
fmmla z23.h, z13.b, z8.b
36+
// CHECK: error: instruction requires: f8f16mm
37+
// CHECK-NEXT: fmmla z23.h, z13.b, z8.b
38+
39+
.arch_extension f8f32mm
40+
.arch_extension nof8f32mm
41+
fmmla z23.s, z13.b, z8.b
42+
// CHECK: error: instruction requires: f8f32mm
43+
// CHECK-NEXT: fmmla z23.s, z13.b, z8.b
44+
45+
.arch_extension sve-f16f32mm
46+
.arch_extension nosve-f16f32mm
47+
fmmla z23.s, z13.h, z8.h
48+
// CHECK: error: instruction requires: sve-f16f32mm
49+
// CHECK-NEXT: fmmla z23.s, z13.h, z8.h
50+
3351
.arch_extension sve-bfscale
3452
.arch_extension nosve-bfscale
3553
bfscale z0.h, p0/m, z0.h, z0.h
3654
// CHECK: error: instruction requires: sve-bfscale
37-
// CHECK-NEXT: bfscale z0.h, p0/m, z0.h, z0.h
55+
// CHECK-NEXT: bfscale z0.h, p0/m, z0.h, z0.h

llvm/test/MC/AArch64/SVE2/directive-arch_extension.s

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,18 @@ rax1 z0.d, z0.d, z0.d
2020
bgrp z21.s, z10.s, z21.s
2121
// CHECK: bgrp z21.s, z10.s, z21.s
2222

23+
.arch_extension f8f16mm
24+
fmmla z23.h, z13.b, z8.b
25+
// CHECK: fmmla z23.h, z13.b, z8.b
26+
27+
.arch_extension f8f32mm
28+
fmmla z23.s, z13.b, z8.b
29+
// CHECK: fmmla z23.s, z13.b, z8.b
30+
31+
.arch_extension sve-f16f32mm
32+
fmmla z23.s, z13.h, z8.h
33+
// CHECK: fmmla z23.s, z13.h, z8.h
34+
2335
.arch_extension sve-bfscale
2436
bfscale z0.h, p0/m, z0.h, z0.h
25-
// CHECK: bfscale z0.h, p0/m, z0.h, z0.h
37+
// CHECK: bfscale z0.h, p0/m, z0.h, z0.h

llvm/test/MC/AArch64/SVE2/directive-cpu-negative.s

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,26 @@ bgrp z21.s, z10.s, z21.s
3030
// CHECK: error: instruction requires: sve2-bitperm
3131
// CHECK-NEXT: bgrp z21.s, z10.s, z21.s
3232

33+
.cpu generic+sve2+f8f16mm
34+
.cpu generic+sve2+nof8f16mm
35+
fmmla z23.h, z13.b, z8.b
36+
// CHECK: error: instruction requires: f8f16mm
37+
// CHECK-NEXT: fmmla z23.h, z13.b, z8.b
38+
39+
.cpu generic+sve2+f8f32mm
40+
.cpu generic+sve2+nof8f32mm
41+
fmmla z23.s, z13.b, z8.b
42+
// CHECK: error: instruction requires: f8f32mm
43+
// CHECK-NEXT: fmmla z23.s, z13.b, z8.b
44+
45+
.cpu generic+sve-f16f32mm
46+
.cpu generic+nosve-f16f32mm
47+
fmmla z23.s, z13.h, z8.h
48+
// CHECK: error: instruction requires: sve-f16f32mm
49+
// CHECK-NEXT: fmmla z23.s, z13.h, z8.h
50+
3351
.cpu generic+sve-bfscale
3452
.cpu generic+nosve-bfscale
3553
bfscale z0.h, p0/m, z0.h, z0.h
3654
// CHECK: error: instruction requires: sve-bfscale
37-
// CHECK-NEXT: bfscale z0.h, p0/m, z0.h, z0.h
55+
// CHECK-NEXT: bfscale z0.h, p0/m, z0.h, z0.h

llvm/test/MC/AArch64/SVE2/directive-cpu.s

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,18 @@ rax1 z0.d, z0.d, z0.d
2020
bgrp z21.s, z10.s, z21.s
2121
// CHECK: bgrp z21.s, z10.s, z21.s
2222

23+
.cpu generic+sve2+f8f16mm
24+
fmmla z23.h, z13.b, z8.b
25+
// CHECK: fmmla z23.h, z13.b, z8.b
26+
27+
.cpu generic+sve2+f8f32mm
28+
fmmla z23.s, z13.b, z8.b
29+
// CHECK: fmmla z23.s, z13.b, z8.b
30+
31+
.cpu generic+sve-f16f32mm
32+
fmmla z23.s, z13.h, z8.h
33+
// CHECK: fmmla z23.s, z13.h, z8.h
34+
2335
.cpu generic+sve-bfscale
2436
bfscale z0.h, p0/m, z0.h, z0.h
25-
// CHECK: bfscale z0.h, p0/m, z0.h, z0.h
37+
// CHECK: bfscale z0.h, p0/m, z0.h, z0.h
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve-f16f32mm 2>&1 < %s | FileCheck %s
2+
3+
// --------------------------------------------------------------------------//
4+
// FMMLA (SVE)
5+
6+
// Invalid element size
7+
8+
fmmla z0.s, z1.b, z2.b
9+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: f8f32mm
10+
fmmla z0.d, z1.h, z2.h
11+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
12+
13+
// Mis-matched element size
14+
15+
fmmla z0.s, z1.h, z2.s
16+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
17+
fmmla z0.s, z1.d, z2.h
18+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width

0 commit comments

Comments
 (0)