Skip to content

Commit ed64949

Browse files
committed
[AArch64][llvm] Armv9.7-A: Add support for SVE2p3 DOT and MLA operations
Add instructions for SVE2p3 DOT and MLA operations:
- BFMMLA (non-widening)
- FMMLA (non-widening)
- SDOT (2-way, vectors)
- SDOT (2-way, indexed)
- UDOT (2-way, vectors)
- UDOT (2-way, indexed)

as documented here:
* https://developer.arm.com/documentation/ddi0602/2025-09/
* https://developer.arm.com/documentation/109697/2025_09/2025-Architecture-Extensions
1 parent 8a5fb9f commit ed64949

File tree

18 files changed

+600
-27
lines changed

18 files changed

+600
-27
lines changed

clang/test/Driver/aarch64-v97a.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,14 @@
2626
// RUN: %clang -target aarch64 -march=armv9.7-a+sve2p3 -### -c %s 2>&1 | FileCheck -check-prefix=V97A-SVE2p3 %s
2727
// V97A-SVE2p3: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.7a"{{.*}} "-target-feature" "+sve2p3"
2828

29+
// RUN: %clang -target aarch64 -march=armv9.7a+sve-b16mm -### -c %s 2>&1 | FileCheck -check-prefix=V97A-SVE-B16MM %s
30+
// RUN: %clang -target aarch64 -march=armv9.7-a+sve-b16mm -### -c %s 2>&1 | FileCheck -check-prefix=V97A-SVE-B16MM %s
31+
// V97A-SVE-B16MM: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.7a"{{.*}} "-target-feature" "+sve-b16mm"
32+
33+
// RUN: %clang -target aarch64 -march=armv9.7a+f16mm -### -c %s 2>&1 | FileCheck -check-prefix=V97A-F16MM %s
34+
// RUN: %clang -target aarch64 -march=armv9.7-a+f16mm -### -c %s 2>&1 | FileCheck -check-prefix=V97A-F16MM %s
35+
// V97A-F16MM: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.7a"{{.*}} "-target-feature" "+f16mm"
36+
2937
// RUN: %clang -target aarch64 -march=armv9.7a+cmh -### -c %s 2>&1 | FileCheck -check-prefix=V97A-CMH %s
3038
// RUN: %clang -target aarch64 -march=armv9.7-a+cmh -### -c %s 2>&1 | FileCheck -check-prefix=V97A-CMH %s
3139
// V97A-CMH: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.7a"{{.*}} "-target-feature" "+cmh"

clang/test/Driver/print-supported-extensions-aarch64.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
// CHECK-NEXT: d128 FEAT_D128, FEAT_LVA3, FEAT_SYSREG128, FEAT_SYSINSTR128 Enable Armv9.4-A 128-bit Page Table Descriptors, System Registers and instructions
1919
// CHECK-NEXT: dit FEAT_DIT Enable Armv8.4-A Data Independent Timing instructions
2020
// CHECK-NEXT: dotprod FEAT_DotProd Enable dot product support
21+
// CHECK-NEXT: f16mm FEAT_F16MM Enable Armv9.7-A non-widening half-precision matrix multiply-accumulate
2122
// CHECK-NEXT: f32mm FEAT_F32MM Enable Matrix Multiply FP32 Extension
2223
// CHECK-NEXT: f64mm FEAT_F64MM Enable Matrix Multiply FP64 Extension
2324
// CHECK-NEXT: f8f16mm FEAT_F8F16MM Enable Armv9.6-A FP8 to Half-Precision Matrix Multiplication
@@ -95,6 +96,7 @@
9596
// CHECK-NEXT: sve-aes FEAT_SVE_AES, FEAT_SVE_PMULL128 Enable SVE AES and quadword SVE polynomial multiply instructions
9697
// CHECK-NEXT: sve-aes2 FEAT_SVE_AES2 Enable Armv9.6-A SVE multi-vector AES and multi-vector quadword polynomial multiply instructions
9798
// CHECK-NEXT: sve-b16b16 FEAT_SVE_B16B16 Enable SVE2 non-widening and SME2 Z-targeting non-widening BFloat16 instructions
99+
// CHECK-NEXT: sve-b16mm FEAT_SVE_B16MM Enable Armv9.7-A SVE non-widening BFloat16 matrix multiply-accumulate
98100
// CHECK-NEXT: sve-bfscale FEAT_SVE_BFSCALE Enable Armv9.6-A SVE BFloat16 scaling instructions
99101
// CHECK-NEXT: sve-bitperm FEAT_SVE_BitPerm Enable bit permutation SVE2 instructions
100102
// CHECK-NEXT: sve-f16f32mm FEAT_SVE_F16F32MM Enable Armv9.6-A FP16 to FP32 Matrix Multiply

llvm/lib/Target/AArch64/AArch64.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ def SVEUnsupported : AArch64Unsupported {
7474
}
7575

7676
def SME2p3Unsupported : AArch64Unsupported {
77-
let F = [HasSVE2p3_or_SME2p3];
77+
let F = [HasSVE2p3_or_SME2p3, HasSVE_B16MM];
7878
}
7979

8080
def SME2p2Unsupported : AArch64Unsupported {

llvm/lib/Target/AArch64/AArch64Features.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -613,6 +613,12 @@ def FeatureSVE2p3 : ExtensionWithMArch<"sve2p3", "SVE2p3", "FEAT_SVE2p3",
613613
def FeatureSME2p3 : ExtensionWithMArch<"sme2p3", "SME2p3", "FEAT_SME2p3",
614614
"Enable Armv9.7-A Scalable Matrix Extension 2.3 instructions", [FeatureSME2p2]>;
615615

616+
def FeatureSVE_B16MM : ExtensionWithMArch<"sve-b16mm", "SVE_B16MM", "FEAT_SVE_B16MM",
617+
"Enable Armv9.7-A SVE non-widening BFloat16 matrix multiply-accumulate", [FeatureSVE]>;
618+
619+
def FeatureF16MM : ExtensionWithMArch<"f16mm", "F16MM", "FEAT_F16MM",
620+
"Enable Armv9.7-A non-widening half-precision matrix multiply-accumulate", [FeatureFullFP16]>;
621+
616622
//===----------------------------------------------------------------------===//
617623
// Other Features
618624
//===----------------------------------------------------------------------===//

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,12 @@ def HasCMH : Predicate<"Subtarget->hasCMH()">,
246246
AssemblerPredicateWithAll<(all_of FeatureCMH), "cmh">;
247247
def HasLSCP : Predicate<"Subtarget->hasLSCP()">,
248248
AssemblerPredicateWithAll<(all_of FeatureLSCP), "lscp">;
249+
def HasSVE2p2 : Predicate<"Subtarget->hasSVE2p2()">,
250+
AssemblerPredicateWithAll<(all_of FeatureSVE2p2), "sve2p2">;
251+
def HasSVE_B16MM : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE_B16MM()">,
252+
AssemblerPredicateWithAll<(all_of FeatureSVE_B16MM), "sve-b16mm">;
253+
def HasF16MM : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasF16MM()">,
254+
AssemblerPredicateWithAll<(all_of FeatureF16MM), "f16mm">;
249255

250256
// A subset of SVE(2) instructions are legal in Streaming SVE execution mode,
251257
// they should be enabled if either has been specified.

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2569,7 +2569,7 @@ let Predicates = [HasBF16, HasSVE_or_SME] in {
25692569
} // End HasBF16, HasSVE_or_SME
25702570

25712571
let Predicates = [HasBF16, HasSVE] in {
2572-
defm BFMMLA_ZZZ_HtoS : sve_fp_matrix_mla<0b01, "bfmmla", ZPR32, ZPR16, int_aarch64_sve_bfmmla, nxv4f32, nxv8bf16>;
2572+
defm BFMMLA_ZZZ_HtoS : sve_fp_matrix_mla<0b011, "bfmmla", ZPR32, ZPR16, int_aarch64_sve_bfmmla, nxv4f32, nxv8bf16>;
25732573
} // End HasBF16, HasSVE
25742574

25752575
let Predicates = [HasBF16, HasSVE_or_SME] in {
@@ -3680,15 +3680,15 @@ let Predicates = [HasSVE_or_SME, HasMatMulInt8] in {
36803680
} // End HasSVE_or_SME, HasMatMulInt8
36813681

36823682
let Predicates = [HasSVE, HasMatMulFP32] in {
3683-
defm FMMLA_ZZZ_S : sve_fp_matrix_mla<0b10, "fmmla", ZPR32, ZPR32, int_aarch64_sve_fmmla, nxv4f32, nxv4f32>;
3683+
defm FMMLA_ZZZ_S : sve_fp_matrix_mla<0b101, "fmmla", ZPR32, ZPR32, int_aarch64_sve_fmmla, nxv4f32, nxv4f32>;
36843684
} // End HasSVE, HasMatMulFP32
36853685

36863686
let Predicates = [HasSVE_F16F32MM] in {
3687-
def FMLLA_ZZZ_HtoS : sve_fp_matrix_mla<0b00, "fmmla", ZPR32, ZPR16>;
3687+
def FMLLA_ZZZ_HtoS : sve_fp_matrix_mla<0b001, "fmmla", ZPR32, ZPR16>;
36883688
} // End HasSVE_F16F32MM
36893689

36903690
let Predicates = [HasSVE, HasMatMulFP64] in {
3691-
defm FMMLA_ZZZ_D : sve_fp_matrix_mla<0b11, "fmmla", ZPR64, ZPR64, int_aarch64_sve_fmmla, nxv2f64, nxv2f64>;
3691+
defm FMMLA_ZZZ_D : sve_fp_matrix_mla<0b111, "fmmla", ZPR64, ZPR64, int_aarch64_sve_fmmla, nxv2f64, nxv2f64>;
36923692
defm LD1RO_B_IMM : sve_mem_ldor_si<0b00, "ld1rob", Z_b, ZPR8, nxv16i8, nxv16i1, AArch64ld1ro_z>;
36933693
defm LD1RO_H_IMM : sve_mem_ldor_si<0b01, "ld1roh", Z_h, ZPR16, nxv8i16, nxv8i1, AArch64ld1ro_z>;
36943694
defm LD1RO_W_IMM : sve_mem_ldor_si<0b10, "ld1row", Z_s, ZPR32, nxv4i32, nxv4i1, AArch64ld1ro_z>;
@@ -4631,8 +4631,30 @@ let Predicates = [HasSVE2p3_or_SME2p3] in {
46314631
defm SABAL_ZZZ : sve2_int_two_way_absdiff_accum_long<0b0, "sabal">;
46324632
defm UABAL_ZZZ : sve2_int_two_way_absdiff_accum_long<0b1, "uabal">;
46334633

4634+
// SVE2 integer dot product
4635+
def SDOT_ZZZ_BtoH : sve_intx_dot<0b01, 0b00000, 0b0, "sdot", ZPR16, ZPR8>;
4636+
def UDOT_ZZZ_BtoH : sve_intx_dot<0b01, 0b00000, 0b1, "udot", ZPR16, ZPR8>;
4637+
4638+
// SVE2 integer indexed dot product
4639+
def SDOT_ZZZI_BtoH : sve_intx_dot_by_indexed_elem_x<0b0, "sdot">;
4640+
def UDOT_ZZZI_BtoH : sve_intx_dot_by_indexed_elem_x<0b1, "udot">;
4641+
46344642
} // End HasSVE2p3_or_SME2p3
46354643

4644+
//===----------------------------------------------------------------------===//
4645+
// SVE_B16MM Instructions
4646+
//===----------------------------------------------------------------------===//
4647+
let Predicates = [HasSVE_B16MM] in {
4648+
def BFMMLA_ZZZ_H : sve_fp_matrix_mla<0b110, "bfmmla", ZPR16, ZPR16>;
4649+
}
4650+
4651+
//===----------------------------------------------------------------------===//
4652+
// F16MM Instructions
4653+
//===----------------------------------------------------------------------===//
4654+
let Predicates = [HasSVE2p2, HasF16MM] in {
4655+
def FMMLA_ZZZ_H : sve_fp_matrix_mla<0b100, "fmmla", ZPR16, ZPR16>;
4656+
}
4657+
46364658
//===----------------------------------------------------------------------===//
46374659
// SME2.2 or SVE2.2 instructions - Legal in streaming mode iff target has SME2p2
46384660
//===----------------------------------------------------------------------===//

llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3892,6 +3892,8 @@ static const struct Extension {
38923892
{"gcie", {AArch64::FeatureGCIE}},
38933893
{"sme2p3", {AArch64::FeatureSME2p3}},
38943894
{"sve2p3", {AArch64::FeatureSVE2p3}},
3895+
{"sve-b16mm", {AArch64::FeatureSVE_B16MM}},
3896+
{"f16mm", {AArch64::FeatureF16MM}},
38953897
};
38963898

38973899
static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3787,16 +3787,15 @@ multiclass sve2p1_two_way_dot_vv<string mnemonic, bit u, SDPatternOperator intri
37873787
// SVE Integer Dot Product Group - Indexed Group
37883788
//===----------------------------------------------------------------------===//
37893789

3790-
class sve_intx_dot_by_indexed_elem<bit sz, bit U, string asm,
3790+
class sve_intx_dot_by_indexed_elem<bit U, string asm,
37913791
ZPRRegOp zprty1, ZPRRegOp zprty2,
37923792
ZPRRegOp zprty3, Operand itype>
37933793
: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty2:$Zn, zprty3:$Zm, itype:$iop),
37943794
asm, "\t$Zda, $Zn, $Zm$iop",
37953795
"", []>, Sched<[]> {
37963796
bits<5> Zda;
37973797
bits<5> Zn;
3798-
let Inst{31-23} = 0b010001001;
3799-
let Inst{22} = sz;
3798+
let Inst{31-24} = 0b01000100;
38003799
let Inst{21} = 0b1;
38013800
let Inst{15-11} = 0;
38023801
let Inst{10} = U;
@@ -3810,23 +3809,35 @@ class sve_intx_dot_by_indexed_elem<bit sz, bit U, string asm,
38103809

38113810
multiclass sve_intx_dot_by_indexed_elem<bit opc, string asm,
38123811
SDPatternOperator op> {
3813-
def _BtoS : sve_intx_dot_by_indexed_elem<0b0, opc, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS32b_timm> {
3812+
def _BtoS : sve_intx_dot_by_indexed_elem<opc, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS32b_timm> {
38143813
bits<2> iop;
38153814
bits<3> Zm;
3815+
let Inst{23-22} = 0b10;
38163816
let Inst{20-19} = iop;
38173817
let Inst{18-16} = Zm;
38183818
}
3819-
def _HtoD : sve_intx_dot_by_indexed_elem<0b1, opc, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD32b_timm> {
3819+
def _HtoD : sve_intx_dot_by_indexed_elem<opc, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD32b_timm> {
38203820
bits<1> iop;
38213821
bits<4> Zm;
3822-
let Inst{20} = iop;
3822+
let Inst{23-22} = 0b11;
3823+
let Inst{20} = iop;
38233824
let Inst{19-16} = Zm;
38243825
}
38253826

38263827
def : SVE_4_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv16i8, nxv16i8, i32, VectorIndexS32b_timm, !cast<Instruction>(NAME # _BtoS)>;
38273828
def : SVE_4_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv8i16, nxv8i16, i32, VectorIndexD32b_timm, !cast<Instruction>(NAME # _HtoD)>;
38283829
}
38293830

3831+
class sve_intx_dot_by_indexed_elem_x<bit opc, string asm>
3832+
: sve_intx_dot_by_indexed_elem<opc, asm, ZPR16, ZPR8, ZPR3b8, VectorIndexH32b_timm> {
3833+
bits<3> iop;
3834+
bits<3> Zm;
3835+
let Inst{23} = 0b0;
3836+
let Inst{22} = iop{2-2};
3837+
let Inst{20-19} = iop{1-0};
3838+
let Inst{18-16} = Zm;
3839+
}
3840+
38303841
//===----------------------------------------------------------------------===//
38313842
// SVE2 Complex Integer Dot Product Group
38323843
//===----------------------------------------------------------------------===//
@@ -9616,17 +9627,18 @@ multiclass sve_int_dot_mixed_indexed<bit U, string asm, SDPatternOperator op> {
96169627
// SVE Floating Point Matrix Multiply Accumulate Group
96179628
//===----------------------------------------------------------------------===//
96189629

9619-
class sve_fp_matrix_mla<bits<2> opc, string asm, ZPRRegOp zda_ty, ZPRRegOp reg_ty>
9630+
class sve_fp_matrix_mla<bits<3> opc, string asm, ZPRRegOp zda_ty, ZPRRegOp reg_ty>
96209631
: I<(outs zda_ty:$Zda), (ins zda_ty:$_Zda, reg_ty:$Zn, reg_ty:$Zm),
96219632
asm, "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> {
96229633
bits<5> Zda;
96239634
bits<5> Zn;
96249635
bits<5> Zm;
96259636
let Inst{31-24} = 0b01100100;
9626-
let Inst{23-22} = opc;
9637+
let Inst{23-22} = opc{2-1};
96279638
let Inst{21} = 1;
96289639
let Inst{20-16} = Zm;
9629-
let Inst{15-10} = 0b111001;
9640+
let Inst{15-11} = 0b11100;
9641+
let Inst{10} = opc{0};
96309642
let Inst{9-5} = Zn;
96319643
let Inst{4-0} = Zda;
96329644

@@ -9636,10 +9648,12 @@ class sve_fp_matrix_mla<bits<2> opc, string asm, ZPRRegOp zda_ty, ZPRRegOp reg_t
96369648
let mayRaiseFPException = 1;
96379649
}
96389650

9639-
multiclass sve_fp_matrix_mla<bits<2> opc, string asm, ZPRRegOp zda_ty, ZPRRegOp reg_ty, SDPatternOperator op, ValueType zda_vt, ValueType reg_vt> {
9651+
multiclass sve_fp_matrix_mla<bits<3> opc, string asm, ZPRRegOp zda_ty,
9652+
ZPRRegOp reg_ty, SDPatternOperator op,
9653+
ValueType zda_vt, ValueType reg_vt> {
96409654
def NAME : sve_fp_matrix_mla<opc, asm, zda_ty, reg_ty>;
96419655

9642-
def : SVE_3_Op_Pat<zda_vt, op , zda_vt, reg_vt, reg_vt, !cast<Instruction>(NAME)>;
9656+
def : SVE_3_Op_Pat<zda_vt, op, zda_vt, reg_vt, reg_vt, !cast<Instruction>(NAME)>;
96439657
}
96449658

96459659
//===----------------------------------------------------------------------===//

llvm/test/MC/AArch64/SVE/bfmmla-diagnostics.s

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,6 @@ bfmmla z0.s, z1.s, z2.h
55
// CHECK-NEXT: bfmmla z0.s, z1.s, z2.h
66
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
77

8-
bfmmla z0.h, z1.h, z2.h
9-
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
10-
// CHECK-NEXT: bfmmla z0.h, z1.h, z2.h
11-
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
12-
138
bfmmla z0.s, z1.h, z2.s
149
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
1510
// CHECK-NEXT: bfmmla z0.s, z1.h, z2.s

llvm/test/MC/AArch64/SVE2p1/sdot-diagnostics.s

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ sdot z0.s, z0.h, z0.h[-1]
2929
// Invalid vector suffix
3030

3131
sdot z0.h, z0.s, z0.s
32-
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
32+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
3333
// CHECK-NEXT: sdot z0.h, z0.s, z0.s
3434
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
3535

0 commit comments

Comments
 (0)