Skip to content

Commit 850e086

Browse files
committed
Add Apple proprietary AMX extension.
1 parent 01f40dc commit 850e086

23 files changed

+166
-23
lines changed

llvm/lib/Target/AArch64/AArch64.td

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1479,23 +1479,30 @@ def ProcessorFeatures {
14791479
list<SubtargetFeature> AppleA13 = [HasV8_4aOps, FeatureCrypto, FeatureFPARMv8,
14801480
FeatureNEON, FeaturePerfMon, FeatureFullFP16,
14811481
FeatureFP16FML, FeatureSHA3];
1482+
1483+
// Capstone addition AMX: The A14 onwards are variants (same generation)
1484+
// of the Apple M series processors.
1485+
// To my knowledge it is not documented if those processors actually support AMX,
1486+
// but we add it here as a feature anyway, simply because it is too much work
1487+
// adding a model for the Apple M series processors. Capstone currently
1488+
// doesn't care about the LLVM processor definitions.
14821489
list<SubtargetFeature> AppleA14 = [HasV8_4aOps, FeatureCrypto, FeatureFPARMv8,
14831490
FeatureNEON, FeaturePerfMon, FeatureFRInt3264,
14841491
FeatureSpecRestrict, FeatureSSBS, FeatureSB,
14851492
FeaturePredRes, FeatureCacheDeepPersist,
14861493
FeatureFullFP16, FeatureFP16FML, FeatureSHA3,
1487-
FeatureAltFPCmp];
1494+
FeatureAltFPCmp, FeatureAMX];
14881495
list<SubtargetFeature> AppleA15 = [HasV8_6aOps, FeatureCrypto, FeatureFPARMv8,
14891496
FeatureNEON, FeaturePerfMon, FeatureSHA3,
1490-
FeatureFullFP16, FeatureFP16FML];
1497+
FeatureFullFP16, FeatureFP16FML, FeatureAMX];
14911498
list<SubtargetFeature> AppleA16 = [HasV8_6aOps, FeatureCrypto, FeatureFPARMv8,
14921499
FeatureNEON, FeaturePerfMon, FeatureSHA3,
14931500
FeatureFullFP16, FeatureFP16FML,
1494-
FeatureHCX];
1501+
FeatureHCX, FeatureAMX];
14951502
list<SubtargetFeature> AppleA17 = [HasV8_6aOps, FeatureCrypto, FeatureFPARMv8,
14961503
FeatureNEON, FeaturePerfMon, FeatureSHA3,
14971504
FeatureFullFP16, FeatureFP16FML,
1498-
FeatureHCX];
1505+
FeatureHCX, FeatureAMX];
14991506
list<SubtargetFeature> ExynosM3 = [HasV8_0aOps, FeatureCRC, FeatureCrypto,
15001507
FeaturePerfMon];
15011508
list<SubtargetFeature> ExynosM4 = [HasV8_2aOps, FeatureCrypto, FeatureDotProd,
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
// SPDX-FileCopyrightText: 2025 Rot127 <[email protected]>
2+
// SPDX-License-Identifier: LGPL-3.0-only
3+
//
4+
// Definitions of Apple's proprietary instructions.
5+
6+
7+
// Subtarget feature "amx". It is appended to the AppleA14..AppleA17 feature
// lists in AArch64.td and gates every AppleAMX instruction via HasAMX.
def FeatureAMX : SubtargetFeature<"amx", "HasAMX", "true",
8+
"Enable proprietary Apple AArch64 extension AMX.">;
9+
10+
// Predicate tied to FeatureAMX. It is used as the Predicates list of the
// AppleAMX class and is listed in UnsupportedFeatures of several
// scheduling models.
def HasAMX : Predicate<"Subtarget->hasAMX()">,
11+
AssemblerPredicate<(all_of FeatureAMX), "amx">;
12+
13+
// Base class for instructions in this file: forwards all arguments to `I`
// (with an empty string as the fifth argument) and fixes the top eleven
// encoding bits, Inst{31-21}, to 0b00000000001.
class ReservedI<dag oops, dag iops, string asm, string operands,
14+
list<dag> pattern = []>
15+
: I<oops, iops, asm, operands, "", pattern> {
16+
let Inst{31-29} = 0b000;
17+
let Inst{28-25} = 0b0000;
18+
let Inst{24-21} = 0b0001; // only bit 21 is set within Inst{31-21}
19+
}
20+
21+
//
22+
// ====== AMX ======
23+
//
24+
// Reference: https://github.com/corsix/amx
25+
26+
// For now not all sub registers are defined here.
27+
// Because they cannot be addressed explicitly or implicitly
28+
// by the AMX instructions.
29+
// The registers it uses are encoded in the GPR value
30+
// and hence only known during runtime.
31+
// Register records referenced only from the implicit Uses/Defs lists of the
// AMX instructions in this file; no instruction takes them as an operand.
// NOTE(review): all three pass 0 as the first AArch64Reg argument, presumably
// the encoding value. Confirm that sharing it is harmless.
def X_LANE : AArch64Reg<0, "X">;
32+
def Y_LANE : AArch64Reg<0, "Y">;
33+
def Z_MATRIX : AArch64Reg<0, "Z">;
34+
35+
// Common encoding for all AMX instructions.
//   amx_opcode - 5-bit opcode placed in Inst{9-5}.
//   src        - 5-bit field placed in Inst{4-0}; SET and CLR override it
//                with a constant.
// Together with ReservedI the fixed bits are 0x00201000 (bits 21 and 12 set),
// so the full word is 0x00201000 | (amx_opcode << 5) | src.
// Every instruction derived from this class is predicated on HasAMX.
class AppleAMX<bits<5> amx_opcode, dag oops, dag iops, string asm, string operands,
36+
list<dag> pattern = []>
37+
: ReservedI<oops, iops, asm, operands, pattern> {
38+
bits<5> src;
39+
let Inst{20-16} = 0b00000;
40+
let Inst{15-12} = 0b0001;
41+
let Inst{11-10} = 0b00;
42+
let Inst{9-5} = amx_opcode;
43+
let Inst{4-0} = src;
44+
45+
let Predicates = [HasAMX];
46+
}
47+
48+
// AMX load instructions (opcodes 0, 1, 4 and 6). All are flagged mayLoad and
// take a single GPR64 operand. Each implicitly defines the register named in
// its enclosing `let Defs`: X_LANE for LDX, Y_LANE for LDY, Z_MATRIX for
// LDZ and LDZI.
let mayLoad = 1 in {
49+
let Defs = [X_LANE] in
50+
def LDX : AppleAMX<0, (outs), (ins GPR64:$src), "ldx", "\t$src">;
51+
52+
let Defs = [Y_LANE] in
53+
def LDY : AppleAMX<1, (outs), (ins GPR64:$src), "ldy", "\t$src">;
54+
55+
let Defs = [Z_MATRIX] in {
56+
def LDZ : AppleAMX<4, (outs), (ins GPR64:$src), "ldz", "\t$src">;
57+
def LDZI : AppleAMX<6, (outs), (ins GPR64:$src), "ldzi", "\t$src">;
58+
}
59+
}
60+
61+
// AMX store instructions (opcodes 2, 3, 5 and 7). All are flagged mayStore
// and take a single GPR64 operand. Each implicitly uses the register named
// in its enclosing `let Uses`: X_LANE for STX, Y_LANE for STY, Z_MATRIX for
// STZ and STZI.
let mayStore = 1 in {
62+
let Uses = [X_LANE] in
63+
def STX : AppleAMX<2, (outs), (ins GPR64:$src), "stx", "\t$src">;
64+
65+
let Uses = [Y_LANE] in
66+
def STY : AppleAMX<3, (outs), (ins GPR64:$src), "sty", "\t$src">;
67+
68+
let Uses = [Z_MATRIX] in {
69+
def STZ : AppleAMX<5, (outs), (ins GPR64:$src), "stz", "\t$src">;
70+
def STZI : AppleAMX<7, (outs), (ins GPR64:$src), "stzi", "\t$src">;
71+
}
72+
}
73+
74+
// The "set" and "clr" instructions technically are defined by having an
75+
// immediate value operand.
76+
// I assume there is some kind of "amx state change" instruction.
77+
// The immediate value then determines what specific instruction it is.
78+
// Similar to the hint instructions, which have aliases depending on some
79+
// bits.
80+
// Since the underlying "state change instruction" is not documented
81+
// I make it an instruction with hard-coded bits[4:0] and no
82+
// immediate operand.
83+
// SET and CLR share amx_opcode 17 and have no operands. They differ only in
// the constant that replaces the class's `Inst{4-0} = src`: 0b00000 for SET,
// 0b00001 for CLR.
// NOTE(review): the `let Defs = ... in` has no braces, so it applies to SET
// only and CLR declares no implicit Defs. Confirm this is intended.
let Defs = [X_LANE, Y_LANE, Z_MATRIX] in
84+
def SET : AppleAMX<17, (outs), (ins), "set", ""> {
85+
let Inst{4-0} = 0b00000;
86+
}
87+
88+
def CLR : AppleAMX<17, (outs), (ins), "clr", ""> {
89+
let Inst{4-0} = 0b00001;
90+
}
91+
92+
// These two instructions decode differently depending on the operand value.
93+
// They also use and define different registers.
94+
// All of it is only known at runtime, so implicit Defs and Uses
95+
// are omitted here.
96+
// Opcodes 8 and 9; single GPR64 operand; no implicit Uses/Defs declared.
def EXTRX : AppleAMX<8, (outs), (ins GPR64:$src), "extrx", "\t$src">;
97+
def EXTRY : AppleAMX<9, (outs), (ins GPR64:$src), "extry", "\t$src">;
98+
99+
// Arithmetic instructions that implicitly read X_LANE, Y_LANE and Z_MATRIX
// and implicitly write Z_MATRIX. Each takes a single GPR64 operand.
// The entries are not in opcode order: MAC16 is opcode 14 and VECFP is 19.
let Uses = [X_LANE, Y_LANE, Z_MATRIX], Defs = [Z_MATRIX] in {
100+
def FMA64 : AppleAMX<10, (outs), (ins GPR64:$src), "fma64", "\t$src">;
101+
def FMS64 : AppleAMX<11, (outs), (ins GPR64:$src), "fms64", "\t$src">;
102+
def FMA32 : AppleAMX<12, (outs), (ins GPR64:$src), "fma32", "\t$src">;
103+
def FMS32 : AppleAMX<13, (outs), (ins GPR64:$src), "fms32", "\t$src">;
104+
def FMA16 : AppleAMX<15, (outs), (ins GPR64:$src), "fma16", "\t$src">;
105+
def FMS16 : AppleAMX<16, (outs), (ins GPR64:$src), "fms16", "\t$src">;
106+
def VECFP : AppleAMX<19, (outs), (ins GPR64:$src), "vecfp", "\t$src">;
107+
def MAC16 : AppleAMX<14, (outs), (ins GPR64:$src), "mac16", "\t$src">;
108+
def MATFP : AppleAMX<21, (outs), (ins GPR64:$src), "matfp", "\t$src">;
109+
}
110+
111+
// These instructions might read Z as well (depends on bit 47 in the operand value).
112+
// But the indirect read is only known at runtime, when the operand value is known.
113+
// So here we don't add Z to the implicit read list.
114+
// Integer vector/matrix operations (opcodes 18 and 20). They implicitly read
// X_LANE and Y_LANE and implicitly write Z_MATRIX; Z_MATRIX is deliberately
// absent from Uses.
let Uses = [X_LANE, Y_LANE], Defs = [Z_MATRIX] in {
115+
def VECINT : AppleAMX<18, (outs), (ins GPR64:$src), "vecint", "\t$src">;
116+
def MATINT : AppleAMX<20, (outs), (ins GPR64:$src), "matint", "\t$src">;
117+
}
118+
119+
// Opcode 22; single GPR64 operand; no implicit Uses/Defs declared.
// NOTE(review): unlike EXTRX/EXTRY, the file gives no reason for omitting
// Uses/Defs here. Confirm it is for the same reason (registers only known
// at runtime).
def GENLUT : AppleAMX<22, (outs), (ins GPR64:$src), "genlut", "\t$src">;

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9595,3 +9595,4 @@ include "AArch64InstrAtomics.td"
95959595
include "AArch64SVEInstrInfo.td"
95969596
include "AArch64SMEInstrInfo.td"
95979597
include "AArch64InstrGISel.td"
9598+
include "AArch64AppleProprietary.td"

llvm/lib/Target/AArch64/AArch64SchedA53.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ def CortexA53Model : SchedMachineModel {
2929
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
3030
PAUnsupported.F,
3131
SMEUnsupported.F,
32-
[HasMTE, HasCSSC]);
32+
[HasMTE, HasCSSC, HasAMX]);
3333
}
3434

3535

llvm/lib/Target/AArch64/AArch64SchedA55.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ def CortexA55Model : SchedMachineModel {
2929
let PostRAScheduler = 1; // Enable PostRA scheduler pass.
3030
let CompleteModel = 0; // Covers instructions applicable to Cortex-A55.
3131

32-
list<Predicate> UnsupportedFeatures = [HasSVE, HasMTE];
32+
list<Predicate> UnsupportedFeatures = [HasSVE, HasMTE, HasAMX];
3333

3434
// FIXME: Remove when all errors have been fixed.
3535
let FullInstRWOverlapCheck = 0;

llvm/lib/Target/AArch64/AArch64SchedA57.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ def CortexA57Model : SchedMachineModel {
3434
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
3535
PAUnsupported.F,
3636
SMEUnsupported.F,
37-
[HasMTE, HasCSSC]);
37+
[HasMTE, HasCSSC, HasAMX]);
3838
}
3939

4040
//===----------------------------------------------------------------------===//

llvm/lib/Target/AArch64/AArch64SchedA64FX.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def A64FXModel : SchedMachineModel {
2323
list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F, SVEUnsupported.F,
2424
[HasMTE, HasMatMulInt8, HasBF16,
2525
HasPAuth, HasPAuthLR, HasCPA,
26-
HasCSSC]);
26+
HasCSSC, HasAMX]);
2727
let FullInstRWOverlapCheck = 0;
2828
}
2929

llvm/lib/Target/AArch64/AArch64SchedAmpere1.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def Ampere1Model : SchedMachineModel {
2727
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
2828
SMEUnsupported.F,
2929
PAUnsupported.F,
30-
[HasMTE]);
30+
[HasMTE, HasAMX]);
3131
}
3232

3333
let SchedModel = Ampere1Model in {

llvm/lib/Target/AArch64/AArch64SchedAmpere1B.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@ def Ampere1BModel : SchedMachineModel {
2626

2727
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
2828
SMEUnsupported.F,
29-
PAUnsupported.F);
29+
PAUnsupported.F,
30+
[HasAMX]);
3031
}
3132

3233
let SchedModel = Ampere1BModel in {

llvm/lib/Target/AArch64/AArch64SchedCyclone.td

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -872,4 +872,19 @@ def : ReadAdvance<ReadIM, 0>;
872872
def : ReadAdvance<ReadIMA, 0>;
873873
def : ReadAdvance<ReadID, 0>;
874874

875+
// Proprietary Apple instructions
876+
877+
//
878+
// AMX extension
879+
//
880+
881+
// Scheduling info for the AMX instructions within the Cyclone model:
//   loads                      -> WriteVLD
//   stores, SET, CLR           -> WriteVST
//   all remaining AMX ops      -> WriteVLD + WriteVST
// NOTE(review): these reuse existing vector load/store write types and look
// like placeholders rather than measured costs; confirm.
// NOTE(review): the regexes would also accept LDXI/LDYI/STXI/STYI, which are
// not defined in AArch64AppleProprietary.td.
def : InstRW<[WriteVLD],
882+
(instregex "LD[XYZ]I?$")>;
883+
def : InstRW<[WriteVST],
884+
(instregex "ST[XYZ]I?$")>;
885+
def : InstRW<[WriteVST],
886+
(instrs SET, CLR)>;
887+
def : InstRW<[WriteVLD, WriteVST],
888+
(instrs FMA64, FMS64, FMA32, FMS32, FMA16, FMS16, VECFP, MAC16, MATFP, VECINT, MATINT, GENLUT, EXTRX, EXTRY)>;
889+
875890
} // SchedModel = CycloneModel

0 commit comments

Comments
 (0)