|
| 1 | +// SPDX-FileCopyrightText: 2025 Rot127 < [email protected]> |
| 2 | +// SPDX-License-Identifier: LGPL-3.0-only |
| 3 | +// |
| 4 | +// Definitions of Apple's proprietary instructions. |
| 5 | + |
| 6 | +// Subtarget feature for AMX and the predicate that checks for it. |
| 7 | +def FeatureAMX : SubtargetFeature<"amx", "HasAMX", "true", |
| 8 | + "Enable proprietary Apple AArch64 extension AMX.">; |
| 9 | + |
| 10 | +def HasAMX : Predicate<"Subtarget->hasAMX()">, |
| 11 | + AssemblerPredicate<(all_of FeatureAMX), "amx">; |
| 12 | + |
| 13 | +class ReservedI<dag oops, dag iops, string asm, string operands, |
| 14 | + list<dag> pattern = []> |
| 15 | + : I<oops, iops, asm, operands, "", pattern> { |
| 16 | + // Fixed prefix shared by all subclasses: bits 31-22 are zero and |
| 17 | + // bit 21 is set. Bits 20-0 are left for the subclass to fill in. |
| 18 | + let Inst{31-21} = 0b00000000001; |
| 19 | +} |
| 20 | + |
| 21 | +// |
| 22 | +// ====== AMX ====== |
| 23 | +// |
| 24 | +// Reference: https://github.com/corsix/amx |
| 25 | + |
| 26 | +// For now, not all sub-registers are defined here, |
| 27 | +// because they cannot be addressed explicitly or implicitly |
| 28 | +// by the AMX instructions. |
| 29 | +// The registers an instruction uses are encoded in the value of its |
| 30 | +// GPR operand and are hence only known at runtime. |
| 31 | +def X_LANE : AArch64Reg<0, "X">; |
| 32 | +def Y_LANE : AArch64Reg<0, "Y">; |
| 33 | +def Z_MATRIX : AArch64Reg<0, "Z">; |
| 34 | + |
| 35 | +class AppleAMX<bits<5> amx_opcode, dag oops, dag iops, string asm, string operands, |
| 36 | + list<dag> pattern = []> |
| 37 | + : ReservedI<oops, iops, asm, operands, pattern> { |
| 38 | + // Encoding below the ReservedI prefix: a fixed pattern in bits 20-10 |
| 39 | + // (only bit 12 set), amx_opcode in bits 9-5 and src in bits 4-0. |
| 40 | + bits<5> src; |
| 41 | + let Inst{20-10} = 0b00000000100; |
| 42 | + let Inst{9-5} = amx_opcode; |
| 43 | + let Inst{4-0} = src; |
| 44 | + |
| 45 | + let Predicates = [HasAMX]; |
| 46 | +} |
| 47 | + |
| 48 | +// Loads: each record is marked mayLoad and implicitly defines the |
| 49 | +// AMX register (X, Y or Z) named in its Defs list. |
| 50 | +let mayLoad = 1, Defs = [X_LANE] in |
| 51 | +def LDX : AppleAMX<0, (outs), (ins GPR64:$src), "ldx", "\t$src">; |
| 52 | + |
| 53 | +let mayLoad = 1, Defs = [Y_LANE] in |
| 54 | +def LDY : AppleAMX<1, (outs), (ins GPR64:$src), "ldy", "\t$src">; |
| 55 | + |
| 56 | +let mayLoad = 1, Defs = [Z_MATRIX] in { |
| 57 | + def LDZ : AppleAMX<4, (outs), (ins GPR64:$src), "ldz", "\t$src">; |
| 58 | + def LDZI : AppleAMX<6, (outs), (ins GPR64:$src), "ldzi", "\t$src">; |
| 59 | +} |
| 60 | + |
| 61 | +// Stores: each record is marked mayStore and implicitly uses the |
| 62 | +// AMX register (X, Y or Z) named in its Uses list. |
| 63 | +let mayStore = 1, Uses = [X_LANE] in |
| 64 | +def STX : AppleAMX<2, (outs), (ins GPR64:$src), "stx", "\t$src">; |
| 65 | + |
| 66 | +let mayStore = 1, Uses = [Y_LANE] in |
| 67 | +def STY : AppleAMX<3, (outs), (ins GPR64:$src), "sty", "\t$src">; |
| 68 | + |
| 69 | +let mayStore = 1, Uses = [Z_MATRIX] in { |
| 70 | + def STZ : AppleAMX<5, (outs), (ins GPR64:$src), "stz", "\t$src">; |
| 71 | + def STZI : AppleAMX<7, (outs), (ins GPR64:$src), "stzi", "\t$src">; |
| 72 | +} |
| 73 | + |
| 74 | +// The "set" and "clr" instructions are technically defined by an |
| 75 | +// immediate value operand. |
| 76 | +// I assume there is some kind of "amx state change" instruction and |
| 77 | +// the immediate value then determines which specific instruction it is, |
| 78 | +// similar to the hint instructions, which have aliases depending on some |
| 79 | +// bits. |
| 80 | +// Since the underlying "state change instruction" is not documented, |
| 81 | +// each is modelled as an instruction with hard-coded bits[4:0] and no |
| 82 | +// immediate operand. NOTE(review): only SET carries Defs; confirm for CLR. |
| 83 | +let Defs = [X_LANE, Y_LANE, Z_MATRIX] in |
| 84 | +def SET : AppleAMX<17, (outs), (ins), "set", ""> { |
| 85 | + let Inst{4-0} = 0b00000; |
| 86 | +} |
| 87 | + |
| 88 | +def CLR : AppleAMX<17, (outs), (ins), "clr", ""> { |
| 89 | + let Inst{4-0} = 0b00001; |
| 90 | +} |
| 91 | + |
| 92 | +// These two instructions decode differently depending on the operand value. |
| 93 | +// They also use and define different registers. |
| 94 | +// All of this is only known at runtime, so implicit Defs and Uses |
| 95 | +// are omitted here. |
| 96 | +def EXTRX : AppleAMX<8, (outs), (ins GPR64:$src), "extrx", "\t$src">; |
| 97 | +def EXTRY : AppleAMX<9, (outs), (ins GPR64:$src), "extry", "\t$src">; |
| 98 | + |
| 99 | +let Defs = [Z_MATRIX], Uses = [X_LANE, Y_LANE, Z_MATRIX] in { // Z is both read and written |
| 100 | + def FMA64 : AppleAMX<10, (outs), (ins GPR64:$src), "fma64", "\t$src">; |
| 101 | + def FMS64 : AppleAMX<11, (outs), (ins GPR64:$src), "fms64", "\t$src">; |
| 102 | + def FMA32 : AppleAMX<12, (outs), (ins GPR64:$src), "fma32", "\t$src">; |
| 103 | + def FMS32 : AppleAMX<13, (outs), (ins GPR64:$src), "fms32", "\t$src">; |
| 104 | + def FMA16 : AppleAMX<15, (outs), (ins GPR64:$src), "fma16", "\t$src">; |
| 105 | + def FMS16 : AppleAMX<16, (outs), (ins GPR64:$src), "fms16", "\t$src">; |
| 106 | + def VECFP : AppleAMX<19, (outs), (ins GPR64:$src), "vecfp", "\t$src">; |
| 107 | + def MAC16 : AppleAMX<14, (outs), (ins GPR64:$src), "mac16", "\t$src">; |
| 108 | + def MATFP : AppleAMX<21, (outs), (ins GPR64:$src), "matfp", "\t$src">; |
| 109 | +} |
| 110 | + |
| 111 | +// These instructions might read Z as well (depends on bit 47 in the operand value). |
| 112 | +// But the indirect read is only known at runtime, when the operand value is known. |
| 113 | +// So Z is not added to the implicit read list here. |
| 114 | +let Uses = [X_LANE, Y_LANE], Defs = [Z_MATRIX] in { |
| 115 | + def VECINT : AppleAMX<18, (outs), (ins GPR64:$src), "vecint", "\t$src">; |
| 116 | + def MATINT : AppleAMX<20, (outs), (ins GPR64:$src), "matint", "\t$src">; |
| 117 | +} |
| 118 | +// No implicit Defs or Uses are declared for genlut. |
| 119 | +def GENLUT : AppleAMX<22, (outs), (ins GPR64:$src), "genlut", "\t$src">; |
0 commit comments