Skip to content

Commit 0d8078f

Browse files
committed
fixup! [AArch64][llvm] Add support for vmmlaq_[f16,f32]_mf8 intrinsics
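Make it work properly: attach the int_aarch64_neon_fmmla selection patterns directly to the SIMDThreeSameVectorFP8MatrixMul instruction definitions, remove the duplicate SIMDThreeSameVectorMatMulFP multiclass and the stand-alone Pat definitions, and update the fp16 test to expect the .8h destination arrangement instead of .4h.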
1 parent 533acb2 commit 0d8078f

File tree

3 files changed

+13
-37
lines changed

3 files changed

+13
-37
lines changed

llvm/lib/Target/AArch64/AArch64InstrFormats.td

Lines changed: 11 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -9111,26 +9111,6 @@ class SIMDThreeSameVectorMatMul<bit B, bit U, string asm, SDPatternOperator OpNo
91119111
let AsmString = asm # "{\t$Rd.4s, $Rn.16b, $Rm.16b|.4s\t$Rd, $Rn, $Rm}";
91129112
}
91139113

9114-
multiclass SIMDThreeSameVectorMatMulFP<bit B, bit U, string asm, SDPatternOperator OpNode> {
9115-
let Predicates = [HasNEON, HasF8F16MM] in {
9116-
def fp16 : BaseSIMDThreeSameVectorTied<1, U, 0b000, {0b1101, B}, V128, asm, ".8h",
9117-
[(set (v8f16 V128:$dst), (OpNode (v8f16 V128:$Rd),
9118-
(v16i8 V128:$Rn),
9119-
(v16i8 V128:$Rm)))]> {
9120-
let AsmString = asm # "{\t$Rd.4h, $Rn.16b, $Rm.16b|.8h\t$Rd, $Rn, $Rm}";
9121-
}
9122-
}
9123-
9124-
let Predicates = [HasNEON, HasF8F32MM] in {
9125-
def fp32 : BaseSIMDThreeSameVectorTied<1, U, 0b100, {0b1101, B}, V128, asm, ".4s",
9126-
[(set (v4f32 V128:$dst), (OpNode (v4f32 V128:$Rd),
9127-
(v16i8 V128:$Rn),
9128-
(v16i8 V128:$Rm)))]> {
9129-
let AsmString = asm # "{\t$Rd.4s, $Rn.16b, $Rm.16b|.4s\t$Rd, $Rn, $Rm}";
9130-
}
9131-
}
9132-
}
9133-
91349114
//----------------------------------------------------------------------------
91359115
// ARMv8.2-A Dot Product Instructions (Indexed)
91369116
class BaseSIMDThreeSameVectorIndexS<bit Q, bit U, bits<2> size, bits<4> opc, string asm,
@@ -13312,18 +13292,24 @@ multiclass AtomicFPStore<bit R, bits<3> op0, string asm> {
1331213292
def H : BaseAtomicFPStore<FPR16, 0b01, R, op0, asm>;
1331313293
}
1331413294

13315-
class BaseSIMDThreeSameVectorFP8MatrixMul<string asm, bits<2> size, string kind>
13295+
class BaseSIMDThreeSameVectorFP8MatrixMul<string asm, bits<2> size, string kind, list<dag> pattern>
1331613296
: BaseSIMDThreeSameVectorTied<1, 1, {size, 0}, 0b11101,
13317-
V128, asm, ".16b", []> {
13297+
V128, asm, ".16b", pattern> {
1331813298
let AsmString = !strconcat(asm, "{\t$Rd", kind, ", $Rn.16b, $Rm.16b",
1331913299
"|", kind, "\t$Rd, $Rn, $Rm}");
1332013300
}
1332113301

13322-
multiclass SIMDThreeSameVectorFP8MatrixMul<string asm>{
13323-
def v8f16: BaseSIMDThreeSameVectorFP8MatrixMul<asm, 0b00, ".8h">{
13302+
multiclass SIMDThreeSameVectorFP8MatrixMul<string asm, SDPatternOperator OpNode>{
13303+
def v8f16: BaseSIMDThreeSameVectorFP8MatrixMul<asm, 0b00, ".8h",
13304+
[(set (v8f16 V128:$dst), (OpNode (v8f16 V128:$Rd),
13305+
(v16i8 V128:$Rn),
13306+
(v16i8 V128:$Rm)))]> {
1332413307
let Predicates = [HasNEON, HasF8F16MM];
1332513308
}
13326-
def v4f32: BaseSIMDThreeSameVectorFP8MatrixMul<asm, 0b10, ".4s">{
13309+
def v4f32: BaseSIMDThreeSameVectorFP8MatrixMul<asm, 0b10, ".4s",
13310+
[(set (v4f32 V128:$dst), (OpNode (v4f32 V128:$Rd),
13311+
(v16i8 V128:$Rn),
13312+
(v16i8 V128:$Rm)))]> {
1332713313
let Predicates = [HasNEON, HasF8F32MM];
1332813314
}
1332913315
}

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1731,20 +1731,10 @@ def BFCVT : BF16ToSinglePrecision<"bfcvt">;
17311731
let Predicates = [HasMatMulInt8] in {
17321732
def SMMLA : SIMDThreeSameVectorMatMul<0, 0, "smmla", int_aarch64_neon_smmla>;
17331733
def UMMLA : SIMDThreeSameVectorMatMul<0, 1, "ummla", int_aarch64_neon_ummla>;
1734-
defm FMMLA : SIMDThreeSameVectorMatMulFP<1, 1, "fmmla", int_aarch64_neon_fmmla>;
17351734
def USMMLA : SIMDThreeSameVectorMatMul<1, 0, "usmmla", int_aarch64_neon_usmmla>;
17361735
defm USDOT : SIMDThreeSameVectorDot<0, 1, "usdot", AArch64usdot>;
17371736
defm USDOTlane : SIMDThreeSameVectorDotIndex<0, 1, 0b10, "usdot", AArch64usdot>;
17381737

1739-
// FMMLA fp16
1740-
def : Pat<(v8f16 (int_aarch64_neon_fmmla
1741-
(v8f16 V128:$Rd), (v16i8 V128:$Rn), (v16i8 V128:$Rm))),
1742-
(FMMLAfp16 V128:$Rd, V128:$Rn, V128:$Rm)>;
1743-
1744-
// FMMLA fp32
1745-
def : Pat<(v4f32 (int_aarch64_neon_fmmla
1746-
(v4f32 V128:$Rd), (v16i8 V128:$Rn), (v16i8 V128:$Rm))),
1747-
(FMMLAfp32 V128:$Rd, V128:$Rn, V128:$Rm)>;
17481738

17491739
// sudot lane has a pattern where usdot is expected (there is no sudot).
17501740
// The second operand is used in the dup operation to repeat the indexed
@@ -11426,7 +11416,7 @@ let Predicates = [HasF16F32MM] in
1142611416
defm FMMLA : SIMDThreeSameVectorFMLAWiden<"fmmla">;
1142711417

1142811418
let Uses = [FPMR, FPCR] in
11429-
defm FMMLA : SIMDThreeSameVectorFP8MatrixMul<"fmmla">;
11419+
defm FMMLA : SIMDThreeSameVectorFP8MatrixMul<"fmmla", int_aarch64_neon_fmmla>;
1143011420

1143111421
//===----------------------------------------------------------------------===//
1143211422
// Contention Management Hints (FEAT_CMH)

llvm/test/CodeGen/AArch64/aarch64-matmul-fp16.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
define <8 x half> @fmmla.v8f16.v16i8(<8 x half> %r, <16 x i8> %a, <16 x i8> %b) {
66
; CHECK-LABEL: fmmla.v8f16.v16i8:
77
; CHECK: // %bb.0: // %entry
8-
; CHECK-NEXT: fmmla v0.4h, v1.16b, v2.16b
8+
; CHECK-NEXT: fmmla v0.8h, v1.16b, v2.16b
99
; CHECK-NEXT: ret
1010
entry:
1111
%vfmmla1.i = tail call <8 x half> @llvm.aarch64.neon.fmmla.v8f16.v16i8(<8 x half> %r, <16 x i8> %a, <16 x i8> %b) #3

0 commit comments

Comments
 (0)