Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions clang/test/Driver/print-supported-extensions-aarch64.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
// CHECK-NEXT: d128 FEAT_D128, FEAT_LVA3, FEAT_SYSREG128, FEAT_SYSINSTR128 Enable Armv9.4-A 128-bit Page Table Descriptors, System Registers and instructions
// CHECK-NEXT: dit FEAT_DIT Enable Armv8.4-A Data Independent Timing instructions
// CHECK-NEXT: dotprod FEAT_DotProd Enable dot product support
// CHECK-NEXT: f16f32dot FEAT_F16F32DOT Enable Armv9.7-A Advanced SIMD half-precision dot product accumulate to single-precision
// CHECK-NEXT: f16f32mm FEAT_F16F32MM Enable Armv9.7-A Advanced SIMD half-precision matrix multiply-accumulate to single-precision
// CHECK-NEXT: f16mm FEAT_F16MM Enable Armv9.7-A non-widening half-precision matrix multiply-accumulate
// CHECK-NEXT: f32mm FEAT_F32MM Enable Matrix Multiply FP32 Extension
// CHECK-NEXT: f64mm FEAT_F64MM Enable Matrix Multiply FP64 Extension
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/AArch64/AArch64Features.td
Original file line number Diff line number Diff line change
Expand Up @@ -619,6 +619,12 @@ def FeatureSVE_B16MM : ExtensionWithMArch<"sve-b16mm", "SVE_B16MM", "FEAT_SVE_B1
def FeatureF16MM : ExtensionWithMArch<"f16mm", "F16MM", "FEAT_F16MM",
"Enable Armv9.7-A non-widening half-precision matrix multiply-accumulate", [FeatureFullFP16]>;

// FEAT_F16F32DOT: selectable by the extension name "f16f32dot".
// The trailing list names FeatureNEON and FeatureFullFP16 alongside this
// feature (presumably as implied/required features, as with the other
// ExtensionWithMArch entries in this file - confirm against that class).
def FeatureF16F32DOT : ExtensionWithMArch<"f16f32dot", "F16F32DOT", "FEAT_F16F32DOT",
"Enable Armv9.7-A Advanced SIMD half-precision dot product accumulate to single-precision", [FeatureNEON, FeatureFullFP16]>;

// FEAT_F16F32MM: selectable by the extension name "f16f32mm".
// The trailing list names FeatureNEON and FeatureFullFP16 alongside this
// feature (presumably as implied/required features, as with the other
// ExtensionWithMArch entries in this file - confirm against that class).
def FeatureF16F32MM : ExtensionWithMArch<"f16f32mm", "F16F32MM", "FEAT_F16F32MM",
"Enable Armv9.7-A Advanced SIMD half-precision matrix multiply-accumulate to single-precision", [FeatureNEON, FeatureFullFP16]>;

//===----------------------------------------------------------------------===//
// Other Features
//===----------------------------------------------------------------------===//
Expand Down
42 changes: 40 additions & 2 deletions llvm/lib/Target/AArch64/AArch64InstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -1166,6 +1166,21 @@ def timm32_0_15 : Operand<i32>, TImmLeaf<i32, [{
let ParserMatchClass = Imm0_15Operand;
}

// timm32_1_16 predicate - True if the 32-bit immediate is in the range [1,16].
// The value is compared after a cast to uint32_t, so zero fails the lower
// bound and a negative immediate (which wraps to a large unsigned value)
// fails the upper bound. Assembly parsing uses Imm1_16Operand.
def timm32_1_16 : Operand<i32>, TImmLeaf<i32, [{
return ((uint32_t)Imm > 0 && (uint32_t)Imm < 17);
}]> {
let ParserMatchClass = Imm1_16Operand;
}

// timm32_1_8 predicate - True if the 32-bit immediate is in the range [1,8].
// The value is compared after a cast to uint32_t, so zero fails the lower
// bound and a negative immediate (which wraps to a large unsigned value)
// fails the upper bound. Assembly parsing uses Imm1_8Operand.
def timm32_1_8 : Operand<i32>, TImmLeaf<i32, [{
return ((uint32_t)Imm > 0 && (uint32_t)Imm < 9);
}]> {
let ParserMatchClass = Imm1_8Operand;
}


// timm32_0_31 predicate - True if the 32-bit immediate is in the range [0,31]
def timm32_0_31 : Operand<i32>, TImmLeaf<i32, [{
return ((uint32_t)Imm) < 32;
Expand Down Expand Up @@ -6538,8 +6553,7 @@ multiclass SIMDThreeSameVectorFML<bit U, bit b13, bits<3> size, string asm,
}

multiclass SIMDThreeSameVectorMLA<bit Q, string asm, SDPatternOperator op> {

def v8f16 : BaseSIMDThreeSameVectorDot<Q, 0b0, 0b11, 0b1111, asm, ".8h", ".16b",
def v16f8 : BaseSIMDThreeSameVectorDot<Q, 0b0, 0b11, 0b1111, asm, ".8h", ".16b",
V128, v8f16, v16i8, op>;
}

Expand All @@ -6548,6 +6562,23 @@ multiclass SIMDThreeSameVectorMLAL<bit Q, bits<2> sz, string asm, SDPatternOpera
V128, v4f32, v16i8, op>;
}

// Non-widening half-precision three-same-vector form: a single 128-bit (V128)
// definition whose destination and sources are all printed as ".8h"
// (v8f16 accumulator, v8f16 inputs). No selection pattern is attached
// (null_frag), so the instruction is assembler/disassembler only.
multiclass SIMDThreeSameVectorFMLA<string asm> {
def v8f16tov8f16 : BaseSIMDThreeSameVectorDot<0b1, 0b0, 0b11, 0b1101, asm, ".8h", ".8h",
V128, v8f16, v8f16, null_frag>;
}

// Widening half- to single-precision three-same-vector form: a single 128-bit
// (V128) definition with a ".4s" destination (v4f32) and ".8h" sources
// (v8f16). No selection pattern is attached (null_frag), so the instruction
// is assembler/disassembler only.
multiclass SIMDThreeSameVectorFMLAWiden<string asm> {
def v8f16tov4f32 : BaseSIMDThreeSameVectorDot<0b1, 0b0, 0b01, 0b1101, asm, ".4s", ".8h",
V128, v4f32, v8f16, null_frag>;
}

// Half-precision dot product accumulating into single precision, vector form.
// Emits a 64-bit variant (".2s" destination, ".4h" sources, V64) and a
// 128-bit variant (".4s" destination, ".8h" sources, V128). OpNode defaults
// to null_frag, i.e. no selection pattern unless the caller supplies one.
// NOTE(review): the def suffixes here read <dest>to<src> (v2f32tov4f16),
// whereas SIMDThreeSameVectorFMLAWiden and SIMDThreeSameVectorFDOTIndex use
// <src>to<dest> (v8f16tov4f32, v4f16tov2f32) - consider aligning the naming.
multiclass SIMDThreeSameVectorFDot<string asm, SDPatternOperator OpNode = null_frag> {
def v2f32tov4f16 : BaseSIMDThreeSameVectorDot<0, 0, 0b10, 0b1111, asm, ".2s", ".4h", V64,
v2f32, v4f16, OpNode>;
def v4f32tov8f16 : BaseSIMDThreeSameVectorDot<1, 0, 0b10, 0b1111, asm, ".4s", ".8h", V128,
v4f32, v8f16, OpNode>;
}

// FP8 assembly/disassembly classes

//----------------------------------------------------------------------------
Expand Down Expand Up @@ -9169,6 +9200,13 @@ multiclass SIMDThreeSameVectorFMLIndex<bit U, bits<4> opc, string asm,
V128, V128_lo, v4f32, v8f16, VectorIndexH, OpNode>;
}

// Half-precision dot product accumulating into single precision, by-element
// (indexed) form. Emits a 64-bit variant (".2s" destination, ".4h" source,
// V64) and a 128-bit variant (".4s" destination, ".8h" source, V128); in both
// the indexed operand is printed as ".2h" and its lane uses VectorIndexS.
// No selection pattern is attached (null_frag).
multiclass SIMDThreeSameVectorFDOTIndex<string asm> {
def v4f16tov2f32 : BaseSIMDThreeSameVectorIndexS<0b0, 0b0, 0b01, 0b1001, asm, ".2s", ".4h", ".2h",
V64, v2f32, v4f16, VectorIndexS, null_frag>;
def v8f16tov4f32 : BaseSIMDThreeSameVectorIndexS<0b1, 0b0, 0b01, 0b1001, asm, ".4s", ".8h",".2h",
V128, v4f32, v8f16, VectorIndexS, null_frag>;
}

//----------------------------------------------------------------------------
// FP8 Advanced SIMD vector x indexed element
multiclass SIMD_FP8_Dot2_Index<string asm, SDPatternOperator op> {
Expand Down
17 changes: 16 additions & 1 deletion llvm/lib/Target/AArch64/AArch64InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,10 @@ def HasSVE2p3 : Predicate<"Subtarget->hasSVE2p3()">,
AssemblerPredicateWithAll<(all_of FeatureSVE2p3), "sve2p3">;
def HasSME2p3 : Predicate<"Subtarget->hasSME2p3()">,
AssemblerPredicateWithAll<(all_of FeatureSME2p3), "sme2p3">;
// FEAT_F16F32DOT gate: code generation checks Subtarget->hasF16F32DOT();
// the assembler requires FeatureF16F32DOT and names it "f16f32dot" in
// "instruction requires:" diagnostics.
def HasF16F32DOT : Predicate<"Subtarget->hasF16F32DOT()">,
AssemblerPredicateWithAll<(all_of FeatureF16F32DOT), "f16f32dot">;
// FEAT_F16F32MM gate: code generation checks Subtarget->hasF16F32MM();
// the assembler requires FeatureF16F32MM and names it "f16f32mm" in
// "instruction requires:" diagnostics.
def HasF16F32MM : Predicate<"Subtarget->hasF16F32MM()">,
AssemblerPredicateWithAll<(all_of FeatureF16F32MM), "f16f32mm">;

// A subset of SVE(2) instructions are legal in Streaming SVE execution mode,
// they should be enabled if either has been specified.
Expand Down Expand Up @@ -11281,8 +11285,19 @@ let Predicates = [HasLSFE] in {
def STBFMINNML : BaseAtomicFPStore<FPR16, 0b00, 0b1, 0b111, "stbfminnml">;
}

// FEAT_F16F32DOT: FDOT in its vector form and its by-element (indexed) form.
let Predicates = [HasF16F32DOT] in {
defm FDOT     : SIMDThreeSameVectorFDot<"fdot">;
defm FDOTlane : SIMDThreeSameVectorFDOTIndex<"fdot">;
}

// Non-widening half-precision FMMLA (SIMDThreeSameVectorFMLA), available
// only when HasF16MM holds.
let Predicates = [HasF16MM] in
defm FMMLA : SIMDThreeSameVectorFMLA<"fmmla">;

// Widening half- to single-precision FMMLA (SIMDThreeSameVectorFMLAWiden),
// available only when HasF16F32MM holds.
let Predicates = [HasF16F32MM] in
defm FMMLA : SIMDThreeSameVectorFMLAWiden<"fmmla">;

let Uses = [FPMR, FPCR] in
defm FMMLA : SIMDThreeSameVectorFP8MatrixMul<"fmmla">;
defm FMMLA : SIMDThreeSameVectorFP8MatrixMul<"fmmla">;

//===----------------------------------------------------------------------===//
// Contention Management Hints (FEAT_CMH)
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3894,6 +3894,8 @@ static const struct Extension {
{"sve2p3", {AArch64::FeatureSVE2p3}},
{"sve-b16mm", {AArch64::FeatureSVE_B16MM}},
{"f16mm", {AArch64::FeatureF16MM}},
{"f16f32dot", {AArch64::FeatureF16F32DOT}},
{"f16f32mm", {AArch64::FeatureF16F32MM}},
};

static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/MC/AArch64/FP8/fmmla-diagnostics.s
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ fmmla v0.4s, v1.4s, v2.4s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

fmmla v0.8h, v1.8h, v2.8h
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: f16mm
// CHECK-NEXT: fmmla v0.8h, v1.8h, v2.8h
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

Expand Down
59 changes: 59 additions & 0 deletions llvm/test/MC/AArch64/neon-fdot-diagnostics.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// RUN: not llvm-mc -triple=aarch64 -mattr=+f16f32dot 2>&1 < %s| FileCheck %s

// --------------------------------------------------------------------------//
// Invalid operand

fdot v0.2s, v0.4b, v0.4b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: fdot v0.2s, v0.4b, v0.4b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

fdot v0.2b, v0.4b, v0.4b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: fdot v0.2b, v0.4b, v0.4b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

fdot v0.2s, v0.4s, v0.4s
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: fdot v0.2s, v0.4s, v0.4s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

fdot v0.2h, v0.4h, v0.4h
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: fdot v0.2h, v0.4h, v0.4h
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

// fdot indexed

fdot v0.2s, v0.4b, v0.4b[0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: fdot v0.2s, v0.4b, v0.4b[0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

fdot v0.2b, v0.4b, v0.4b[0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: fdot v0.2b, v0.4b, v0.4b[0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

fdot v0.2s, v0.4s, v0.4s[0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: fdot v0.2s, v0.4s, v0.4s[0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

fdot v0.2h, v0.4h, v0.4h[0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: fdot v0.2h, v0.4h, v0.4h[0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

// --------------------------------------------------------------------------//
// Invalid immediate range

fdot v0.2s, v0.4h, v0.2h[-1]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
// CHECK-NEXT: fdot v0.2s, v0.4h, v0.2h[-1]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

fdot v0.2s, v0.4h, v0.2h[4]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
// CHECK-NEXT: fdot v0.2s, v0.4h, v0.2h[4]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
147 changes: 147 additions & 0 deletions llvm/test/MC/AArch64/neon-fdot.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+f16f32dot < %s \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+f16f32dot < %s \
// RUN: | llvm-objdump -d --mattr=+f16f32dot --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-INST
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+f16f32dot < %s \
// RUN: | llvm-objdump -d --mattr=-f16f32dot --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-UNKNOWN
// Disassemble encoding and check the re-encoding (-show-encoding) matches.
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+f16f32dot < %s \
// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
// RUN: | llvm-mc -triple=aarch64 -mattr=+f16f32dot -disassemble -show-encoding \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST

fdot v0.2s, v0.4h, v0.4h
// CHECK-INST: fdot v0.2s, v0.4h, v0.4h
// CHECK-ENCODING: encoding: [0x00,0xfc,0x80,0x0e]
// CHECK-ERROR: instruction requires: f16f32dot
// CHECK-UNKNOWN: 0e80fc00 <unknown>

fdot v10.2s, v10.4h, v10.4h
// CHECK-INST: fdot v10.2s, v10.4h, v10.4h
// CHECK-ENCODING: encoding: [0x4a,0xfd,0x8a,0x0e]
// CHECK-ERROR: instruction requires: f16f32dot
// CHECK-UNKNOWN: 0e8afd4a <unknown>

fdot v31.2s, v31.4h, v31.4h
// CHECK-INST: fdot v31.2s, v31.4h, v31.4h
// CHECK-ENCODING: encoding: [0xff,0xff,0x9f,0x0e]
// CHECK-ERROR: instruction requires: f16f32dot
// CHECK-UNKNOWN: 0e9fffff <unknown>

fdot v0.4s, v0.8h, v0.8h
// CHECK-INST: fdot v0.4s, v0.8h, v0.8h
// CHECK-ENCODING: encoding: [0x00,0xfc,0x80,0x4e]
// CHECK-ERROR: instruction requires: f16f32dot
// CHECK-UNKNOWN: 4e80fc00 <unknown>

fdot v10.4s, v10.8h, v10.8h
// CHECK-INST: fdot v10.4s, v10.8h, v10.8h
// CHECK-ENCODING: encoding: [0x4a,0xfd,0x8a,0x4e]
// CHECK-ERROR: instruction requires: f16f32dot
// CHECK-UNKNOWN: 4e8afd4a <unknown>

fdot v31.4s, v31.8h, v31.8h
// CHECK-INST: fdot v31.4s, v31.8h, v31.8h
// CHECK-ENCODING: encoding: [0xff,0xff,0x9f,0x4e]
// CHECK-ERROR: instruction requires: f16f32dot
// CHECK-UNKNOWN: 4e9fffff <unknown>

// fdot indexed

fdot v0.2s, v0.4h, v0.2h[0]
// CHECK-INST: fdot v0.2s, v0.4h, v0.2h[0]
// CHECK-ENCODING: encoding: [0x00,0x90,0x40,0x0f]
// CHECK-ERROR: instruction requires: f16f32dot
// CHECK-UNKNOWN: 0f409000 <unknown>

fdot v10.2s, v0.4h, v0.2h[0]
// CHECK-INST: fdot v10.2s, v0.4h, v0.2h[0]
// CHECK-ENCODING: encoding: [0x0a,0x90,0x40,0x0f]
// CHECK-ERROR: instruction requires: f16f32dot
// CHECK-UNKNOWN: 0f40900a <unknown>

fdot v21.2s, v0.4h, v0.2h[0]
// CHECK-INST: fdot v21.2s, v0.4h, v0.2h[0]
// CHECK-ENCODING: encoding: [0x15,0x90,0x40,0x0f]
// CHECK-ERROR: instruction requires: f16f32dot
// CHECK-UNKNOWN: 0f409015 <unknown>

fdot v31.2s, v0.4h, v0.2h[0]
// CHECK-INST: fdot v31.2s, v0.4h, v0.2h[0]
// CHECK-ENCODING: encoding: [0x1f,0x90,0x40,0x0f]
// CHECK-ERROR: instruction requires: f16f32dot
// CHECK-UNKNOWN: 0f40901f <unknown>

fdot v0.2s, v10.4h, v0.2h[0]
// CHECK-INST: fdot v0.2s, v10.4h, v0.2h[0]
// CHECK-ENCODING: encoding: [0x40,0x91,0x40,0x0f]
// CHECK-ERROR: instruction requires: f16f32dot
// CHECK-UNKNOWN: 0f409140 <unknown>

fdot v10.2s, v10.4h, v0.2h[0]
// CHECK-INST: fdot v10.2s, v10.4h, v0.2h[0]
// CHECK-ENCODING: encoding: [0x4a,0x91,0x40,0x0f]
// CHECK-ERROR: instruction requires: f16f32dot
// CHECK-UNKNOWN: 0f40914a <unknown>

fdot v21.2s, v10.4h, v0.2h[0]
// CHECK-INST: fdot v21.2s, v10.4h, v0.2h[0]
// CHECK-ENCODING: encoding: [0x55,0x91,0x40,0x0f]
// CHECK-ERROR: instruction requires: f16f32dot
// CHECK-UNKNOWN: 0f409155 <unknown>

fdot v31.2s, v10.4h, v0.2h[0]
// CHECK-INST: fdot v31.2s, v10.4h, v0.2h[0]
// CHECK-ENCODING: encoding: [0x5f,0x91,0x40,0x0f]
// CHECK-ERROR: instruction requires: f16f32dot
// CHECK-UNKNOWN: 0f40915f <unknown>

fdot v0.4s, v21.8h, v31.2h[3]
// CHECK-INST: fdot v0.4s, v21.8h, v31.2h[3]
// CHECK-ENCODING: encoding: [0xa0,0x9a,0x7f,0x4f]
// CHECK-ERROR: instruction requires: f16f32dot
// CHECK-UNKNOWN: 4f7f9aa0 <unknown>

fdot v10.4s, v21.8h, v31.2h[3]
// CHECK-INST: fdot v10.4s, v21.8h, v31.2h[3]
// CHECK-ENCODING: encoding: [0xaa,0x9a,0x7f,0x4f]
// CHECK-ERROR: instruction requires: f16f32dot
// CHECK-UNKNOWN: 4f7f9aaa <unknown>

fdot v21.4s, v21.8h, v31.2h[3]
// CHECK-INST: fdot v21.4s, v21.8h, v31.2h[3]
// CHECK-ENCODING: encoding: [0xb5,0x9a,0x7f,0x4f]
// CHECK-ERROR: instruction requires: f16f32dot
// CHECK-UNKNOWN: 4f7f9ab5 <unknown>

fdot v31.4s, v21.8h, v31.2h[3]
// CHECK-INST: fdot v31.4s, v21.8h, v31.2h[3]
// CHECK-ENCODING: encoding: [0xbf,0x9a,0x7f,0x4f]
// CHECK-ERROR: instruction requires: f16f32dot
// CHECK-UNKNOWN: 4f7f9abf <unknown>

fdot v0.4s, v31.8h, v31.2h[3]
// CHECK-INST: fdot v0.4s, v31.8h, v31.2h[3]
// CHECK-ENCODING: encoding: [0xe0,0x9b,0x7f,0x4f]
// CHECK-ERROR: instruction requires: f16f32dot
// CHECK-UNKNOWN: 4f7f9be0 <unknown>

fdot v10.4s, v31.8h, v31.2h[3]
// CHECK-INST: fdot v10.4s, v31.8h, v31.2h[3]
// CHECK-ENCODING: encoding: [0xea,0x9b,0x7f,0x4f]
// CHECK-ERROR: instruction requires: f16f32dot
// CHECK-UNKNOWN: 4f7f9bea <unknown>

fdot v21.4s, v31.8h, v31.2h[3]
// CHECK-INST: fdot v21.4s, v31.8h, v31.2h[3]
// CHECK-ENCODING: encoding: [0xf5,0x9b,0x7f,0x4f]
// CHECK-ERROR: instruction requires: f16f32dot
// CHECK-UNKNOWN: 4f7f9bf5 <unknown>

fdot v31.4s, v31.8h, v31.2h[3]
// CHECK-INST: fdot v31.4s, v31.8h, v31.2h[3]
// CHECK-ENCODING: encoding: [0xff,0x9b,0x7f,0x4f]
// CHECK-ERROR: instruction requires: f16f32dot
// CHECK-UNKNOWN: 4f7f9bff <unknown>
24 changes: 24 additions & 0 deletions llvm/test/MC/AArch64/neon-fmmla-HtoS-diagnostics.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// RUN: not llvm-mc -triple=aarch64 -mattr=+f16f32mm 2>&1 < %s| FileCheck %s

// --------------------------------------------------------------------------//
// Invalid operand/vector

fmmla v0.4b, v0.8b, v0.8b
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: fmmla v0.4b, v0.8b, v0.8b
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

fmmla v0.4h, v0.8h, v0.8h
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: fmmla v0.4h, v0.8h, v0.8h
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

fmmla v0.4s, v0.8s, v0.8s
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid vector kind qualifier
// CHECK-NEXT: fmmla v0.4s, v0.8s, v0.8s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

fmmla v0.4d, v0.8d, v0.8d
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid vector kind qualifier
// CHECK-NEXT: fmmla v0.4d, v0.8d, v0.8d
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
Loading
Loading