-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[AArch64][llvm] Armv9.7-A: Add support for new Advanced SIMD (Neon) instructions #163165
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-clang-driver Author: Jonathan Thackray (jthackray) ChangesAdd support for new Advanced SIMD (Neon) instructions:
as documented here:
Co-authored-by: Kerry McLaughlin <[email protected]> Patch is 27.62 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/163165.diff 13 Files Affected:
diff --git a/clang/test/Driver/print-supported-extensions-aarch64.c b/clang/test/Driver/print-supported-extensions-aarch64.c
index 50c3610123646..7975b5ab7cb83 100644
--- a/clang/test/Driver/print-supported-extensions-aarch64.c
+++ b/clang/test/Driver/print-supported-extensions-aarch64.c
@@ -18,6 +18,8 @@
// CHECK-NEXT: d128 FEAT_D128, FEAT_LVA3, FEAT_SYSREG128, FEAT_SYSINSTR128 Enable Armv9.4-A 128-bit Page Table Descriptors, System Registers and instructions
// CHECK-NEXT: dit FEAT_DIT Enable Armv8.4-A Data Independent Timing instructions
// CHECK-NEXT: dotprod FEAT_DotProd Enable dot product support
+// CHECK-NEXT: f16f32dot FEAT_F16F32DOT Enable Armv9.7-A Advanced SIMD half-precision dot product accumulate to single-precision
+// CHECK-NEXT: f16f32mm FEAT_F16F32MM Enable Armv9.7-A Advanced SIMD half-precision matrix multiply-accumulate to single-precision
// CHECK-NEXT: f16mm FEAT_F16MM Enable Armv9.7-A non-widening half-precision matrix multiply-accumulate
// CHECK-NEXT: f32mm FEAT_F32MM Enable Matrix Multiply FP32 Extension
// CHECK-NEXT: f64mm FEAT_F64MM Enable Matrix Multiply FP64 Extension
diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td
index 5f943d39321f9..d2838d5065d28 100644
--- a/llvm/lib/Target/AArch64/AArch64Features.td
+++ b/llvm/lib/Target/AArch64/AArch64Features.td
@@ -619,6 +619,12 @@ def FeatureSVE_B16MM : ExtensionWithMArch<"sve-b16mm", "SVE_B16MM", "FEAT_SVE_B1
def FeatureF16MM : ExtensionWithMArch<"f16mm", "F16MM", "FEAT_F16MM",
"Enable Armv9.7-A non-widening half-precision matrix multiply-accumulate", [FeatureFullFP16]>;
+def FeatureF16F32DOT : ExtensionWithMArch<"f16f32dot", "F16F32DOT", "FEAT_F16F32DOT",
+ "Enable Armv9.7-A Advanced SIMD half-precision dot product accumulate to single-precision", [FeatureNEON, FeatureFullFP16]>;
+
+def FeatureF16F32MM : ExtensionWithMArch<"f16f32mm", "F16F32MM", "FEAT_F16F32MM",
+ "Enable Armv9.7-A Advanced SIMD half-precision matrix multiply-accumulate to single-precision", [FeatureNEON, FeatureFullFP16]>;
+
//===----------------------------------------------------------------------===//
// Other Features
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index d2f282eadd302..1bc9aea52a265 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1166,6 +1166,21 @@ def timm32_0_15 : Operand<i32>, TImmLeaf<i32, [{
let ParserMatchClass = Imm0_15Operand;
}
+// timm32_1_16 predicate - True if the 32-bit immediate is in the range [1,16]
+def timm32_1_16 : Operand<i32>, TImmLeaf<i32, [{
+ return ((uint32_t)Imm > 0 && (uint32_t)Imm < 17);
+}]> {
+ let ParserMatchClass = Imm1_16Operand;
+}
+
+// timm32_1_8 predicate - True if the 32-bit immediate is in the range [1,8]
+def timm32_1_8 : Operand<i32>, TImmLeaf<i32, [{
+ return ((uint32_t)Imm > 0 && (uint32_t)Imm < 9);
+}]> {
+ let ParserMatchClass = Imm1_8Operand;
+}
+
+
// timm32_0_31 predicate - True if the 32-bit immediate is in the range [0,31]
def timm32_0_31 : Operand<i32>, TImmLeaf<i32, [{
return ((uint32_t)Imm) < 32;
@@ -6538,8 +6553,7 @@ multiclass SIMDThreeSameVectorFML<bit U, bit b13, bits<3> size, string asm,
}
multiclass SIMDThreeSameVectorMLA<bit Q, string asm, SDPatternOperator op> {
-
- def v8f16 : BaseSIMDThreeSameVectorDot<Q, 0b0, 0b11, 0b1111, asm, ".8h", ".16b",
+ def v16f8 : BaseSIMDThreeSameVectorDot<Q, 0b0, 0b11, 0b1111, asm, ".8h", ".16b",
V128, v8f16, v16i8, op>;
}
@@ -6548,6 +6562,23 @@ multiclass SIMDThreeSameVectorMLAL<bit Q, bits<2> sz, string asm, SDPatternOpera
V128, v4f32, v16i8, op>;
}
+multiclass SIMDThreeSameVectorFMLA<string asm> {
+ def v8f16tov8f16 : BaseSIMDThreeSameVectorDot<0b1, 0b0, 0b11, 0b1101, asm, ".8h", ".8h",
+ V128, v8f16, v8f16, null_frag>;
+}
+
+multiclass SIMDThreeSameVectorFMLAWiden<string asm> {
+ def v8f16tov4f32 : BaseSIMDThreeSameVectorDot<0b1, 0b0, 0b01, 0b1101, asm, ".4s", ".8h",
+ V128, v4f32, v8f16, null_frag>;
+}
+
+multiclass SIMDThreeSameVectorFDot<string asm, SDPatternOperator OpNode = null_frag> {
+ def v2f32tov4f16 : BaseSIMDThreeSameVectorDot<0, 0, 0b10, 0b1111, asm, ".2s", ".4h", V64,
+ v2f32, v4f16, OpNode>;
+ def v4f32tov8f16 : BaseSIMDThreeSameVectorDot<1, 0, 0b10, 0b1111, asm, ".4s", ".8h", V128,
+ v4f32, v8f16, OpNode>;
+}
+
// FP8 assembly/disassembly classes
//----------------------------------------------------------------------------
@@ -9169,6 +9200,13 @@ multiclass SIMDThreeSameVectorFMLIndex<bit U, bits<4> opc, string asm,
V128, V128_lo, v4f32, v8f16, VectorIndexH, OpNode>;
}
+multiclass SIMDThreeSameVectorFDOTIndex<string asm> {
+ def v4f16tov2f32 : BaseSIMDThreeSameVectorIndexS<0b0, 0b0, 0b01, 0b1001, asm, ".2s", ".4h", ".2h",
+ V64, v2f32, v4f16, VectorIndexS, null_frag>;
+ def v8f16tov4f32 : BaseSIMDThreeSameVectorIndexS<0b1, 0b0, 0b01, 0b1001, asm, ".4s", ".8h",".2h",
+ V128, v4f32, v8f16, VectorIndexS, null_frag>;
+}
+
//----------------------------------------------------------------------------
// FP8 Advanced SIMD vector x indexed element
multiclass SIMD_FP8_Dot2_Index<string asm, SDPatternOperator op> {
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 66df27050723a..b28dffc67eb1e 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -256,6 +256,10 @@ def HasSVE2p3 : Predicate<"Subtarget->hasSVE2p3()">,
AssemblerPredicateWithAll<(all_of FeatureSVE2p3), "sve2p3">;
def HasSME2p3 : Predicate<"Subtarget->hasSME2p3()">,
AssemblerPredicateWithAll<(all_of FeatureSME2p3), "sme2p3">;
+def HasF16F32DOT : Predicate<"Subtarget->hasF16F32DOT()">,
+ AssemblerPredicateWithAll<(all_of FeatureF16F32DOT), "f16f32dot">;
+def HasF16F32MM : Predicate<"Subtarget->hasF16F32MM()">,
+ AssemblerPredicateWithAll<(all_of FeatureF16F32MM), "f16f32mm">;
// A subset of SVE(2) instructions are legal in Streaming SVE execution mode,
// they should be enabled if either has been specified.
@@ -11281,8 +11285,19 @@ let Predicates = [HasLSFE] in {
def STBFMINNML : BaseAtomicFPStore<FPR16, 0b00, 0b1, 0b111, "stbfminnml">;
}
+let Predicates = [HasF16F32DOT] in {
+ defm FDOT :SIMDThreeSameVectorFDot<"fdot">;
+ defm FDOTlane: SIMDThreeSameVectorFDOTIndex<"fdot">;
+}
+
+let Predicates = [HasF16MM] in
+ defm FMMLA : SIMDThreeSameVectorFMLA<"fmmla">;
+
+let Predicates = [HasF16F32MM] in
+ defm FMMLA : SIMDThreeSameVectorFMLAWiden<"fmmla">;
+
let Uses = [FPMR, FPCR] in
-defm FMMLA : SIMDThreeSameVectorFP8MatrixMul<"fmmla">;
+ defm FMMLA : SIMDThreeSameVectorFP8MatrixMul<"fmmla">;
//===----------------------------------------------------------------------===//
// Contention Management Hints (FEAT_CMH)
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index f92badab9a1eb..49de93ee1d5b9 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -3894,6 +3894,8 @@ static const struct Extension {
{"sve2p3", {AArch64::FeatureSVE2p3}},
{"sve-b16mm", {AArch64::FeatureSVE_B16MM}},
{"f16mm", {AArch64::FeatureF16MM}},
+ {"f16f32dot", {AArch64::FeatureF16F32DOT}},
+ {"f16f32mm", {AArch64::FeatureF16F32MM}},
};
static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
diff --git a/llvm/test/MC/AArch64/FP8/fmmla-diagnostics.s b/llvm/test/MC/AArch64/FP8/fmmla-diagnostics.s
index cf8d216581240..15efbee14ddc1 100644
--- a/llvm/test/MC/AArch64/FP8/fmmla-diagnostics.s
+++ b/llvm/test/MC/AArch64/FP8/fmmla-diagnostics.s
@@ -16,7 +16,7 @@ fmmla v0.4s, v1.4s, v2.4s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
fmmla v0.8h, v1.8h, v2.8h
-// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: f16mm
// CHECK-NEXT: fmmla v0.8h, v1.8h, v2.8h
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/neon-fdot-diagnostics.s b/llvm/test/MC/AArch64/neon-fdot-diagnostics.s
new file mode 100644
index 0000000000000..4f5f557644094
--- /dev/null
+++ b/llvm/test/MC/AArch64/neon-fdot-diagnostics.s
@@ -0,0 +1,59 @@
+// RUN: not llvm-mc -triple=aarch64 -mattr=f16f32dot 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Invalid operand
+
+fdot v0.2s, v0.4b, v0.4b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2s, v0.4b, v0.4b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2b, v0.4b, v0.4b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2b, v0.4b, v0.4b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2s, v0.4s, v0.4s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2s, v0.4s, v0.4s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2h, v0.4h, v0.4h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2h, v0.4h, v0.4h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// fdot indexed
+
+fdot v0.2s, v0.4b, v0.4b[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2s, v0.4b, v0.4b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2b, v0.4b, v0.4b[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2b, v0.4b, v0.4b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2s, v0.4s, v0.4s[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2s, v0.4s, v0.4s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2h, v0.4h, v0.4h[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2h, v0.4h, v0.4h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid immediate range
+
+fdot v0.2s, v0.4h, v0.2h[-1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: fdot v0.2s, v0.4h, v0.2h[-1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2s, v0.4h, v0.2h[4]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: fdot v0.2s, v0.4h, v0.2h[4]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/neon-fdot.s b/llvm/test/MC/AArch64/neon-fdot.s
new file mode 100644
index 0000000000000..c8a8e2f9023e1
--- /dev/null
+++ b/llvm/test/MC/AArch64/neon-fdot.s
@@ -0,0 +1,147 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+f16f32dot < %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+f16f32dot < %s \
+// RUN: | llvm-objdump -d --mattr=+f16f32dot --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+f16f32dot < %s \
+// RUN: | llvm-objdump -d --mattr=-f16f32dot --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+f16f32dot < %s \
+// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN: | llvm-mc -triple=aarch64 -mattr=+f16f32dot -disassemble -show-encoding \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+fdot v0.2s, v0.4h, v0.4h
+// CHECK-INST: fdot v0.2s, v0.4h, v0.4h
+// CHECK-ENCODING: encoding: [0x00,0xfc,0x80,0x0e]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0e80fc00 <unknown>
+
+fdot v10.2s, v10.4h, v10.4h
+// CHECK-INST: fdot v10.2s, v10.4h, v10.4h
+// CHECK-ENCODING: encoding: [0x4a,0xfd,0x8a,0x0e]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0e8afd4a <unknown>
+
+fdot v31.2s, v31.4h, v31.4h
+// CHECK-INST: fdot v31.2s, v31.4h, v31.4h
+// CHECK-ENCODING: encoding: [0xff,0xff,0x9f,0x0e]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0e9fffff <unknown>
+
+fdot v0.4s, v0.8h, v0.8h
+// CHECK-INST: fdot v0.4s, v0.8h, v0.8h
+// CHECK-ENCODING: encoding: [0x00,0xfc,0x80,0x4e]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4e80fc00 <unknown>
+
+fdot v10.4s, v10.8h, v10.8h
+// CHECK-INST: fdot v10.4s, v10.8h, v10.8h
+// CHECK-ENCODING: encoding: [0x4a,0xfd,0x8a,0x4e]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4e8afd4a <unknown>
+
+fdot v31.4s, v31.8h, v31.8h
+// CHECK-INST: fdot v31.4s, v31.8h, v31.8h
+// CHECK-ENCODING: encoding: [0xff,0xff,0x9f,0x4e]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4e9fffff <unknown>
+
+// fdot indexed
+
+fdot v0.2s, v0.4h, v0.2h[0]
+// CHECK-INST: fdot v0.2s, v0.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x00,0x90,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f409000 <unknown>
+
+fdot v10.2s, v0.4h, v0.2h[0]
+// CHECK-INST: fdot v10.2s, v0.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x0a,0x90,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f40900a <unknown>
+
+fdot v21.2s, v0.4h, v0.2h[0]
+// CHECK-INST: fdot v21.2s, v0.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x15,0x90,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f409015 <unknown>
+
+fdot v31.2s, v0.4h, v0.2h[0]
+// CHECK-INST: fdot v31.2s, v0.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x1f,0x90,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f40901f <unknown>
+
+fdot v0.2s, v10.4h, v0.2h[0]
+// CHECK-INST: fdot v0.2s, v10.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x40,0x91,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f409140 <unknown>
+
+fdot v10.2s, v10.4h, v0.2h[0]
+// CHECK-INST: fdot v10.2s, v10.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x4a,0x91,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f40914a <unknown>
+
+fdot v21.2s, v10.4h, v0.2h[0]
+// CHECK-INST: fdot v21.2s, v10.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x55,0x91,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f409155 <unknown>
+
+fdot v31.2s, v10.4h, v0.2h[0]
+// CHECK-INST: fdot v31.2s, v10.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x5f,0x91,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f40915f <unknown>
+
+fdot v0.4s, v21.8h, v31.2h[3]
+// CHECK-INST: fdot v0.4s, v21.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xa0,0x9a,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9aa0 <unknown>
+
+fdot v10.4s, v21.8h, v31.2h[3]
+// CHECK-INST: fdot v10.4s, v21.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xaa,0x9a,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9aaa <unknown>
+
+fdot v21.4s, v21.8h, v31.2h[3]
+// CHECK-INST: fdot v21.4s, v21.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xb5,0x9a,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9ab5 <unknown>
+
+fdot v31.4s, v21.8h, v31.2h[3]
+// CHECK-INST: fdot v31.4s, v21.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xbf,0x9a,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9abf <unknown>
+
+fdot v0.4s, v31.8h, v31.2h[3]
+// CHECK-INST: fdot v0.4s, v31.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xe0,0x9b,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9be0 <unknown>
+
+fdot v10.4s, v31.8h, v31.2h[3]
+// CHECK-INST: fdot v10.4s, v31.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xea,0x9b,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9bea <unknown>
+
+fdot v21.4s, v31.8h, v31.2h[3]
+// CHECK-INST: fdot v21.4s, v31.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xf5,0x9b,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9bf5 <unknown>
+
+fdot v31.4s, v31.8h, v31.2h[3]
+// CHECK-INST: fdot v31.4s, v31.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xff,0x9b,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9bff <unknown>
diff --git a/llvm/test/MC/AArch64/neon-fmmla-HtoS-diagnostics.s b/llvm/test/MC/AArch64/neon-fmmla-HtoS-diagnostics.s
new file mode 100644
index 0000000000000..ccc074225b6a1
--- /dev/null
+++ b/llvm/test/MC/AArch64/neon-fmmla-HtoS-diagnostics.s
@@ -0,0 +1,24 @@
+// RUN: not llvm-mc -triple=aarch64 -mattr=+f16f32mm 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Invalid operand/vector
+
+fmmla v0.4b, v0.8b, v0.8b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fmmla v0.4b, v0.8b, v0.8b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla v0.4h, v0.8h, v0.8h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fmmla v0.4h, v0.8h, v0.8h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla v0.4s, v0.8s, v0.8s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid vector kind qualifier
+// CHECK-NEXT: fmmla v0.4s, v0.8s, v0.8s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla v0.4d, v0.8d, v0.8d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid vector kind qualifier
+// CHECK-NEXT: fmmla v0.4d, v0.8d, v0.8d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/neon-fmmla-HtoS.s b/llvm/test/MC/AArch64/neon-fmmla-HtoS.s
new file mode 100644
index 0000000000000..6b3d352d285d8
--- /dev/null
+++ b/llvm/test/MC/AArch64/neon-fmmla-HtoS.s
@@ -0,0 +1,37 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+f16f32mm< %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+f16f32mm < %s \
+// RUN: | llvm-objdump -d --mattr=+f16f32mm --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+f16f32mm < %s \
+// RUN: | llvm-objdump -d --mattr=-f16f32mm --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+f16f32mm < %s \
+// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN: | llvm-mc -triple=aarch64 -mattr=+f16f32mm -disassemble -show-encoding \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+fmmla v0.4s, v0.8h, v0.8h
+// CHECK-INST: fmmla v0.4s, v0.8h, v0.8h
+// CHECK-ENCODING: encoding: [0x00,0xec,0x40,0x4e]
+// CHECK-ERROR: instruction requires: f16f32mm
+// CHECK-UNKNOWN: 4e40ec00 <unknown>
+
+fmmla v10.4s, v10.8h, v10.8h
+// CHECK-INST: fmmla v10.4s, v10.8h, v10.8h
+// CHECK-ENCODING: encoding: [0x4a,0xed,0x4a,0x4e]
+// CHECK-ERROR: instruction requires: f16f32mm
+// CHECK-UNKNOWN: 4e4aed4a <unknown>
+
+fmmla v21.4s, v21.8h, v21.8h
+// CHECK-INST: fmmla v21.4s, v21.8h, v...
[truncated]
|
|
@llvm/pr-subscribers-clang Author: Jonathan Thackray (jthackray) ChangesAdd support for new Advanced SIMD (Neon) instructions:
as documented here:
Co-authored-by: Kerry McLaughlin <[email protected]> Patch is 27.62 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/163165.diff 13 Files Affected:
diff --git a/clang/test/Driver/print-supported-extensions-aarch64.c b/clang/test/Driver/print-supported-extensions-aarch64.c
index 50c3610123646..7975b5ab7cb83 100644
--- a/clang/test/Driver/print-supported-extensions-aarch64.c
+++ b/clang/test/Driver/print-supported-extensions-aarch64.c
@@ -18,6 +18,8 @@
// CHECK-NEXT: d128 FEAT_D128, FEAT_LVA3, FEAT_SYSREG128, FEAT_SYSINSTR128 Enable Armv9.4-A 128-bit Page Table Descriptors, System Registers and instructions
// CHECK-NEXT: dit FEAT_DIT Enable Armv8.4-A Data Independent Timing instructions
// CHECK-NEXT: dotprod FEAT_DotProd Enable dot product support
+// CHECK-NEXT: f16f32dot FEAT_F16F32DOT Enable Armv9.7-A Advanced SIMD half-precision dot product accumulate to single-precision
+// CHECK-NEXT: f16f32mm FEAT_F16F32MM Enable Armv9.7-A Advanced SIMD half-precision matrix multiply-accumulate to single-precision
// CHECK-NEXT: f16mm FEAT_F16MM Enable Armv9.7-A non-widening half-precision matrix multiply-accumulate
// CHECK-NEXT: f32mm FEAT_F32MM Enable Matrix Multiply FP32 Extension
// CHECK-NEXT: f64mm FEAT_F64MM Enable Matrix Multiply FP64 Extension
diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td
index 5f943d39321f9..d2838d5065d28 100644
--- a/llvm/lib/Target/AArch64/AArch64Features.td
+++ b/llvm/lib/Target/AArch64/AArch64Features.td
@@ -619,6 +619,12 @@ def FeatureSVE_B16MM : ExtensionWithMArch<"sve-b16mm", "SVE_B16MM", "FEAT_SVE_B1
def FeatureF16MM : ExtensionWithMArch<"f16mm", "F16MM", "FEAT_F16MM",
"Enable Armv9.7-A non-widening half-precision matrix multiply-accumulate", [FeatureFullFP16]>;
+def FeatureF16F32DOT : ExtensionWithMArch<"f16f32dot", "F16F32DOT", "FEAT_F16F32DOT",
+ "Enable Armv9.7-A Advanced SIMD half-precision dot product accumulate to single-precision", [FeatureNEON, FeatureFullFP16]>;
+
+def FeatureF16F32MM : ExtensionWithMArch<"f16f32mm", "F16F32MM", "FEAT_F16F32MM",
+ "Enable Armv9.7-A Advanced SIMD half-precision matrix multiply-accumulate to single-precision", [FeatureNEON, FeatureFullFP16]>;
+
//===----------------------------------------------------------------------===//
// Other Features
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index d2f282eadd302..1bc9aea52a265 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1166,6 +1166,21 @@ def timm32_0_15 : Operand<i32>, TImmLeaf<i32, [{
let ParserMatchClass = Imm0_15Operand;
}
+// timm32_1_16 predicate - True if the 32-bit immediate is in the range [1,16]
+def timm32_1_16 : Operand<i32>, TImmLeaf<i32, [{
+ return ((uint32_t)Imm > 0 && (uint32_t)Imm < 17);
+}]> {
+ let ParserMatchClass = Imm1_16Operand;
+}
+
+// timm32_1_8 predicate - True if the 32-bit immediate is in the range [1,8]
+def timm32_1_8 : Operand<i32>, TImmLeaf<i32, [{
+ return ((uint32_t)Imm > 0 && (uint32_t)Imm < 9);
+}]> {
+ let ParserMatchClass = Imm1_8Operand;
+}
+
+
// timm32_0_31 predicate - True if the 32-bit immediate is in the range [0,31]
def timm32_0_31 : Operand<i32>, TImmLeaf<i32, [{
return ((uint32_t)Imm) < 32;
@@ -6538,8 +6553,7 @@ multiclass SIMDThreeSameVectorFML<bit U, bit b13, bits<3> size, string asm,
}
multiclass SIMDThreeSameVectorMLA<bit Q, string asm, SDPatternOperator op> {
-
- def v8f16 : BaseSIMDThreeSameVectorDot<Q, 0b0, 0b11, 0b1111, asm, ".8h", ".16b",
+ def v16f8 : BaseSIMDThreeSameVectorDot<Q, 0b0, 0b11, 0b1111, asm, ".8h", ".16b",
V128, v8f16, v16i8, op>;
}
@@ -6548,6 +6562,23 @@ multiclass SIMDThreeSameVectorMLAL<bit Q, bits<2> sz, string asm, SDPatternOpera
V128, v4f32, v16i8, op>;
}
+multiclass SIMDThreeSameVectorFMLA<string asm> {
+ def v8f16tov8f16 : BaseSIMDThreeSameVectorDot<0b1, 0b0, 0b11, 0b1101, asm, ".8h", ".8h",
+ V128, v8f16, v8f16, null_frag>;
+}
+
+multiclass SIMDThreeSameVectorFMLAWiden<string asm> {
+ def v8f16tov4f32 : BaseSIMDThreeSameVectorDot<0b1, 0b0, 0b01, 0b1101, asm, ".4s", ".8h",
+ V128, v4f32, v8f16, null_frag>;
+}
+
+multiclass SIMDThreeSameVectorFDot<string asm, SDPatternOperator OpNode = null_frag> {
+ def v2f32tov4f16 : BaseSIMDThreeSameVectorDot<0, 0, 0b10, 0b1111, asm, ".2s", ".4h", V64,
+ v2f32, v4f16, OpNode>;
+ def v4f32tov8f16 : BaseSIMDThreeSameVectorDot<1, 0, 0b10, 0b1111, asm, ".4s", ".8h", V128,
+ v4f32, v8f16, OpNode>;
+}
+
// FP8 assembly/disassembly classes
//----------------------------------------------------------------------------
@@ -9169,6 +9200,13 @@ multiclass SIMDThreeSameVectorFMLIndex<bit U, bits<4> opc, string asm,
V128, V128_lo, v4f32, v8f16, VectorIndexH, OpNode>;
}
+multiclass SIMDThreeSameVectorFDOTIndex<string asm> {
+ def v4f16tov2f32 : BaseSIMDThreeSameVectorIndexS<0b0, 0b0, 0b01, 0b1001, asm, ".2s", ".4h", ".2h",
+ V64, v2f32, v4f16, VectorIndexS, null_frag>;
+ def v8f16tov4f32 : BaseSIMDThreeSameVectorIndexS<0b1, 0b0, 0b01, 0b1001, asm, ".4s", ".8h",".2h",
+ V128, v4f32, v8f16, VectorIndexS, null_frag>;
+}
+
//----------------------------------------------------------------------------
// FP8 Advanced SIMD vector x indexed element
multiclass SIMD_FP8_Dot2_Index<string asm, SDPatternOperator op> {
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 66df27050723a..b28dffc67eb1e 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -256,6 +256,10 @@ def HasSVE2p3 : Predicate<"Subtarget->hasSVE2p3()">,
AssemblerPredicateWithAll<(all_of FeatureSVE2p3), "sve2p3">;
def HasSME2p3 : Predicate<"Subtarget->hasSME2p3()">,
AssemblerPredicateWithAll<(all_of FeatureSME2p3), "sme2p3">;
+def HasF16F32DOT : Predicate<"Subtarget->hasF16F32DOT()">,
+ AssemblerPredicateWithAll<(all_of FeatureF16F32DOT), "f16f32dot">;
+def HasF16F32MM : Predicate<"Subtarget->hasF16F32MM()">,
+ AssemblerPredicateWithAll<(all_of FeatureF16F32MM), "f16f32mm">;
// A subset of SVE(2) instructions are legal in Streaming SVE execution mode,
// they should be enabled if either has been specified.
@@ -11281,8 +11285,19 @@ let Predicates = [HasLSFE] in {
def STBFMINNML : BaseAtomicFPStore<FPR16, 0b00, 0b1, 0b111, "stbfminnml">;
}
+let Predicates = [HasF16F32DOT] in {
+ defm FDOT :SIMDThreeSameVectorFDot<"fdot">;
+ defm FDOTlane: SIMDThreeSameVectorFDOTIndex<"fdot">;
+}
+
+let Predicates = [HasF16MM] in
+ defm FMMLA : SIMDThreeSameVectorFMLA<"fmmla">;
+
+let Predicates = [HasF16F32MM] in
+ defm FMMLA : SIMDThreeSameVectorFMLAWiden<"fmmla">;
+
let Uses = [FPMR, FPCR] in
-defm FMMLA : SIMDThreeSameVectorFP8MatrixMul<"fmmla">;
+ defm FMMLA : SIMDThreeSameVectorFP8MatrixMul<"fmmla">;
//===----------------------------------------------------------------------===//
// Contention Management Hints (FEAT_CMH)
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index f92badab9a1eb..49de93ee1d5b9 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -3894,6 +3894,8 @@ static const struct Extension {
{"sve2p3", {AArch64::FeatureSVE2p3}},
{"sve-b16mm", {AArch64::FeatureSVE_B16MM}},
{"f16mm", {AArch64::FeatureF16MM}},
+ {"f16f32dot", {AArch64::FeatureF16F32DOT}},
+ {"f16f32mm", {AArch64::FeatureF16F32MM}},
};
static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
diff --git a/llvm/test/MC/AArch64/FP8/fmmla-diagnostics.s b/llvm/test/MC/AArch64/FP8/fmmla-diagnostics.s
index cf8d216581240..15efbee14ddc1 100644
--- a/llvm/test/MC/AArch64/FP8/fmmla-diagnostics.s
+++ b/llvm/test/MC/AArch64/FP8/fmmla-diagnostics.s
@@ -16,7 +16,7 @@ fmmla v0.4s, v1.4s, v2.4s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
fmmla v0.8h, v1.8h, v2.8h
-// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: f16mm
// CHECK-NEXT: fmmla v0.8h, v1.8h, v2.8h
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/neon-fdot-diagnostics.s b/llvm/test/MC/AArch64/neon-fdot-diagnostics.s
new file mode 100644
index 0000000000000..4f5f557644094
--- /dev/null
+++ b/llvm/test/MC/AArch64/neon-fdot-diagnostics.s
@@ -0,0 +1,59 @@
+// RUN: not llvm-mc -triple=aarch64 -mattr=f16f32dot 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Invalid operand
+
+fdot v0.2s, v0.4b, v0.4b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2s, v0.4b, v0.4b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2b, v0.4b, v0.4b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2b, v0.4b, v0.4b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2s, v0.4s, v0.4s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2s, v0.4s, v0.4s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2h, v0.4h, v0.4h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2h, v0.4h, v0.4h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// fdot indexed
+
+fdot v0.2s, v0.4b, v0.4b[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2s, v0.4b, v0.4b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2b, v0.4b, v0.4b[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2b, v0.4b, v0.4b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2s, v0.4s, v0.4s[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2s, v0.4s, v0.4s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2h, v0.4h, v0.4h[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2h, v0.4h, v0.4h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid immediate range
+
+fdot v0.2s, v0.4h, v0.2h[-1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: fdot v0.2s, v0.4h, v0.2h[-1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2s, v0.4h, v0.2h[4]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: fdot v0.2s, v0.4h, v0.2h[4]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/neon-fdot.s b/llvm/test/MC/AArch64/neon-fdot.s
new file mode 100644
index 0000000000000..c8a8e2f9023e1
--- /dev/null
+++ b/llvm/test/MC/AArch64/neon-fdot.s
@@ -0,0 +1,147 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+f16f32dot < %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+f16f32dot < %s \
+// RUN: | llvm-objdump -d --mattr=+f16f32dot --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+f16f32dot < %s \
+// RUN: | llvm-objdump -d --mattr=-f16f32dot --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+f16f32dot < %s \
+// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN: | llvm-mc -triple=aarch64 -mattr=+f16f32dot -disassemble -show-encoding \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+fdot v0.2s, v0.4h, v0.4h
+// CHECK-INST: fdot v0.2s, v0.4h, v0.4h
+// CHECK-ENCODING: encoding: [0x00,0xfc,0x80,0x0e]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0e80fc00 <unknown>
+
+fdot v10.2s, v10.4h, v10.4h
+// CHECK-INST: fdot v10.2s, v10.4h, v10.4h
+// CHECK-ENCODING: encoding: [0x4a,0xfd,0x8a,0x0e]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0e8afd4a <unknown>
+
+fdot v31.2s, v31.4h, v31.4h
+// CHECK-INST: fdot v31.2s, v31.4h, v31.4h
+// CHECK-ENCODING: encoding: [0xff,0xff,0x9f,0x0e]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0e9fffff <unknown>
+
+fdot v0.4s, v0.8h, v0.8h
+// CHECK-INST: fdot v0.4s, v0.8h, v0.8h
+// CHECK-ENCODING: encoding: [0x00,0xfc,0x80,0x4e]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4e80fc00 <unknown>
+
+fdot v10.4s, v10.8h, v10.8h
+// CHECK-INST: fdot v10.4s, v10.8h, v10.8h
+// CHECK-ENCODING: encoding: [0x4a,0xfd,0x8a,0x4e]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4e8afd4a <unknown>
+
+fdot v31.4s, v31.8h, v31.8h
+// CHECK-INST: fdot v31.4s, v31.8h, v31.8h
+// CHECK-ENCODING: encoding: [0xff,0xff,0x9f,0x4e]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4e9fffff <unknown>
+
+// fdot indexed
+
+fdot v0.2s, v0.4h, v0.2h[0]
+// CHECK-INST: fdot v0.2s, v0.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x00,0x90,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f409000 <unknown>
+
+fdot v10.2s, v0.4h, v0.2h[0]
+// CHECK-INST: fdot v10.2s, v0.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x0a,0x90,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f40900a <unknown>
+
+fdot v21.2s, v0.4h, v0.2h[0]
+// CHECK-INST: fdot v21.2s, v0.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x15,0x90,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f409015 <unknown>
+
+fdot v31.2s, v0.4h, v0.2h[0]
+// CHECK-INST: fdot v31.2s, v0.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x1f,0x90,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f40901f <unknown>
+
+fdot v0.2s, v10.4h, v0.2h[0]
+// CHECK-INST: fdot v0.2s, v10.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x40,0x91,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f409140 <unknown>
+
+fdot v10.2s, v10.4h, v0.2h[0]
+// CHECK-INST: fdot v10.2s, v10.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x4a,0x91,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f40914a <unknown>
+
+fdot v21.2s, v10.4h, v0.2h[0]
+// CHECK-INST: fdot v21.2s, v10.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x55,0x91,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f409155 <unknown>
+
+fdot v31.2s, v10.4h, v0.2h[0]
+// CHECK-INST: fdot v31.2s, v10.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x5f,0x91,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f40915f <unknown>
+
+fdot v0.4s, v21.8h, v31.2h[3]
+// CHECK-INST: fdot v0.4s, v21.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xa0,0x9a,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9aa0 <unknown>
+
+fdot v10.4s, v21.8h, v31.2h[3]
+// CHECK-INST: fdot v10.4s, v21.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xaa,0x9a,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9aaa <unknown>
+
+fdot v21.4s, v21.8h, v31.2h[3]
+// CHECK-INST: fdot v21.4s, v21.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xb5,0x9a,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9ab5 <unknown>
+
+fdot v31.4s, v21.8h, v31.2h[3]
+// CHECK-INST: fdot v31.4s, v21.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xbf,0x9a,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9abf <unknown>
+
+fdot v0.4s, v31.8h, v31.2h[3]
+// CHECK-INST: fdot v0.4s, v31.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xe0,0x9b,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9be0 <unknown>
+
+fdot v10.4s, v31.8h, v31.2h[3]
+// CHECK-INST: fdot v10.4s, v31.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xea,0x9b,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9bea <unknown>
+
+fdot v21.4s, v31.8h, v31.2h[3]
+// CHECK-INST: fdot v21.4s, v31.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xf5,0x9b,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9bf5 <unknown>
+
+fdot v31.4s, v31.8h, v31.2h[3]
+// CHECK-INST: fdot v31.4s, v31.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xff,0x9b,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9bff <unknown>
diff --git a/llvm/test/MC/AArch64/neon-fmmla-HtoS-diagnostics.s b/llvm/test/MC/AArch64/neon-fmmla-HtoS-diagnostics.s
new file mode 100644
index 0000000000000..ccc074225b6a1
--- /dev/null
+++ b/llvm/test/MC/AArch64/neon-fmmla-HtoS-diagnostics.s
@@ -0,0 +1,24 @@
+// RUN: not llvm-mc -triple=aarch64 -mattr=+f16f32mm 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Invalid operand/vector
+
+fmmla v0.4b, v0.8b, v0.8b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fmmla v0.4b, v0.8b, v0.8b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla v0.4h, v0.8h, v0.8h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fmmla v0.4h, v0.8h, v0.8h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla v0.4s, v0.8s, v0.8s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid vector kind qualifier
+// CHECK-NEXT: fmmla v0.4s, v0.8s, v0.8s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla v0.4d, v0.8d, v0.8d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid vector kind qualifier
+// CHECK-NEXT: fmmla v0.4d, v0.8d, v0.8d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/neon-fmmla-HtoS.s b/llvm/test/MC/AArch64/neon-fmmla-HtoS.s
new file mode 100644
index 0000000000000..6b3d352d285d8
--- /dev/null
+++ b/llvm/test/MC/AArch64/neon-fmmla-HtoS.s
@@ -0,0 +1,37 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+f16f32mm< %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+f16f32mm < %s \
+// RUN: | llvm-objdump -d --mattr=+f16f32mm --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+f16f32mm < %s \
+// RUN: | llvm-objdump -d --mattr=-f16f32mm --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+f16f32mm < %s \
+// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN: | llvm-mc -triple=aarch64 -mattr=+f16f32mm -disassemble -show-encoding \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+fmmla v0.4s, v0.8h, v0.8h
+// CHECK-INST: fmmla v0.4s, v0.8h, v0.8h
+// CHECK-ENCODING: encoding: [0x00,0xec,0x40,0x4e]
+// CHECK-ERROR: instruction requires: f16f32mm
+// CHECK-UNKNOWN: 4e40ec00 <unknown>
+
+fmmla v10.4s, v10.8h, v10.8h
+// CHECK-INST: fmmla v10.4s, v10.8h, v10.8h
+// CHECK-ENCODING: encoding: [0x4a,0xed,0x4a,0x4e]
+// CHECK-ERROR: instruction requires: f16f32mm
+// CHECK-UNKNOWN: 4e4aed4a <unknown>
+
+fmmla v21.4s, v21.8h, v21.8h
+// CHECK-INST: fmmla v21.4s, v21.8h, v...
[truncated]
|
03e8371 to
a6b0561
Compare
6eb5175 to
4cc767b
Compare
a6b0561 to
729c6b4
Compare
4cc767b to
70f531e
Compare
729c6b4 to
2f6800b
Compare
c634ff8 to
527a2a1
Compare
abba6f6 to
4e45059
Compare
527a2a1 to
8c3e7dc
Compare
4e45059 to
359ebd3
Compare
8c3e7dc to
e68ed5c
Compare
359ebd3 to
b63e81f
Compare
e68ed5c to
f4959c9
Compare
b63e81f to
42d5834
Compare
f4959c9 to
ce31713
Compare
42d5834 to
d049a2c
Compare
ce31713 to
ea7f287
Compare
eacb200 to
c5a2c2f
Compare
22b757c to
5956a3d
Compare
c5a2c2f to
2e9b06f
Compare
5956a3d to
c0e482b
Compare
2e9b06f to
8cdf16b
Compare
…nstructions Add support for new Advanced SIMD (Neon) instructions: - FDOT (half-precision to single-precision, by element) - FDOT (half-precision to single-precision, vector) - FMMLA (half-precision, non-widening) - FMMLA (widening, half-precision to single-precision) as documented here: * https://developer.arm.com/documentation/ddi0602/2025-09/ * https://developer.arm.com/documentation/109697/2025_09/2025-Architecture-Extensions Co-authored-by: Kerry McLaughlin <[email protected]> Co-authored-by: Caroline Concatto <[email protected]> Co-authored-by: Virginia Cangelosi <[email protected]>
…Neon) instructions Remove timm32_1_16 and timm32_1_8, as they're superfluous.
…Neon) instructions Rename some `def`s after CR comments
…Neon) instructions Rename more `def`s
8cdf16b to
7ed20c8
Compare
…nstructions (llvm#163165) Add support for new Advanced SIMD (Neon) instructions: - FDOT (half-precision to single-precision, by element) - FDOT (half-precision to single-precision, vector) - FMMLA (half-precision, non-widening) - FMMLA (widening, half-precision to single-precision) as documented here: * https://developer.arm.com/documentation/ddi0602/2025-09/ * https://developer.arm.com/documentation/109697/2025_09/2025-Architecture-Extensions Co-authored-by: Kerry McLaughlin <[email protected]> Co-authored-by: Caroline Concatto <[email protected]> Co-authored-by: Virginia Cangelosi <[email protected]>
…nstructions (llvm#163165) Add support for new Advanced SIMD (Neon) instructions: - FDOT (half-precision to single-precision, by element) - FDOT (half-precision to single-precision, vector) - FMMLA (half-precision, non-widening) - FMMLA (widening, half-precision to single-precision) as documented here: * https://developer.arm.com/documentation/ddi0602/2025-09/ * https://developer.arm.com/documentation/109697/2025_09/2025-Architecture-Extensions Co-authored-by: Kerry McLaughlin <[email protected]> Co-authored-by: Caroline Concatto <[email protected]> Co-authored-by: Virginia Cangelosi <[email protected]>
…nstructions (llvm#163165) Add support for new Advanced SIMD (Neon) instructions: - FDOT (half-precision to single-precision, by element) - FDOT (half-precision to single-precision, vector) - FMMLA (half-precision, non-widening) - FMMLA (widening, half-precision to single-precision) as documented here: * https://developer.arm.com/documentation/ddi0602/2025-09/ * https://developer.arm.com/documentation/109697/2025_09/2025-Architecture-Extensions Co-authored-by: Kerry McLaughlin <[email protected]> Co-authored-by: Caroline Concatto <[email protected]> Co-authored-by: Virginia Cangelosi <[email protected]>

Add support for new Advanced SIMD (Neon) instructions:
as documented here:
Co-authored-by: Kerry McLaughlin [email protected]
Co-authored-by: Caroline Concatto [email protected]
Co-authored-by: Virginia Cangelosi [email protected]