-
Notifications
You must be signed in to change notification settings - Fork 14.8k
[AArch64][llvm] Armv9.7-A: Add support for new Advanced SIMD (Neon) instructions #163165
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: users/jthackray/armv9.7a-sve-lut
Are you sure you want to change the base?
[AArch64][llvm] Armv9.7-A: Add support for new Advanced SIMD (Neon) instructions #163165
Conversation
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-clang-driver Author: Jonathan Thackray (jthackray) ChangesAdd support for new Advanced SIMD (Neon) instructions:
as documented here:
Co-authored-by: Kerry McLaughlin <[email protected]> Patch is 27.62 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/163165.diff 13 Files Affected:
diff --git a/clang/test/Driver/print-supported-extensions-aarch64.c b/clang/test/Driver/print-supported-extensions-aarch64.c
index 50c3610123646..7975b5ab7cb83 100644
--- a/clang/test/Driver/print-supported-extensions-aarch64.c
+++ b/clang/test/Driver/print-supported-extensions-aarch64.c
@@ -18,6 +18,8 @@
// CHECK-NEXT: d128 FEAT_D128, FEAT_LVA3, FEAT_SYSREG128, FEAT_SYSINSTR128 Enable Armv9.4-A 128-bit Page Table Descriptors, System Registers and instructions
// CHECK-NEXT: dit FEAT_DIT Enable Armv8.4-A Data Independent Timing instructions
// CHECK-NEXT: dotprod FEAT_DotProd Enable dot product support
+// CHECK-NEXT: f16f32dot FEAT_F16F32DOT Enable Armv9.7-A Advanced SIMD half-precision dot product accumulate to single-precision
+// CHECK-NEXT: f16f32mm FEAT_F16F32MM Enable Armv9.7-A Advanced SIMD half-precision matrix multiply-accumulate to single-precision
// CHECK-NEXT: f16mm FEAT_F16MM Enable Armv9.7-A non-widening half-precision matrix multiply-accumulate
// CHECK-NEXT: f32mm FEAT_F32MM Enable Matrix Multiply FP32 Extension
// CHECK-NEXT: f64mm FEAT_F64MM Enable Matrix Multiply FP64 Extension
diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td
index 5f943d39321f9..d2838d5065d28 100644
--- a/llvm/lib/Target/AArch64/AArch64Features.td
+++ b/llvm/lib/Target/AArch64/AArch64Features.td
@@ -619,6 +619,12 @@ def FeatureSVE_B16MM : ExtensionWithMArch<"sve-b16mm", "SVE_B16MM", "FEAT_SVE_B1
def FeatureF16MM : ExtensionWithMArch<"f16mm", "F16MM", "FEAT_F16MM",
"Enable Armv9.7-A non-widening half-precision matrix multiply-accumulate", [FeatureFullFP16]>;
+def FeatureF16F32DOT : ExtensionWithMArch<"f16f32dot", "F16F32DOT", "FEAT_F16F32DOT",
+ "Enable Armv9.7-A Advanced SIMD half-precision dot product accumulate to single-precision", [FeatureNEON, FeatureFullFP16]>;
+
+def FeatureF16F32MM : ExtensionWithMArch<"f16f32mm", "F16F32MM", "FEAT_F16F32MM",
+ "Enable Armv9.7-A Advanced SIMD half-precision matrix multiply-accumulate to single-precision", [FeatureNEON, FeatureFullFP16]>;
+
//===----------------------------------------------------------------------===//
// Other Features
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index d2f282eadd302..1bc9aea52a265 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1166,6 +1166,21 @@ def timm32_0_15 : Operand<i32>, TImmLeaf<i32, [{
let ParserMatchClass = Imm0_15Operand;
}
+// timm32_1_16 predicate - True if the 32-bit immediate is in the range [1,16]
+def timm32_1_16 : Operand<i32>, TImmLeaf<i32, [{
+ return ((uint32_t)Imm > 0 && (uint32_t)Imm < 17);
+}]> {
+ let ParserMatchClass = Imm1_16Operand;
+}
+
+// timm32_1_8 predicate - True if the 32-bit immediate is in the range [1,8]
+def timm32_1_8 : Operand<i32>, TImmLeaf<i32, [{
+ return ((uint32_t)Imm > 0 && (uint32_t)Imm < 9);
+}]> {
+ let ParserMatchClass = Imm1_8Operand;
+}
+
+
// timm32_0_31 predicate - True if the 32-bit immediate is in the range [0,31]
def timm32_0_31 : Operand<i32>, TImmLeaf<i32, [{
return ((uint32_t)Imm) < 32;
@@ -6538,8 +6553,7 @@ multiclass SIMDThreeSameVectorFML<bit U, bit b13, bits<3> size, string asm,
}
multiclass SIMDThreeSameVectorMLA<bit Q, string asm, SDPatternOperator op> {
-
- def v8f16 : BaseSIMDThreeSameVectorDot<Q, 0b0, 0b11, 0b1111, asm, ".8h", ".16b",
+ def v16f8 : BaseSIMDThreeSameVectorDot<Q, 0b0, 0b11, 0b1111, asm, ".8h", ".16b",
V128, v8f16, v16i8, op>;
}
@@ -6548,6 +6562,23 @@ multiclass SIMDThreeSameVectorMLAL<bit Q, bits<2> sz, string asm, SDPatternOpera
V128, v4f32, v16i8, op>;
}
+multiclass SIMDThreeSameVectorFMLA<string asm> {
+ def v8f16tov8f16 : BaseSIMDThreeSameVectorDot<0b1, 0b0, 0b11, 0b1101, asm, ".8h", ".8h",
+ V128, v8f16, v8f16, null_frag>;
+}
+
+multiclass SIMDThreeSameVectorFMLAWiden<string asm> {
+ def v8f16tov4f32 : BaseSIMDThreeSameVectorDot<0b1, 0b0, 0b01, 0b1101, asm, ".4s", ".8h",
+ V128, v4f32, v8f16, null_frag>;
+}
+
+multiclass SIMDThreeSameVectorFDot<string asm, SDPatternOperator OpNode = null_frag> {
+ def v2f32tov4f16 : BaseSIMDThreeSameVectorDot<0, 0, 0b10, 0b1111, asm, ".2s", ".4h", V64,
+ v2f32, v4f16, OpNode>;
+ def v4f32tov8f16 : BaseSIMDThreeSameVectorDot<1, 0, 0b10, 0b1111, asm, ".4s", ".8h", V128,
+ v4f32, v8f16, OpNode>;
+}
+
// FP8 assembly/disassembly classes
//----------------------------------------------------------------------------
@@ -9169,6 +9200,13 @@ multiclass SIMDThreeSameVectorFMLIndex<bit U, bits<4> opc, string asm,
V128, V128_lo, v4f32, v8f16, VectorIndexH, OpNode>;
}
+multiclass SIMDThreeSameVectorFDOTIndex<string asm> {
+ def v4f16tov2f32 : BaseSIMDThreeSameVectorIndexS<0b0, 0b0, 0b01, 0b1001, asm, ".2s", ".4h", ".2h",
+ V64, v2f32, v4f16, VectorIndexS, null_frag>;
+ def v8f16tov4f32 : BaseSIMDThreeSameVectorIndexS<0b1, 0b0, 0b01, 0b1001, asm, ".4s", ".8h",".2h",
+ V128, v4f32, v8f16, VectorIndexS, null_frag>;
+}
+
//----------------------------------------------------------------------------
// FP8 Advanced SIMD vector x indexed element
multiclass SIMD_FP8_Dot2_Index<string asm, SDPatternOperator op> {
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 66df27050723a..b28dffc67eb1e 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -256,6 +256,10 @@ def HasSVE2p3 : Predicate<"Subtarget->hasSVE2p3()">,
AssemblerPredicateWithAll<(all_of FeatureSVE2p3), "sve2p3">;
def HasSME2p3 : Predicate<"Subtarget->hasSME2p3()">,
AssemblerPredicateWithAll<(all_of FeatureSME2p3), "sme2p3">;
+def HasF16F32DOT : Predicate<"Subtarget->hasF16F32DOT()">,
+ AssemblerPredicateWithAll<(all_of FeatureF16F32DOT), "f16f32dot">;
+def HasF16F32MM : Predicate<"Subtarget->hasF16F32MM()">,
+ AssemblerPredicateWithAll<(all_of FeatureF16F32MM), "f16f32mm">;
// A subset of SVE(2) instructions are legal in Streaming SVE execution mode,
// they should be enabled if either has been specified.
@@ -11281,8 +11285,19 @@ let Predicates = [HasLSFE] in {
def STBFMINNML : BaseAtomicFPStore<FPR16, 0b00, 0b1, 0b111, "stbfminnml">;
}
+let Predicates = [HasF16F32DOT] in {
+ defm FDOT :SIMDThreeSameVectorFDot<"fdot">;
+ defm FDOTlane: SIMDThreeSameVectorFDOTIndex<"fdot">;
+}
+
+let Predicates = [HasF16MM] in
+ defm FMMLA : SIMDThreeSameVectorFMLA<"fmmla">;
+
+let Predicates = [HasF16F32MM] in
+ defm FMMLA : SIMDThreeSameVectorFMLAWiden<"fmmla">;
+
let Uses = [FPMR, FPCR] in
-defm FMMLA : SIMDThreeSameVectorFP8MatrixMul<"fmmla">;
+ defm FMMLA : SIMDThreeSameVectorFP8MatrixMul<"fmmla">;
//===----------------------------------------------------------------------===//
// Contention Management Hints (FEAT_CMH)
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index f92badab9a1eb..49de93ee1d5b9 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -3894,6 +3894,8 @@ static const struct Extension {
{"sve2p3", {AArch64::FeatureSVE2p3}},
{"sve-b16mm", {AArch64::FeatureSVE_B16MM}},
{"f16mm", {AArch64::FeatureF16MM}},
+ {"f16f32dot", {AArch64::FeatureF16F32DOT}},
+ {"f16f32mm", {AArch64::FeatureF16F32MM}},
};
static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
diff --git a/llvm/test/MC/AArch64/FP8/fmmla-diagnostics.s b/llvm/test/MC/AArch64/FP8/fmmla-diagnostics.s
index cf8d216581240..15efbee14ddc1 100644
--- a/llvm/test/MC/AArch64/FP8/fmmla-diagnostics.s
+++ b/llvm/test/MC/AArch64/FP8/fmmla-diagnostics.s
@@ -16,7 +16,7 @@ fmmla v0.4s, v1.4s, v2.4s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
fmmla v0.8h, v1.8h, v2.8h
-// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: f16mm
// CHECK-NEXT: fmmla v0.8h, v1.8h, v2.8h
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/neon-fdot-diagnostics.s b/llvm/test/MC/AArch64/neon-fdot-diagnostics.s
new file mode 100644
index 0000000000000..4f5f557644094
--- /dev/null
+++ b/llvm/test/MC/AArch64/neon-fdot-diagnostics.s
@@ -0,0 +1,59 @@
+// RUN: not llvm-mc -triple=aarch64 -mattr=f16f32dot 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Invalid operand
+
+fdot v0.2s, v0.4b, v0.4b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2s, v0.4b, v0.4b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2b, v0.4b, v0.4b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2b, v0.4b, v0.4b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2s, v0.4s, v0.4s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2s, v0.4s, v0.4s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2h, v0.4h, v0.4h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2h, v0.4h, v0.4h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// fdot indexed
+
+fdot v0.2s, v0.4b, v0.4b[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2s, v0.4b, v0.4b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2b, v0.4b, v0.4b[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2b, v0.4b, v0.4b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2s, v0.4s, v0.4s[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2s, v0.4s, v0.4s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2h, v0.4h, v0.4h[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2h, v0.4h, v0.4h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid immediate range
+
+fdot v0.2s, v0.4h, v0.2h[-1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: fdot v0.2s, v0.4h, v0.2h[-1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2s, v0.4h, v0.2h[4]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: fdot v0.2s, v0.4h, v0.2h[4]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/neon-fdot.s b/llvm/test/MC/AArch64/neon-fdot.s
new file mode 100644
index 0000000000000..c8a8e2f9023e1
--- /dev/null
+++ b/llvm/test/MC/AArch64/neon-fdot.s
@@ -0,0 +1,147 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+f16f32dot < %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+f16f32dot < %s \
+// RUN: | llvm-objdump -d --mattr=+f16f32dot --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+f16f32dot < %s \
+// RUN: | llvm-objdump -d --mattr=-f16f32dot --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+f16f32dot < %s \
+// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN: | llvm-mc -triple=aarch64 -mattr=+f16f32dot -disassemble -show-encoding \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+fdot v0.2s, v0.4h, v0.4h
+// CHECK-INST: fdot v0.2s, v0.4h, v0.4h
+// CHECK-ENCODING: encoding: [0x00,0xfc,0x80,0x0e]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0e80fc00 <unknown>
+
+fdot v10.2s, v10.4h, v10.4h
+// CHECK-INST: fdot v10.2s, v10.4h, v10.4h
+// CHECK-ENCODING: encoding: [0x4a,0xfd,0x8a,0x0e]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0e8afd4a <unknown>
+
+fdot v31.2s, v31.4h, v31.4h
+// CHECK-INST: fdot v31.2s, v31.4h, v31.4h
+// CHECK-ENCODING: encoding: [0xff,0xff,0x9f,0x0e]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0e9fffff <unknown>
+
+fdot v0.4s, v0.8h, v0.8h
+// CHECK-INST: fdot v0.4s, v0.8h, v0.8h
+// CHECK-ENCODING: encoding: [0x00,0xfc,0x80,0x4e]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4e80fc00 <unknown>
+
+fdot v10.4s, v10.8h, v10.8h
+// CHECK-INST: fdot v10.4s, v10.8h, v10.8h
+// CHECK-ENCODING: encoding: [0x4a,0xfd,0x8a,0x4e]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4e8afd4a <unknown>
+
+fdot v31.4s, v31.8h, v31.8h
+// CHECK-INST: fdot v31.4s, v31.8h, v31.8h
+// CHECK-ENCODING: encoding: [0xff,0xff,0x9f,0x4e]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4e9fffff <unknown>
+
+// fdot indexed
+
+fdot v0.2s, v0.4h, v0.2h[0]
+// CHECK-INST: fdot v0.2s, v0.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x00,0x90,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f409000 <unknown>
+
+fdot v10.2s, v0.4h, v0.2h[0]
+// CHECK-INST: fdot v10.2s, v0.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x0a,0x90,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f40900a <unknown>
+
+fdot v21.2s, v0.4h, v0.2h[0]
+// CHECK-INST: fdot v21.2s, v0.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x15,0x90,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f409015 <unknown>
+
+fdot v31.2s, v0.4h, v0.2h[0]
+// CHECK-INST: fdot v31.2s, v0.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x1f,0x90,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f40901f <unknown>
+
+fdot v0.2s, v10.4h, v0.2h[0]
+// CHECK-INST: fdot v0.2s, v10.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x40,0x91,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f409140 <unknown>
+
+fdot v10.2s, v10.4h, v0.2h[0]
+// CHECK-INST: fdot v10.2s, v10.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x4a,0x91,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f40914a <unknown>
+
+fdot v21.2s, v10.4h, v0.2h[0]
+// CHECK-INST: fdot v21.2s, v10.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x55,0x91,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f409155 <unknown>
+
+fdot v31.2s, v10.4h, v0.2h[0]
+// CHECK-INST: fdot v31.2s, v10.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x5f,0x91,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f40915f <unknown>
+
+fdot v0.4s, v21.8h, v31.2h[3]
+// CHECK-INST: fdot v0.4s, v21.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xa0,0x9a,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9aa0 <unknown>
+
+fdot v10.4s, v21.8h, v31.2h[3]
+// CHECK-INST: fdot v10.4s, v21.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xaa,0x9a,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9aaa <unknown>
+
+fdot v21.4s, v21.8h, v31.2h[3]
+// CHECK-INST: fdot v21.4s, v21.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xb5,0x9a,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9ab5 <unknown>
+
+fdot v31.4s, v21.8h, v31.2h[3]
+// CHECK-INST: fdot v31.4s, v21.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xbf,0x9a,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9abf <unknown>
+
+fdot v0.4s, v31.8h, v31.2h[3]
+// CHECK-INST: fdot v0.4s, v31.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xe0,0x9b,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9be0 <unknown>
+
+fdot v10.4s, v31.8h, v31.2h[3]
+// CHECK-INST: fdot v10.4s, v31.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xea,0x9b,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9bea <unknown>
+
+fdot v21.4s, v31.8h, v31.2h[3]
+// CHECK-INST: fdot v21.4s, v31.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xf5,0x9b,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9bf5 <unknown>
+
+fdot v31.4s, v31.8h, v31.2h[3]
+// CHECK-INST: fdot v31.4s, v31.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xff,0x9b,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9bff <unknown>
diff --git a/llvm/test/MC/AArch64/neon-fmmla-HtoS-diagnostics.s b/llvm/test/MC/AArch64/neon-fmmla-HtoS-diagnostics.s
new file mode 100644
index 0000000000000..ccc074225b6a1
--- /dev/null
+++ b/llvm/test/MC/AArch64/neon-fmmla-HtoS-diagnostics.s
@@ -0,0 +1,24 @@
+// RUN: not llvm-mc -triple=aarch64 -mattr=+f16f32mm 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Invalid operand/vector
+
+fmmla v0.4b, v0.8b, v0.8b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fmmla v0.4b, v0.8b, v0.8b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla v0.4h, v0.8h, v0.8h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fmmla v0.4h, v0.8h, v0.8h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla v0.4s, v0.8s, v0.8s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid vector kind qualifier
+// CHECK-NEXT: fmmla v0.4s, v0.8s, v0.8s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla v0.4d, v0.8d, v0.8d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid vector kind qualifier
+// CHECK-NEXT: fmmla v0.4d, v0.8d, v0.8d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/neon-fmmla-HtoS.s b/llvm/test/MC/AArch64/neon-fmmla-HtoS.s
new file mode 100644
index 0000000000000..6b3d352d285d8
--- /dev/null
+++ b/llvm/test/MC/AArch64/neon-fmmla-HtoS.s
@@ -0,0 +1,37 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+f16f32mm< %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+f16f32mm < %s \
+// RUN: | llvm-objdump -d --mattr=+f16f32mm --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+f16f32mm < %s \
+// RUN: | llvm-objdump -d --mattr=-f16f32mm --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+f16f32mm < %s \
+// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN: | llvm-mc -triple=aarch64 -mattr=+f16f32mm -disassemble -show-encoding \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+fmmla v0.4s, v0.8h, v0.8h
+// CHECK-INST: fmmla v0.4s, v0.8h, v0.8h
+// CHECK-ENCODING: encoding: [0x00,0xec,0x40,0x4e]
+// CHECK-ERROR: instruction requires: f16f32mm
+// CHECK-UNKNOWN: 4e40ec00 <unknown>
+
+fmmla v10.4s, v10.8h, v10.8h
+// CHECK-INST: fmmla v10.4s, v10.8h, v10.8h
+// CHECK-ENCODING: encoding: [0x4a,0xed,0x4a,0x4e]
+// CHECK-ERROR: instruction requires: f16f32mm
+// CHECK-UNKNOWN: 4e4aed4a <unknown>
+
+fmmla v21.4s, v21.8h, v21.8h
+// CHECK-INST: fmmla v21.4s, v21.8h, v...
[truncated]
|
@llvm/pr-subscribers-clang Author: Jonathan Thackray (jthackray) ChangesAdd support for new Advanced SIMD (Neon) instructions:
as documented here:
Co-authored-by: Kerry McLaughlin <[email protected]> Patch is 27.62 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/163165.diff 13 Files Affected:
diff --git a/clang/test/Driver/print-supported-extensions-aarch64.c b/clang/test/Driver/print-supported-extensions-aarch64.c
index 50c3610123646..7975b5ab7cb83 100644
--- a/clang/test/Driver/print-supported-extensions-aarch64.c
+++ b/clang/test/Driver/print-supported-extensions-aarch64.c
@@ -18,6 +18,8 @@
// CHECK-NEXT: d128 FEAT_D128, FEAT_LVA3, FEAT_SYSREG128, FEAT_SYSINSTR128 Enable Armv9.4-A 128-bit Page Table Descriptors, System Registers and instructions
// CHECK-NEXT: dit FEAT_DIT Enable Armv8.4-A Data Independent Timing instructions
// CHECK-NEXT: dotprod FEAT_DotProd Enable dot product support
+// CHECK-NEXT: f16f32dot FEAT_F16F32DOT Enable Armv9.7-A Advanced SIMD half-precision dot product accumulate to single-precision
+// CHECK-NEXT: f16f32mm FEAT_F16F32MM Enable Armv9.7-A Advanced SIMD half-precision matrix multiply-accumulate to single-precision
// CHECK-NEXT: f16mm FEAT_F16MM Enable Armv9.7-A non-widening half-precision matrix multiply-accumulate
// CHECK-NEXT: f32mm FEAT_F32MM Enable Matrix Multiply FP32 Extension
// CHECK-NEXT: f64mm FEAT_F64MM Enable Matrix Multiply FP64 Extension
diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td
index 5f943d39321f9..d2838d5065d28 100644
--- a/llvm/lib/Target/AArch64/AArch64Features.td
+++ b/llvm/lib/Target/AArch64/AArch64Features.td
@@ -619,6 +619,12 @@ def FeatureSVE_B16MM : ExtensionWithMArch<"sve-b16mm", "SVE_B16MM", "FEAT_SVE_B1
def FeatureF16MM : ExtensionWithMArch<"f16mm", "F16MM", "FEAT_F16MM",
"Enable Armv9.7-A non-widening half-precision matrix multiply-accumulate", [FeatureFullFP16]>;
+def FeatureF16F32DOT : ExtensionWithMArch<"f16f32dot", "F16F32DOT", "FEAT_F16F32DOT",
+ "Enable Armv9.7-A Advanced SIMD half-precision dot product accumulate to single-precision", [FeatureNEON, FeatureFullFP16]>;
+
+def FeatureF16F32MM : ExtensionWithMArch<"f16f32mm", "F16F32MM", "FEAT_F16F32MM",
+ "Enable Armv9.7-A Advanced SIMD half-precision matrix multiply-accumulate to single-precision", [FeatureNEON, FeatureFullFP16]>;
+
//===----------------------------------------------------------------------===//
// Other Features
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index d2f282eadd302..1bc9aea52a265 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1166,6 +1166,21 @@ def timm32_0_15 : Operand<i32>, TImmLeaf<i32, [{
let ParserMatchClass = Imm0_15Operand;
}
+// timm32_1_16 predicate - True if the 32-bit immediate is in the range [1,16]
+def timm32_1_16 : Operand<i32>, TImmLeaf<i32, [{
+ return ((uint32_t)Imm > 0 && (uint32_t)Imm < 17);
+}]> {
+ let ParserMatchClass = Imm1_16Operand;
+}
+
+// timm32_1_8 predicate - True if the 32-bit immediate is in the range [1,8]
+def timm32_1_8 : Operand<i32>, TImmLeaf<i32, [{
+ return ((uint32_t)Imm > 0 && (uint32_t)Imm < 9);
+}]> {
+ let ParserMatchClass = Imm1_8Operand;
+}
+
+
// timm32_0_31 predicate - True if the 32-bit immediate is in the range [0,31]
def timm32_0_31 : Operand<i32>, TImmLeaf<i32, [{
return ((uint32_t)Imm) < 32;
@@ -6538,8 +6553,7 @@ multiclass SIMDThreeSameVectorFML<bit U, bit b13, bits<3> size, string asm,
}
multiclass SIMDThreeSameVectorMLA<bit Q, string asm, SDPatternOperator op> {
-
- def v8f16 : BaseSIMDThreeSameVectorDot<Q, 0b0, 0b11, 0b1111, asm, ".8h", ".16b",
+ def v16f8 : BaseSIMDThreeSameVectorDot<Q, 0b0, 0b11, 0b1111, asm, ".8h", ".16b",
V128, v8f16, v16i8, op>;
}
@@ -6548,6 +6562,23 @@ multiclass SIMDThreeSameVectorMLAL<bit Q, bits<2> sz, string asm, SDPatternOpera
V128, v4f32, v16i8, op>;
}
+multiclass SIMDThreeSameVectorFMLA<string asm> {
+ def v8f16tov8f16 : BaseSIMDThreeSameVectorDot<0b1, 0b0, 0b11, 0b1101, asm, ".8h", ".8h",
+ V128, v8f16, v8f16, null_frag>;
+}
+
+multiclass SIMDThreeSameVectorFMLAWiden<string asm> {
+ def v8f16tov4f32 : BaseSIMDThreeSameVectorDot<0b1, 0b0, 0b01, 0b1101, asm, ".4s", ".8h",
+ V128, v4f32, v8f16, null_frag>;
+}
+
+multiclass SIMDThreeSameVectorFDot<string asm, SDPatternOperator OpNode = null_frag> {
+ def v2f32tov4f16 : BaseSIMDThreeSameVectorDot<0, 0, 0b10, 0b1111, asm, ".2s", ".4h", V64,
+ v2f32, v4f16, OpNode>;
+ def v4f32tov8f16 : BaseSIMDThreeSameVectorDot<1, 0, 0b10, 0b1111, asm, ".4s", ".8h", V128,
+ v4f32, v8f16, OpNode>;
+}
+
// FP8 assembly/disassembly classes
//----------------------------------------------------------------------------
@@ -9169,6 +9200,13 @@ multiclass SIMDThreeSameVectorFMLIndex<bit U, bits<4> opc, string asm,
V128, V128_lo, v4f32, v8f16, VectorIndexH, OpNode>;
}
+multiclass SIMDThreeSameVectorFDOTIndex<string asm> {
+ def v4f16tov2f32 : BaseSIMDThreeSameVectorIndexS<0b0, 0b0, 0b01, 0b1001, asm, ".2s", ".4h", ".2h",
+ V64, v2f32, v4f16, VectorIndexS, null_frag>;
+ def v8f16tov4f32 : BaseSIMDThreeSameVectorIndexS<0b1, 0b0, 0b01, 0b1001, asm, ".4s", ".8h",".2h",
+ V128, v4f32, v8f16, VectorIndexS, null_frag>;
+}
+
//----------------------------------------------------------------------------
// FP8 Advanced SIMD vector x indexed element
multiclass SIMD_FP8_Dot2_Index<string asm, SDPatternOperator op> {
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 66df27050723a..b28dffc67eb1e 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -256,6 +256,10 @@ def HasSVE2p3 : Predicate<"Subtarget->hasSVE2p3()">,
AssemblerPredicateWithAll<(all_of FeatureSVE2p3), "sve2p3">;
def HasSME2p3 : Predicate<"Subtarget->hasSME2p3()">,
AssemblerPredicateWithAll<(all_of FeatureSME2p3), "sme2p3">;
+def HasF16F32DOT : Predicate<"Subtarget->hasF16F32DOT()">,
+ AssemblerPredicateWithAll<(all_of FeatureF16F32DOT), "f16f32dot">;
+def HasF16F32MM : Predicate<"Subtarget->hasF16F32MM()">,
+ AssemblerPredicateWithAll<(all_of FeatureF16F32MM), "f16f32mm">;
// A subset of SVE(2) instructions are legal in Streaming SVE execution mode,
// they should be enabled if either has been specified.
@@ -11281,8 +11285,19 @@ let Predicates = [HasLSFE] in {
def STBFMINNML : BaseAtomicFPStore<FPR16, 0b00, 0b1, 0b111, "stbfminnml">;
}
+let Predicates = [HasF16F32DOT] in {
+ defm FDOT :SIMDThreeSameVectorFDot<"fdot">;
+ defm FDOTlane: SIMDThreeSameVectorFDOTIndex<"fdot">;
+}
+
+let Predicates = [HasF16MM] in
+ defm FMMLA : SIMDThreeSameVectorFMLA<"fmmla">;
+
+let Predicates = [HasF16F32MM] in
+ defm FMMLA : SIMDThreeSameVectorFMLAWiden<"fmmla">;
+
let Uses = [FPMR, FPCR] in
-defm FMMLA : SIMDThreeSameVectorFP8MatrixMul<"fmmla">;
+ defm FMMLA : SIMDThreeSameVectorFP8MatrixMul<"fmmla">;
//===----------------------------------------------------------------------===//
// Contention Management Hints (FEAT_CMH)
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index f92badab9a1eb..49de93ee1d5b9 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -3894,6 +3894,8 @@ static const struct Extension {
{"sve2p3", {AArch64::FeatureSVE2p3}},
{"sve-b16mm", {AArch64::FeatureSVE_B16MM}},
{"f16mm", {AArch64::FeatureF16MM}},
+ {"f16f32dot", {AArch64::FeatureF16F32DOT}},
+ {"f16f32mm", {AArch64::FeatureF16F32MM}},
};
static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
diff --git a/llvm/test/MC/AArch64/FP8/fmmla-diagnostics.s b/llvm/test/MC/AArch64/FP8/fmmla-diagnostics.s
index cf8d216581240..15efbee14ddc1 100644
--- a/llvm/test/MC/AArch64/FP8/fmmla-diagnostics.s
+++ b/llvm/test/MC/AArch64/FP8/fmmla-diagnostics.s
@@ -16,7 +16,7 @@ fmmla v0.4s, v1.4s, v2.4s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
fmmla v0.8h, v1.8h, v2.8h
-// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: f16mm
// CHECK-NEXT: fmmla v0.8h, v1.8h, v2.8h
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/neon-fdot-diagnostics.s b/llvm/test/MC/AArch64/neon-fdot-diagnostics.s
new file mode 100644
index 0000000000000..4f5f557644094
--- /dev/null
+++ b/llvm/test/MC/AArch64/neon-fdot-diagnostics.s
@@ -0,0 +1,59 @@
+// RUN: not llvm-mc -triple=aarch64 -mattr=f16f32dot 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Invalid operand
+
+fdot v0.2s, v0.4b, v0.4b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2s, v0.4b, v0.4b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2b, v0.4b, v0.4b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2b, v0.4b, v0.4b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2s, v0.4s, v0.4s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2s, v0.4s, v0.4s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2h, v0.4h, v0.4h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2h, v0.4h, v0.4h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// fdot indexed
+
+fdot v0.2s, v0.4b, v0.4b[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2s, v0.4b, v0.4b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2b, v0.4b, v0.4b[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2b, v0.4b, v0.4b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2s, v0.4s, v0.4s[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2s, v0.4s, v0.4s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2h, v0.4h, v0.4h[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fdot v0.2h, v0.4h, v0.4h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid immediate range
+
+fdot v0.2s, v0.4h, v0.2h[-1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: fdot v0.2s, v0.4h, v0.2h[-1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fdot v0.2s, v0.4h, v0.2h[4]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
+// CHECK-NEXT: fdot v0.2s, v0.4h, v0.2h[4]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/neon-fdot.s b/llvm/test/MC/AArch64/neon-fdot.s
new file mode 100644
index 0000000000000..c8a8e2f9023e1
--- /dev/null
+++ b/llvm/test/MC/AArch64/neon-fdot.s
@@ -0,0 +1,147 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+f16f32dot < %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+f16f32dot < %s \
+// RUN: | llvm-objdump -d --mattr=+f16f32dot --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+f16f32dot < %s \
+// RUN: | llvm-objdump -d --mattr=-f16f32dot --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+f16f32dot < %s \
+// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN: | llvm-mc -triple=aarch64 -mattr=+f16f32dot -disassemble -show-encoding \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+fdot v0.2s, v0.4h, v0.4h
+// CHECK-INST: fdot v0.2s, v0.4h, v0.4h
+// CHECK-ENCODING: encoding: [0x00,0xfc,0x80,0x0e]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0e80fc00 <unknown>
+
+fdot v10.2s, v10.4h, v10.4h
+// CHECK-INST: fdot v10.2s, v10.4h, v10.4h
+// CHECK-ENCODING: encoding: [0x4a,0xfd,0x8a,0x0e]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0e8afd4a <unknown>
+
+fdot v31.2s, v31.4h, v31.4h
+// CHECK-INST: fdot v31.2s, v31.4h, v31.4h
+// CHECK-ENCODING: encoding: [0xff,0xff,0x9f,0x0e]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0e9fffff <unknown>
+
+fdot v0.4s, v0.8h, v0.8h
+// CHECK-INST: fdot v0.4s, v0.8h, v0.8h
+// CHECK-ENCODING: encoding: [0x00,0xfc,0x80,0x4e]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4e80fc00 <unknown>
+
+fdot v10.4s, v10.8h, v10.8h
+// CHECK-INST: fdot v10.4s, v10.8h, v10.8h
+// CHECK-ENCODING: encoding: [0x4a,0xfd,0x8a,0x4e]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4e8afd4a <unknown>
+
+fdot v31.4s, v31.8h, v31.8h
+// CHECK-INST: fdot v31.4s, v31.8h, v31.8h
+// CHECK-ENCODING: encoding: [0xff,0xff,0x9f,0x4e]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4e9fffff <unknown>
+
+// fdot indexed
+
+fdot v0.2s, v0.4h, v0.2h[0]
+// CHECK-INST: fdot v0.2s, v0.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x00,0x90,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f409000 <unknown>
+
+fdot v10.2s, v0.4h, v0.2h[0]
+// CHECK-INST: fdot v10.2s, v0.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x0a,0x90,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f40900a <unknown>
+
+fdot v21.2s, v0.4h, v0.2h[0]
+// CHECK-INST: fdot v21.2s, v0.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x15,0x90,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f409015 <unknown>
+
+fdot v31.2s, v0.4h, v0.2h[0]
+// CHECK-INST: fdot v31.2s, v0.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x1f,0x90,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f40901f <unknown>
+
+fdot v0.2s, v10.4h, v0.2h[0]
+// CHECK-INST: fdot v0.2s, v10.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x40,0x91,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f409140 <unknown>
+
+fdot v10.2s, v10.4h, v0.2h[0]
+// CHECK-INST: fdot v10.2s, v10.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x4a,0x91,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f40914a <unknown>
+
+fdot v21.2s, v10.4h, v0.2h[0]
+// CHECK-INST: fdot v21.2s, v10.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x55,0x91,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f409155 <unknown>
+
+fdot v31.2s, v10.4h, v0.2h[0]
+// CHECK-INST: fdot v31.2s, v10.4h, v0.2h[0]
+// CHECK-ENCODING: encoding: [0x5f,0x91,0x40,0x0f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 0f40915f <unknown>
+
+fdot v0.4s, v21.8h, v31.2h[3]
+// CHECK-INST: fdot v0.4s, v21.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xa0,0x9a,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9aa0 <unknown>
+
+fdot v10.4s, v21.8h, v31.2h[3]
+// CHECK-INST: fdot v10.4s, v21.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xaa,0x9a,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9aaa <unknown>
+
+fdot v21.4s, v21.8h, v31.2h[3]
+// CHECK-INST: fdot v21.4s, v21.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xb5,0x9a,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9ab5 <unknown>
+
+fdot v31.4s, v21.8h, v31.2h[3]
+// CHECK-INST: fdot v31.4s, v21.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xbf,0x9a,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9abf <unknown>
+
+fdot v0.4s, v31.8h, v31.2h[3]
+// CHECK-INST: fdot v0.4s, v31.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xe0,0x9b,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9be0 <unknown>
+
+fdot v10.4s, v31.8h, v31.2h[3]
+// CHECK-INST: fdot v10.4s, v31.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xea,0x9b,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9bea <unknown>
+
+fdot v21.4s, v31.8h, v31.2h[3]
+// CHECK-INST: fdot v21.4s, v31.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xf5,0x9b,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9bf5 <unknown>
+
+fdot v31.4s, v31.8h, v31.2h[3]
+// CHECK-INST: fdot v31.4s, v31.8h, v31.2h[3]
+// CHECK-ENCODING: encoding: [0xff,0x9b,0x7f,0x4f]
+// CHECK-ERROR: instruction requires: f16f32dot
+// CHECK-UNKNOWN: 4f7f9bff <unknown>
diff --git a/llvm/test/MC/AArch64/neon-fmmla-HtoS-diagnostics.s b/llvm/test/MC/AArch64/neon-fmmla-HtoS-diagnostics.s
new file mode 100644
index 0000000000000..ccc074225b6a1
--- /dev/null
+++ b/llvm/test/MC/AArch64/neon-fmmla-HtoS-diagnostics.s
@@ -0,0 +1,24 @@
+// RUN: not llvm-mc -triple=aarch64 -mattr=+f16f32mm 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Invalid operand/vector
+
+fmmla v0.4b, v0.8b, v0.8b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fmmla v0.4b, v0.8b, v0.8b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla v0.4h, v0.8h, v0.8h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fmmla v0.4h, v0.8h, v0.8h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla v0.4s, v0.8s, v0.8s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid vector kind qualifier
+// CHECK-NEXT: fmmla v0.4s, v0.8s, v0.8s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla v0.4d, v0.8d, v0.8d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid vector kind qualifier
+// CHECK-NEXT: fmmla v0.4d, v0.8d, v0.8d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/neon-fmmla-HtoS.s b/llvm/test/MC/AArch64/neon-fmmla-HtoS.s
new file mode 100644
index 0000000000000..6b3d352d285d8
--- /dev/null
+++ b/llvm/test/MC/AArch64/neon-fmmla-HtoS.s
@@ -0,0 +1,37 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+f16f32mm< %s \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+f16f32mm < %s \
+// RUN: | llvm-objdump -d --mattr=+f16f32mm --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+f16f32mm < %s \
+// RUN: | llvm-objdump -d --mattr=-f16f32mm --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+f16f32mm < %s \
+// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN: | llvm-mc -triple=aarch64 -mattr=+f16f32mm -disassemble -show-encoding \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+fmmla v0.4s, v0.8h, v0.8h
+// CHECK-INST: fmmla v0.4s, v0.8h, v0.8h
+// CHECK-ENCODING: encoding: [0x00,0xec,0x40,0x4e]
+// CHECK-ERROR: instruction requires: f16f32mm
+// CHECK-UNKNOWN: 4e40ec00 <unknown>
+
+fmmla v10.4s, v10.8h, v10.8h
+// CHECK-INST: fmmla v10.4s, v10.8h, v10.8h
+// CHECK-ENCODING: encoding: [0x4a,0xed,0x4a,0x4e]
+// CHECK-ERROR: instruction requires: f16f32mm
+// CHECK-UNKNOWN: 4e4aed4a <unknown>
+
+fmmla v21.4s, v21.8h, v21.8h
+// CHECK-INST: fmmla v21.4s, v21.8h, v...
[truncated]
|
03e8371
to
a6b0561
Compare
6eb5175
to
4cc767b
Compare
a6b0561
to
729c6b4
Compare
…nstructions Add support for new Advanced SIMD (Neon) instructions: - FDOT (half-precision to single-precision, by element) - FDOT (half-precision to single-precision, vector) - FMMLA (half-precision, non-widening) - FMMLA (widening, half-precision to single-precision) as documented here: * https://developer.arm.com/documentation/ddi0602/2025-09/ * https://developer.arm.com/documentation/109697/2025_09/2025-Architecture-Extensions Co-authored-by: Kerry McLaughlin <[email protected]> Co-authored-by: Caroline Concatto <[email protected]> Co-authored-by: Virginia Cangelosi <[email protected]>
4cc767b
to
70f531e
Compare
729c6b4
to
2f6800b
Compare
Add support for new Advanced SIMD (Neon) instructions:
as documented here:
Co-authored-by: Kerry McLaughlin [email protected]
Co-authored-by: Caroline Concatto [email protected]
Co-authored-by: Virginia Cangelosi [email protected]