-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[AArch64][Clang][NEON] Remove undefined vcmla intrinsics #112575
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
arm_neon.td currently generates the same set of 24 vcmla intrinsic prototypes for the f16, f32, and f64 base types. This is incorrect, the only valid vcmla intrinsics for the f64 base type are: - vcmlaq_f64 - vcmlaq_rot90_f64 - vcmlaq_rot180_f64 - vcmlaq_rot270_f64 (see ACLE https://github.com/ARM-software/acle/blob/main/neon_intrinsics/advsimd.md) This patch removes these unwwanted intrinsic prototypes.
|
@llvm/pr-subscribers-clang Author: None (SpencerAbson) Changesarm_neon.td currently generates the same set of 24 vcmla intrinsic prototypes for the f16, f32, and f64 base types. This is incorrect, the only valid vcmla intrinsics for the f64 base type are:
(see ACLE https://github.com/ARM-software/acle/blob/main/neon_intrinsics/advsimd.md) This patch removes the incorrect intrinsic prototypes. Full diff: https://github.com/llvm/llvm-project/pull/112575.diff 3 Files Affected:
diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td
index 8652b5e3a9c901..ec829f566ef5fc 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -1968,13 +1968,16 @@ let TargetGuard = "v8.3a,neon" in {
def VCADDQ_ROT90 : SInst<"vcaddq_rot90", "QQQ", "f">;
def VCADDQ_ROT270 : SInst<"vcaddq_rot270", "QQQ", "f">;
- defm VCMLA_F32 : VCMLA_ROTS<"f", "uint64x1_t", "uint64x2_t">;
+ defm VCMLA_F32 : VCMLA_ROTS<"f", "uint64x1_t", "uint64x2_t">;
}
let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.3a,neon" in {
def VCADDQ_ROT90_FP64 : SInst<"vcaddq_rot90", "QQQ", "d">;
def VCADDQ_ROT270_FP64 : SInst<"vcaddq_rot270", "QQQ", "d">;
- defm VCMLA_FP64 : VCMLA_ROTS<"d", "uint64x2_t", "uint64x2_t">;
+ def VCMLAQ_FP64 : SInst<"vcmlaq", "QQQQ", "d">;
+ def VCMLAQ_ROT90_FP64 : SInst<"vcmlaq_rot90", "QQQQ", "d">;
+ def VCMLAQ_ROT180_FP64 : SInst<"vcmlaq_rot180", "QQQQ", "d">;
+ def VCMLAQ_ROT270_FP64 : SInst<"vcmlaq_rot270", "QQQQ", "d">;
}
// V8.2-A BFloat intrinsics
diff --git a/clang/test/Sema/aarch64-neon-target.c b/clang/test/Sema/aarch64-neon-target.c
index fd84b7f2eb00a1..07d763ec84bd12 100644
--- a/clang/test/Sema/aarch64-neon-target.c
+++ b/clang/test/Sema/aarch64-neon-target.c
@@ -58,7 +58,7 @@ __attribute__((target("arch=armv8.3-a+fp16")))
void test_v83(float32x4_t v4f32, float16x4_t v4f16, float64x2_t v2f64) {
vcaddq_rot90_f32(v4f32, v4f32);
vcmla_rot90_f16(v4f16, v4f16, v4f16);
- vcmlaq_rot270_laneq_f64(v2f64, v2f64, v2f64, 1);
+ vcmlaq_rot270_f64(v2f64, v2f64, v2f64);
}
__attribute__((target("arch=armv8.5-a")))
@@ -95,7 +95,7 @@ void undefined(uint32x2_t v2i32, uint32x4_t v4i32, uint16x8_t v8i16, uint8x16_t
// 8.3 - complex
vcaddq_rot90_f32(v4f32, v4f32); // expected-error {{always_inline function 'vcaddq_rot90_f32' requires target feature 'v8.3a'}}
vcmla_rot90_f16(v4f16, v4f16, v4f16); // expected-error {{always_inline function 'vcmla_rot90_f16' requires target feature 'v8.3a'}}
- vcmlaq_rot270_laneq_f64(v2f64, v2f64, v2f64, 1); // expected-error {{always_inline function 'vcmlaq_rot270_f64' requires target feature 'v8.3a'}}
+ vcmlaq_rot270_f64(v2f64, v2f64, v2f64); // expected-error {{always_inline function 'vcmlaq_rot270_f64' requires target feature 'v8.3a'}}
// 8.5 - frint
vrnd32xq_f32(v4f32); // expected-error {{always_inline function 'vrnd32xq_f32' requires target feature 'v8.5a'}}
diff --git a/clang/test/Sema/aarch64-vcmla-undef.c b/clang/test/Sema/aarch64-vcmla-undef.c
new file mode 100644
index 00000000000000..8a777ff6156339
--- /dev/null
+++ b/clang/test/Sema/aarch64-vcmla-undef.c
@@ -0,0 +1,31 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -target-feature +v8.3a -ffreestanding -fsyntax-only -verify -verify-ignore-unexpected=note %s
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_neon.h>
+
+void test(float64x1_t v1f64, float64x2_t v2f64) {
+ vcmla_f64(v1f64, v1f64, v1f64); // expected-error {{call to undeclared function 'vcmla_f64'}}
+ vcmla_lane_f64(v1f64, v1f64, v1f64, 0); // expected-error {{call to undeclared function 'vcmla_lane_f64'}}
+ vcmla_laneq_f64(v1f64, v1f64, v2f64, 0); // expected-error {{call to undeclared function 'vcmla_laneq_f64'}}
+ vcmlaq_lane_f64(v2f64, v2f64, v1f64, 0); // expected-error {{call to undeclared function 'vcmlaq_lane_f64'}}
+ vcmlaq_laneq_f64(v2f64, v2f64, v2f64, 0); // expected-error {{call to undeclared function 'vcmlaq_laneq_f64'}}
+
+ vcmla_rot90_f64(v1f64, v1f64, v1f64); // expected-error {{call to undeclared function 'vcmla_rot90_f64'}}
+ vcmla_rot90_lane_f64(v1f64, v1f64, v1f64, 0); // expected-error {{call to undeclared function 'vcmla_rot90_lane_f64'}}
+ vcmla_rot90_laneq_f64(v1f64, v1f64, v2f64, 0); // expected-error {{call to undeclared function 'vcmla_rot90_laneq_f64'}}
+ vcmlaq_rot90_lane_f64(v2f64, v2f64, v1f64, 0); // expected-error {{call to undeclared function 'vcmlaq_rot90_lane_f64'}}
+ vcmlaq_rot90_laneq_f64(v2f64, v2f64, v2f64, 0); // expected-error {{call to undeclared function 'vcmlaq_rot90_laneq_f64'}}
+
+ vcmla_rot180_f64(v1f64, v1f64, v1f64); // expected-error {{call to undeclared function 'vcmla_rot180_f64'}}
+ vcmla_rot180_lane_f64(v1f64, v1f64, v1f64, 0); // expected-error {{call to undeclared function 'vcmla_rot180_lane_f64'}}
+ vcmla_rot180_laneq_f64(v1f64, v1f64, v2f64, 0); // expected-error {{call to undeclared function 'vcmla_rot180_laneq_f64'}}
+ vcmlaq_rot180_lane_f64(v2f64, v2f64, v1f64, 0); // expected-error {{call to undeclared function 'vcmlaq_rot180_lane_f64'}}
+ vcmlaq_rot180_laneq_f64(v2f64, v2f64, v2f64, 0); // expected-error {{call to undeclared function 'vcmlaq_rot180_laneq_f64'}}
+
+ vcmla_rot270_f64(v1f64, v1f64, v1f64); // expected-error {{call to undeclared function 'vcmla_rot270_f64'}}
+ vcmla_rot270_lane_f64(v1f64, v1f64, v1f64, 0); // expected-error {{call to undeclared function 'vcmla_rot270_lane_f64'}}
+ vcmla_rot270_laneq_f64(v1f64, v1f64, v2f64, 0); // expected-error {{call to undeclared function 'vcmla_rot270_laneq_f64'}}
+ vcmlaq_rot270_lane_f64(v2f64, v2f64, v1f64, 0); // expected-error {{call to undeclared function 'vcmlaq_rot270_lane_f64'}}
+ vcmlaq_rot270_laneq_f64(v1f64, v1f64, v2f64, 0); // expected-error {{call to undeclared function 'vcmlaq_rot270_laneq_f64'}}
+}
|
momchil-velikov
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, cheers!
arm_neon.td currently generates the same 24
vcmlaintrinsic prototypes for each of the f16, f32, and f64 base types. This is incorrect, the only valid vcmla intrinsics for the f64 base type are:vcmlaq_f64vcmlaq_rot90_f64vcmlaq_rot180_f64vcmlaq_rot270_f64(see ACLE https://github.com/ARM-software/acle/blob/main/neon_intrinsics/advsimd.md)
This patch removes the incorrect intrinsic prototypes.