Skip to content

Commit 3697879

Browse files
committed
[AArch64][Clang][NEON] Remove undefined vcmla intrinsics
arm_neon.td currently generates the same set of 24 vcmla intrinsic prototypes for the f16, f32, and f64 base types. This is incorrect, the only valid vcmla intrinsics for the f64 base type are: - vcmlaq_f64 - vcmlaq_rot90_f64 - vcmlaq_rot180_f64 - vcmlaq_rot270_f64 (see ACLE https://github.com/ARM-software/acle/blob/main/neon_intrinsics/advsimd.md) This patch removes these unwwanted intrinsic prototypes.
1 parent 4e6fa78 commit 3697879

File tree

3 files changed

+38
-4
lines changed

3 files changed

+38
-4
lines changed

clang/include/clang/Basic/arm_neon.td

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1968,13 +1968,16 @@ let TargetGuard = "v8.3a,neon" in {
19681968
def VCADDQ_ROT90 : SInst<"vcaddq_rot90", "QQQ", "f">;
19691969
def VCADDQ_ROT270 : SInst<"vcaddq_rot270", "QQQ", "f">;
19701970

1971-
defm VCMLA_F32 : VCMLA_ROTS<"f", "uint64x1_t", "uint64x2_t">;
1971+
defm VCMLA_F32 : VCMLA_ROTS<"f", "uint64x1_t", "uint64x2_t">;
19721972
}
19731973
let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.3a,neon" in {
19741974
def VCADDQ_ROT90_FP64 : SInst<"vcaddq_rot90", "QQQ", "d">;
19751975
def VCADDQ_ROT270_FP64 : SInst<"vcaddq_rot270", "QQQ", "d">;
19761976

1977-
defm VCMLA_FP64 : VCMLA_ROTS<"d", "uint64x2_t", "uint64x2_t">;
1977+
def VCMLAQ_FP64 : SInst<"vcmlaq", "QQQQ", "d">;
1978+
def VCMLAQ_ROT90_FP64 : SInst<"vcmlaq_rot90", "QQQQ", "d">;
1979+
def VCMLAQ_ROT180_FP64 : SInst<"vcmlaq_rot180", "QQQQ", "d">;
1980+
def VCMLAQ_ROT270_FP64 : SInst<"vcmlaq_rot270", "QQQQ", "d">;
19781981
}
19791982

19801983
// V8.2-A BFloat intrinsics

clang/test/Sema/aarch64-neon-target.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ __attribute__((target("arch=armv8.3-a+fp16")))
5858
void test_v83(float32x4_t v4f32, float16x4_t v4f16, float64x2_t v2f64) {
5959
vcaddq_rot90_f32(v4f32, v4f32);
6060
vcmla_rot90_f16(v4f16, v4f16, v4f16);
61-
vcmlaq_rot270_laneq_f64(v2f64, v2f64, v2f64, 1);
61+
vcmlaq_rot270_f64(v2f64, v2f64, v2f64);
6262
}
6363

6464
__attribute__((target("arch=armv8.5-a")))
@@ -95,7 +95,7 @@ void undefined(uint32x2_t v2i32, uint32x4_t v4i32, uint16x8_t v8i16, uint8x16_t
9595
// 8.3 - complex
9696
vcaddq_rot90_f32(v4f32, v4f32); // expected-error {{always_inline function 'vcaddq_rot90_f32' requires target feature 'v8.3a'}}
9797
vcmla_rot90_f16(v4f16, v4f16, v4f16); // expected-error {{always_inline function 'vcmla_rot90_f16' requires target feature 'v8.3a'}}
98-
vcmlaq_rot270_laneq_f64(v2f64, v2f64, v2f64, 1); // expected-error {{always_inline function 'vcmlaq_rot270_f64' requires target feature 'v8.3a'}}
98+
vcmlaq_rot270_f64(v2f64, v2f64, v2f64); // expected-error {{always_inline function 'vcmlaq_rot270_f64' requires target feature 'v8.3a'}}
9999
// 8.5 - frint
100100
vrnd32xq_f32(v4f32); // expected-error {{always_inline function 'vrnd32xq_f32' requires target feature 'v8.5a'}}
101101

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -target-feature +v8.3a -ffreestanding -fsyntax-only -verify -verify-ignore-unexpected=note %s
2+
3+
// REQUIRES: aarch64-registered-target
4+
5+
#include <arm_neon.h>
6+
7+
void test(float64x1_t v1f64, float64x2_t v2f64) {
8+
vcmla_f64(v1f64, v1f64, v1f64); // expected-error {{call to undeclared function 'vcmla_f64'}}
9+
vcmla_lane_f64(v1f64, v1f64, v1f64, 0); // expected-error {{call to undeclared function 'vcmla_lane_f64'}}
10+
vcmla_laneq_f64(v1f64, v1f64, v2f64, 0); // expected-error {{call to undeclared function 'vcmla_laneq_f64'}}
11+
vcmlaq_lane_f64(v2f64, v2f64, v1f64, 0); // expected-error {{call to undeclared function 'vcmlaq_lane_f64'}}
12+
vcmlaq_laneq_f64(v2f64, v2f64, v2f64, 0); // expected-error {{call to undeclared function 'vcmlaq_laneq_f64'}}
13+
14+
vcmla_rot90_f64(v1f64, v1f64, v1f64); // expected-error {{call to undeclared function 'vcmla_rot90_f64'}}
15+
vcmla_rot90_lane_f64(v1f64, v1f64, v1f64, 0); // expected-error {{call to undeclared function 'vcmla_rot90_lane_f64'}}
16+
vcmla_rot90_laneq_f64(v1f64, v1f64, v2f64, 0); // expected-error {{call to undeclared function 'vcmla_rot90_laneq_f64'}}
17+
vcmlaq_rot90_lane_f64(v2f64, v2f64, v1f64, 0); // expected-error {{call to undeclared function 'vcmlaq_rot90_lane_f64'}}
18+
vcmlaq_rot90_laneq_f64(v2f64, v2f64, v2f64, 0); // expected-error {{call to undeclared function 'vcmlaq_rot90_laneq_f64'}}
19+
20+
vcmla_rot180_f64(v1f64, v1f64, v1f64); // expected-error {{call to undeclared function 'vcmla_rot180_f64'}}
21+
vcmla_rot180_lane_f64(v1f64, v1f64, v1f64, 0); // expected-error {{call to undeclared function 'vcmla_rot180_lane_f64'}}
22+
vcmla_rot180_laneq_f64(v1f64, v1f64, v2f64, 0); // expected-error {{call to undeclared function 'vcmla_rot180_laneq_f64'}}
23+
vcmlaq_rot180_lane_f64(v2f64, v2f64, v1f64, 0); // expected-error {{call to undeclared function 'vcmlaq_rot180_lane_f64'}}
24+
vcmlaq_rot180_laneq_f64(v2f64, v2f64, v2f64, 0); // expected-error {{call to undeclared function 'vcmlaq_rot180_laneq_f64'}}
25+
26+
vcmla_rot270_f64(v1f64, v1f64, v1f64); // expected-error {{call to undeclared function 'vcmla_rot270_f64'}}
27+
vcmla_rot270_lane_f64(v1f64, v1f64, v1f64, 0); // expected-error {{call to undeclared function 'vcmla_rot270_lane_f64'}}
28+
vcmla_rot270_laneq_f64(v1f64, v1f64, v2f64, 0); // expected-error {{call to undeclared function 'vcmla_rot270_laneq_f64'}}
29+
vcmlaq_rot270_lane_f64(v2f64, v2f64, v1f64, 0); // expected-error {{call to undeclared function 'vcmlaq_rot270_lane_f64'}}
30+
vcmlaq_rot270_laneq_f64(v1f64, v1f64, v2f64, 0); // expected-error {{call to undeclared function 'vcmlaq_rot270_laneq_f64'}}
31+
}

0 commit comments

Comments
 (0)