Skip to content

Commit af1bb28

Browse files
committed
[AArch64][ARM] Alter v8.3a complex neon intrinsics to be target-based, not preprocessor based
This alters the 8.3 complex intrinsics to be target-gated, as opposed to hidden behind preprocessor macros. This is the last of arm_neon.h, and follows the same formula as before. Differential Revision: https://reviews.llvm.org/D135647
1 parent b92725e commit af1bb28

File tree

4 files changed

+64
-32
lines changed

4 files changed

+64
-32
lines changed

clang/include/clang/Basic/arm_neon.td

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1964,23 +1964,23 @@ multiclass VCMLA_ROTS<string type, string lanety, string laneqty> {
19641964
}
19651965

19661966
// v8.3-A Vector complex addition intrinsics
1967-
let ArchGuard = "defined(__ARM_FEATURE_COMPLEX)", TargetGuard = "fullfp16" in {
1967+
let TargetGuard = "v8.3a,fullfp16" in {
19681968
def VCADD_ROT90_FP16 : SInst<"vcadd_rot90", "...", "h">;
19691969
def VCADD_ROT270_FP16 : SInst<"vcadd_rot270", "...", "h">;
19701970
def VCADDQ_ROT90_FP16 : SInst<"vcaddq_rot90", "QQQ", "h">;
19711971
def VCADDQ_ROT270_FP16 : SInst<"vcaddq_rot270", "QQQ", "h">;
19721972

19731973
defm VCMLA_FP16 : VCMLA_ROTS<"h", "uint32x2_t", "uint32x4_t">;
19741974
}
1975-
let ArchGuard = "defined(__ARM_FEATURE_COMPLEX)" in {
1975+
let TargetGuard = "v8.3a" in {
19761976
def VCADD_ROT90 : SInst<"vcadd_rot90", "...", "f">;
19771977
def VCADD_ROT270 : SInst<"vcadd_rot270", "...", "f">;
19781978
def VCADDQ_ROT90 : SInst<"vcaddq_rot90", "QQQ", "f">;
19791979
def VCADDQ_ROT270 : SInst<"vcaddq_rot270", "QQQ", "f">;
19801980

19811981
defm VCMLA_F32 : VCMLA_ROTS<"f", "uint64x1_t", "uint64x2_t">;
19821982
}
1983-
let ArchGuard = "defined(__ARM_FEATURE_COMPLEX) && defined(__aarch64__)" in {
1983+
let ArchGuard = "defined(__aarch64__)", TargetGuard = "v8.3a" in {
19841984
def VCADDQ_ROT90_FP64 : SInst<"vcaddq_rot90", "QQQ", "d">;
19851985
def VCADDQ_ROT270_FP64 : SInst<"vcaddq_rot270", "QQQ", "d">;
19861986

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 40 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -5667,10 +5667,16 @@ static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
56675667
NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
56685668
NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
56695669
NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
5670-
NEONMAP1(vcadd_rot270_v, arm_neon_vcadd_rot270, Add1ArgType),
5671-
NEONMAP1(vcadd_rot90_v, arm_neon_vcadd_rot90, Add1ArgType),
5672-
NEONMAP1(vcaddq_rot270_v, arm_neon_vcadd_rot270, Add1ArgType),
5673-
NEONMAP1(vcaddq_rot90_v, arm_neon_vcadd_rot90, Add1ArgType),
5670+
NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
5671+
NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
5672+
NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
5673+
NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
5674+
NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
5675+
NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
5676+
NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
5677+
NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
5678+
NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
5679+
NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
56745680
NEONMAP1(vcage_v, arm_neon_vacge, 0),
56755681
NEONMAP1(vcageq_v, arm_neon_vacge, 0),
56765682
NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
@@ -5985,10 +5991,16 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
59855991
NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
59865992
NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
59875993
NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
5988-
NEONMAP1(vcadd_rot270_v, aarch64_neon_vcadd_rot270, Add1ArgType),
5989-
NEONMAP1(vcadd_rot90_v, aarch64_neon_vcadd_rot90, Add1ArgType),
5990-
NEONMAP1(vcaddq_rot270_v, aarch64_neon_vcadd_rot270, Add1ArgType),
5991-
NEONMAP1(vcaddq_rot90_v, aarch64_neon_vcadd_rot90, Add1ArgType),
5994+
NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
5995+
NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
5996+
NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
5997+
NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
5998+
NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
5999+
NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
6000+
NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType),
6001+
NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
6002+
NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
6003+
NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType),
59926004
NEONMAP1(vcage_v, aarch64_neon_facge, 0),
59936005
NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
59946006
NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
@@ -6011,14 +6023,26 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
60116023
NEONMAP0(vcltzq_v),
60126024
NEONMAP1(vclz_v, ctlz, Add1ArgType),
60136025
NEONMAP1(vclzq_v, ctlz, Add1ArgType),
6014-
NEONMAP1(vcmla_rot180_v, aarch64_neon_vcmla_rot180, Add1ArgType),
6015-
NEONMAP1(vcmla_rot270_v, aarch64_neon_vcmla_rot270, Add1ArgType),
6016-
NEONMAP1(vcmla_rot90_v, aarch64_neon_vcmla_rot90, Add1ArgType),
6017-
NEONMAP1(vcmla_v, aarch64_neon_vcmla_rot0, Add1ArgType),
6018-
NEONMAP1(vcmlaq_rot180_v, aarch64_neon_vcmla_rot180, Add1ArgType),
6019-
NEONMAP1(vcmlaq_rot270_v, aarch64_neon_vcmla_rot270, Add1ArgType),
6020-
NEONMAP1(vcmlaq_rot90_v, aarch64_neon_vcmla_rot90, Add1ArgType),
6021-
NEONMAP1(vcmlaq_v, aarch64_neon_vcmla_rot0, Add1ArgType),
6026+
NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
6027+
NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
6028+
NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
6029+
NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
6030+
NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
6031+
NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
6032+
NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
6033+
NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
6034+
NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
6035+
NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
6036+
NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType),
6037+
NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
6038+
NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
6039+
NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType),
6040+
NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
6041+
NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
6042+
NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType),
6043+
NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
6044+
NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
6045+
NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType),
60226046
NEONMAP1(vcnt_v, ctpop, Add1ArgType),
60236047
NEONMAP1(vcntq_v, ctpop, Add1ArgType),
60246048
NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
@@ -6456,10 +6480,6 @@ static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
64566480
{ NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
64576481
{ NEON::BI__builtin_neon_vbsl_f16, NEON::BI__builtin_neon_vbsl_v, },
64586482
{ NEON::BI__builtin_neon_vbslq_f16, NEON::BI__builtin_neon_vbslq_v, },
6459-
{ NEON::BI__builtin_neon_vcadd_rot270_f16, NEON::BI__builtin_neon_vcadd_rot270_v, },
6460-
{ NEON::BI__builtin_neon_vcadd_rot90_f16, NEON::BI__builtin_neon_vcadd_rot90_v, },
6461-
{ NEON::BI__builtin_neon_vcaddq_rot270_f16, NEON::BI__builtin_neon_vcaddq_rot270_v, },
6462-
{ NEON::BI__builtin_neon_vcaddq_rot90_f16, NEON::BI__builtin_neon_vcaddq_rot90_v, },
64636483
{ NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
64646484
{ NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
64656485
{ NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
@@ -6478,14 +6498,6 @@ static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
64786498
{ NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
64796499
{ NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
64806500
{ NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
6481-
{ NEON::BI__builtin_neon_vcmla_f16, NEON::BI__builtin_neon_vcmla_v, },
6482-
{ NEON::BI__builtin_neon_vcmla_rot180_f16, NEON::BI__builtin_neon_vcmla_rot180_v, },
6483-
{ NEON::BI__builtin_neon_vcmla_rot270_f16, NEON::BI__builtin_neon_vcmla_rot270_v, },
6484-
{ NEON::BI__builtin_neon_vcmla_rot90_f16, NEON::BI__builtin_neon_vcmla_rot90_v, },
6485-
{ NEON::BI__builtin_neon_vcmlaq_f16, NEON::BI__builtin_neon_vcmlaq_v, },
6486-
{ NEON::BI__builtin_neon_vcmlaq_rot180_f16, NEON::BI__builtin_neon_vcmlaq_rot180_v, },
6487-
{ NEON::BI__builtin_neon_vcmlaq_rot270_f16, NEON::BI__builtin_neon_vcmlaq_rot270_v, },
6488-
{ NEON::BI__builtin_neon_vcmlaq_rot90_f16, NEON::BI__builtin_neon_vcmlaq_rot90_v, },
64896501
{ NEON::BI__builtin_neon_vext_f16, NEON::BI__builtin_neon_vext_v, },
64906502
{ NEON::BI__builtin_neon_vextq_f16, NEON::BI__builtin_neon_vextq_v, },
64916503
{ NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },

clang/test/Sema/aarch64-neon-target.c

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,12 +48,19 @@ void test_v81(int32x2_t d, int32x4_t v, int s) {
4848
vqrdmlahh_s16(1, 1, 1);
4949
}
5050

51+
__attribute__((target("arch=armv8.3-a+fp16")))
52+
void test_v83(float32x4_t v4f32, float16x4_t v4f16, float64x2_t v2f64) {
53+
vcaddq_rot90_f32(v4f32, v4f32);
54+
vcmla_rot90_f16(v4f16, v4f16, v4f16);
55+
vcmlaq_rot270_laneq_f64(v2f64, v2f64, v2f64, 1);
56+
}
57+
5158
__attribute__((target("arch=armv8.5-a")))
5259
void test_v85(float32x4_t v4f32) {
5360
vrnd32xq_f32(v4f32);
5461
}
5562

56-
void undefined(uint32x2_t v2i32, uint32x4_t v4i32, uint16x8_t v8i16, uint8x16_t v16i8, uint8x8_t v8i8, float32x2_t v2f32, float32x4_t v4f32, float16x4_t v4f16, bfloat16x4_t v4bf16, __bf16 bf16) {
63+
void undefined(uint32x2_t v2i32, uint32x4_t v4i32, uint16x8_t v8i16, uint8x16_t v16i8, uint8x8_t v8i8, float32x2_t v2f32, float32x4_t v4f32, float16x4_t v4f16, float64x2_t v2f64, bfloat16x4_t v4bf16, __bf16 bf16) {
5764
// dotprod
5865
vdot_u32(v2i32, v8i8, v8i8); // expected-error {{always_inline function 'vdot_u32' requires target feature 'dotprod'}}
5966
vdot_laneq_u32(v2i32, v8i8, v16i8, 1); // expected-error {{always_inline function 'vdot_u32' requires target feature 'dotprod'}}
@@ -79,6 +86,10 @@ void undefined(uint32x2_t v2i32, uint32x4_t v4i32, uint16x8_t v8i16, uint8x16_t
7986
vqrdmlahq_s32(v4i32, v4i32, v4i32); // expected-error {{always_inline function 'vqrdmlahq_s32' requires target feature 'v8.1a'}}
8087
vqrdmlah_laneq_s32(v2i32, v2i32, v4i32, 1); // expected-error {{always_inline function 'vqrdmlah_s32' requires target feature 'v8.1a'}}
8188
vqrdmlahh_s16(1, 1, 1); // expected-error {{always_inline function 'vqrdmlahh_s16' requires target feature 'v8.1a'}}
89+
// 8.3 - complex
90+
vcaddq_rot90_f32(v4f32, v4f32); // expected-error {{always_inline function 'vcaddq_rot90_f32' requires target feature 'v8.3a'}}
91+
vcmla_rot90_f16(v4f16, v4f16, v4f16); // expected-error {{always_inline function 'vcmla_rot90_f16' requires target feature 'v8.3a'}}
92+
vcmlaq_rot270_laneq_f64(v2f64, v2f64, v2f64, 1); // expected-error {{always_inline function 'vcmlaq_rot270_f64' requires target feature 'v8.3a'}}
8293
// 8.5 - frint
8394
vrnd32xq_f32(v4f32); // expected-error {{always_inline function 'vrnd32xq_f32' requires target feature 'v8.5a'}}
8495
}

clang/test/Sema/arm-neon-target.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,12 @@ void test_v81(int32x2_t d, int32x4_t v, int s) {
3838
vqrdmlahq_s32(v, v, v);
3939
}
4040

41+
__attribute__((target("v8.3a,fullfp16")))
42+
void test_v83(float32x4_t v4f32, float16x4_t v4f16) {
43+
vcaddq_rot90_f32(v4f32, v4f32);
44+
vcmla_rot90_f16(v4f16, v4f16, v4f16);
45+
}
46+
4147
void undefined(uint32x2_t v2i32, uint32x4_t v4i32, uint16x8_t v8i16, uint8x16_t v16i8, uint8x8_t v8i8, float32x2_t v2f32, float32x4_t v4f32, float16x4_t v4f16, bfloat16x4_t v4bf16, __bf16 bf16) {
4248
// dotprod
4349
vdot_u32(v2i32, v8i8, v8i8); // expected-error {{always_inline function 'vdot_u32' requires target feature 'dotprod'}}
@@ -57,4 +63,7 @@ void undefined(uint32x2_t v2i32, uint32x4_t v4i32, uint16x8_t v8i16, uint8x16_t
5763
vcvt_bf16_f32(v4f32); // expected-error {{always_inline function 'vcvt_bf16_f32' requires target feature 'bf16'}}
5864
// v8.1 - qrdmla
5965
vqrdmlahq_s32(v4i32, v4i32, v4i32); // expected-error {{always_inline function 'vqrdmlahq_s32' requires target feature 'v8.1a'}}
66+
// 8.3 - complex
67+
vcaddq_rot90_f32(v4f32, v4f32); // expected-error {{always_inline function 'vcaddq_rot90_f32' requires target feature 'v8.3a'}}
68+
vcmla_rot90_f16(v4f16, v4f16, v4f16); // expected-error {{always_inline function 'vcmla_rot90_f16' requires target feature 'v8.3a'}}
6069
}

0 commit comments

Comments
 (0)