Skip to content

Commit 93cac97

Browse files
bojle and RKSimon authored
[Headers][X86] Allow AVX512fp16 initialization intrinsics to be used in constexpr (#159929)
Fixes #156866

---------

Signed-off-by: Shreeyash Pandey <[email protected]>
Co-authored-by: Simon Pilgrim <[email protected]>
1 parent 90a6884 commit 93cac97

File tree

2 files changed: 21 additions (+21) and 7 deletions (-7).

clang/lib/Headers/avx512vlfp16intrin.h

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,8 @@ static __inline__ _Float16 __DEFAULT_FN_ATTRS256 _mm256_cvtsh_h(__m256h __a) {
4242
return __a[0];
4343
}
4444

45-
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_set_sh(_Float16 __h) {
45+
static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
46+
_mm_set_sh(_Float16 __h) {
4647
return __extension__(__m128h){__h, 0, 0, 0, 0, 0, 0, 0};
4748
}
4849

@@ -57,23 +58,23 @@ _mm256_set1_ph(_Float16 __h) {
5758
__h, __h, __h, __h, __h, __h, __h, __h};
5859
}
5960

60-
static __inline __m128h __DEFAULT_FN_ATTRS128
61+
static __inline __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
6162
_mm_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4,
6263
_Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8) {
6364
return (__m128h)(__v8hf){__h8, __h7, __h6, __h5, __h4, __h3, __h2, __h1};
6465
}
6566

66-
static __inline __m256h __DEFAULT_FN_ATTRS256
67+
static __inline __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
6768
_mm256_set1_pch(_Float16 _Complex h) {
6869
return (__m256h)_mm256_set1_ps(__builtin_bit_cast(float, h));
6970
}
7071

71-
static __inline __m128h __DEFAULT_FN_ATTRS128
72+
static __inline __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
7273
_mm_set1_pch(_Float16 _Complex h) {
7374
return (__m128h)_mm_set1_ps(__builtin_bit_cast(float, h));
7475
}
7576

76-
static __inline __m256h __DEFAULT_FN_ATTRS256
77+
static __inline __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
7778
_mm256_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4,
7879
_Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8,
7980
_Float16 __h9, _Float16 __h10, _Float16 __h11, _Float16 __h12,
@@ -83,13 +84,13 @@ _mm256_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4,
8384
__h4, __h3, __h2, __h1};
8485
}
8586

86-
static __inline__ __m128h __DEFAULT_FN_ATTRS128
87+
static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
8788
_mm_setr_ph(_Float16 e0, _Float16 e1, _Float16 e2, _Float16 e3, _Float16 e4,
8889
_Float16 e5, _Float16 e6, _Float16 e7) {
8990
return _mm_set_ph(e7, e6, e5, e4, e3, e2, e1, e0);
9091
}
9192

92-
static __inline__ __m256h __DEFAULT_FN_ATTRS256
93+
static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
9394
_mm256_setr_ph(_Float16 e0, _Float16 e1, _Float16 e2, _Float16 e3, _Float16 e4,
9495
_Float16 e5, _Float16 e6, _Float16 e7, _Float16 e8, _Float16 e9,
9596
_Float16 e10, _Float16 e11, _Float16 e12, _Float16 e13,

clang/test/CodeGen/X86/avx512vlfp16-builtins.c

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,8 @@ __m128h test_mm_set_sh(_Float16 __h) {
3737
return _mm_set_sh(__h);
3838
}
3939

40+
TEST_CONSTEXPR(match_m128h(_mm_set_sh(2.0), 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0));
41+
4042
__m128h test_mm_set1_ph(_Float16 h) {
4143
// CHECK-LABEL: test_mm_set1_ph
4244
// CHECK: insertelement <8 x half> {{.*}}, i32 0
@@ -84,6 +86,8 @@ __m128h test_mm_set1_pch(_Float16 _Complex h) {
8486
return _mm_set1_pch(h);
8587
}
8688

89+
TEST_CONSTEXPR(match_m128h(_mm_set1_pch(1.0), 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0));
90+
8791
__m256h test_mm256_set1_pch(_Float16 _Complex h) {
8892
// CHECK-LABEL: test_mm256_set1_pch
8993
// CHECK: insertelement <8 x float> {{.*}}, i32 0
@@ -97,6 +101,8 @@ __m256h test_mm256_set1_pch(_Float16 _Complex h) {
97101
return _mm256_set1_pch(h);
98102
}
99103

104+
TEST_CONSTEXPR(match_m256h(_mm256_set1_pch(1.0), 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0));
105+
100106
__m128h test_mm_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4,
101107
_Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8) {
102108
// CHECK-LABEL: test_mm_set_ph
@@ -110,6 +116,7 @@ __m128h test_mm_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h
110116
// CHECK: insertelement <8 x half> {{.*}}, i32 7
111117
return _mm_set_ph(__h1, __h2, __h3, __h4, __h5, __h6, __h7, __h8);
112118
}
119+
TEST_CONSTEXPR(match_m128h(_mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0), 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0));
113120

114121
__m256h test_mm256_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4,
115122
_Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8,
@@ -136,6 +143,8 @@ __m256h test_mm256_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16
136143
__h9, __h10, __h11, __h12, __h13, __h14, __h15, __h16);
137144
}
138145

146+
TEST_CONSTEXPR(match_m256h(_mm256_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0), 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0));
147+
139148
__m128h test_mm_setr_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4,
140149
_Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8) {
141150
// CHECK-LABEL: test_mm_setr_ph
@@ -150,6 +159,8 @@ __m128h test_mm_setr_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __
150159
return _mm_setr_ph(__h1, __h2, __h3, __h4, __h5, __h6, __h7, __h8);
151160
}
152161

162+
TEST_CONSTEXPR(match_m128h(_mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0), 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0));
163+
153164
__m256h test_mm256_setr_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4,
154165
_Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8,
155166
_Float16 __h9, _Float16 __h10, _Float16 __h11, _Float16 __h12,
@@ -175,6 +186,8 @@ __m256h test_mm256_setr_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16
175186
__h9, __h10, __h11, __h12, __h13, __h14, __h15, __h16);
176187
}
177188

189+
TEST_CONSTEXPR(match_m256h(_mm256_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0), 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0));
190+
178191
__m256h test_mm256_add_ph(__m256h __A, __m256h __B) {
179192
// CHECK-LABEL: test_mm256_add_ph
180193
// CHECK: %{{.*}} = fadd <16 x half> %{{.*}}, %{{.*}}

0 commit comments

Comments
 (0)