Skip to content

Commit e64224a

Browse files
authored
[Headers][X86] Allow AVX cast intrinsics to be used in constexpr (#152730)
The "extend to 256-bit" casts (_mm256_castpd128_pd256, _mm256_castps128_ps256 and _mm256_castsi128_si256) are still not usable in constexpr: they leave the upper 128 bits of the result undefined, and constant evaluation does not accept undefined/poison values.
1 parent 76a533c commit e64224a

File tree

2 files changed

+18
-9
lines changed

2 files changed

+18
-9
lines changed

clang/lib/Headers/avxintrin.h

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4367,7 +4367,7 @@ _mm256_setzero_si256(void) {
43674367
/// A 256-bit floating-point vector of [4 x double].
43684368
/// \returns A 256-bit floating-point vector of [8 x float] containing the same
43694369
/// bitwise pattern as the parameter.
4370-
static __inline __m256 __DEFAULT_FN_ATTRS
4370+
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
43714371
_mm256_castpd_ps(__m256d __a)
43724372
{
43734373
return (__m256)__a;
@@ -4384,7 +4384,7 @@ _mm256_castpd_ps(__m256d __a)
43844384
/// A 256-bit floating-point vector of [4 x double].
43854385
/// \returns A 256-bit integer vector containing the same bitwise pattern as the
43864386
/// parameter.
4387-
static __inline __m256i __DEFAULT_FN_ATTRS
4387+
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
43884388
_mm256_castpd_si256(__m256d __a)
43894389
{
43904390
return (__m256i)__a;
@@ -4401,7 +4401,7 @@ _mm256_castpd_si256(__m256d __a)
44014401
/// A 256-bit floating-point vector of [8 x float].
44024402
/// \returns A 256-bit floating-point vector of [4 x double] containing the same
44034403
/// bitwise pattern as the parameter.
4404-
static __inline __m256d __DEFAULT_FN_ATTRS
4404+
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
44054405
_mm256_castps_pd(__m256 __a)
44064406
{
44074407
return (__m256d)__a;
@@ -4418,7 +4418,7 @@ _mm256_castps_pd(__m256 __a)
44184418
/// A 256-bit floating-point vector of [8 x float].
44194419
/// \returns A 256-bit integer vector containing the same bitwise pattern as the
44204420
/// parameter.
4421-
static __inline __m256i __DEFAULT_FN_ATTRS
4421+
static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
44224422
_mm256_castps_si256(__m256 __a)
44234423
{
44244424
return (__m256i)__a;
@@ -4435,7 +4435,7 @@ _mm256_castps_si256(__m256 __a)
44354435
/// A 256-bit integer vector.
44364436
/// \returns A 256-bit floating-point vector of [8 x float] containing the same
44374437
/// bitwise pattern as the parameter.
4438-
static __inline __m256 __DEFAULT_FN_ATTRS
4438+
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
44394439
_mm256_castsi256_ps(__m256i __a)
44404440
{
44414441
return (__m256)__a;
@@ -4452,7 +4452,7 @@ _mm256_castsi256_ps(__m256i __a)
44524452
/// A 256-bit integer vector.
44534453
/// \returns A 256-bit floating-point vector of [4 x double] containing the same
44544454
/// bitwise pattern as the parameter.
4455-
static __inline __m256d __DEFAULT_FN_ATTRS
4455+
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
44564456
_mm256_castsi256_pd(__m256i __a)
44574457
{
44584458
return (__m256d)__a;
@@ -4469,7 +4469,7 @@ _mm256_castsi256_pd(__m256i __a)
44694469
/// A 256-bit floating-point vector of [4 x double].
44704470
/// \returns A 128-bit floating-point vector of [2 x double] containing the
44714471
/// lower 128 bits of the parameter.
4472-
static __inline __m128d __DEFAULT_FN_ATTRS
4472+
static __inline __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
44734473
_mm256_castpd256_pd128(__m256d __a)
44744474
{
44754475
return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 1);
@@ -4486,7 +4486,7 @@ _mm256_castpd256_pd128(__m256d __a)
44864486
/// A 256-bit floating-point vector of [8 x float].
44874487
/// \returns A 128-bit floating-point vector of [4 x float] containing the
44884488
/// lower 128 bits of the parameter.
4489-
static __inline __m128 __DEFAULT_FN_ATTRS
4489+
static __inline __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
44904490
_mm256_castps256_ps128(__m256 __a)
44914491
{
44924492
return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 1, 2, 3);
@@ -4502,7 +4502,7 @@ _mm256_castps256_ps128(__m256 __a)
45024502
/// A 256-bit integer vector.
45034503
/// \returns A 128-bit integer vector containing the lower 128 bits of the
45044504
/// parameter.
4505-
static __inline __m128i __DEFAULT_FN_ATTRS
4505+
static __inline __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
45064506
_mm256_castsi256_si128(__m256i __a)
45074507
{
45084508
return __builtin_shufflevector((__v4di)__a, (__v4di)__a, 0, 1);

clang/test/CodeGen/X86/avx-builtins.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,11 +147,13 @@ __m256 test_mm256_castpd_ps(__m256d A) {
147147
// CHECK-LABEL: test_mm256_castpd_ps
148148
return _mm256_castpd_ps(A);
149149
}
150+
TEST_CONSTEXPR(match_m256(_mm256_castpd_ps((__m256d){-1.0, +2.0, +4.0, -6.0}), +0.0f, -1.875f, +0.0f, +2.0f, +0.0f, +2.25f, 0.0f, -2.375f));
150151

151152
__m256i test_mm256_castpd_si256(__m256d A) {
152153
// CHECK-LABEL: test_mm256_castpd_si256
153154
return _mm256_castpd_si256(A);
154155
}
156+
TEST_CONSTEXPR(match_m256i(_mm256_castpd_si256((__m256d){-1.0, +2.0, -3.0, +4.0}), 0xBFF0000000000000ULL, 0x4000000000000000ULL, 0xC008000000000000ULL, 0x4010000000000000ULL));
155157

156158
__m256d test_mm256_castpd128_pd256(__m128d A) {
157159
// CHECK-LABEL: test_mm256_castpd128_pd256
@@ -165,16 +167,19 @@ __m128d test_mm256_castpd256_pd128(__m256d A) {
165167
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <2 x i32> <i32 0, i32 1>
166168
return _mm256_castpd256_pd128(A);
167169
}
170+
TEST_CONSTEXPR(match_m128d(_mm256_castpd256_pd128((__m256d){-1.0, +2.0, -3.0, +4.0}), -1.0, +2.0));
168171

169172
__m256d test_mm256_castps_pd(__m256 A) {
170173
// CHECK-LABEL: test_mm256_castps_pd
171174
return _mm256_castps_pd(A);
172175
}
176+
TEST_CONSTEXPR(match_m256d(_mm256_castps_pd((__m256){0.0f, -1.0f, 0.0f, 4.0f, 0.0f, -2.0f, 0.0f, 6.0f}), -0.0078125, 512.0, -2.0, +8192.0));
173177

174178
__m256i test_mm256_castps_si256(__m256 A) {
175179
// CHECK-LABEL: test_mm256_castps_si256
176180
return _mm256_castps_si256(A);
177181
}
182+
TEST_CONSTEXPR(match_m256i(_mm256_castps_si256((__m256){1.0f, -2.0f, -4.0f, 8.0f, -16.0f, +16.0f, +32.0f, -32.0f}), 0xC00000003F800000ULL, 0x41000000c0800000ULL, 0x41800000C1800000ULL, 0xC200000042000000ULL));
178183

179184
__m256 test_mm256_castps128_ps256(__m128 A) {
180185
// CHECK-LABEL: test_mm256_castps128_ps256
@@ -188,6 +193,7 @@ __m128 test_mm256_castps256_ps128(__m256 A) {
188193
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
189194
return _mm256_castps256_ps128(A);
190195
}
196+
TEST_CONSTEXPR(match_m128(_mm256_castps256_ps128((__m256){1.0f, -2.0f, -4.0f, 8.0f, -16.0f, +16.0f, +32.0f, -32.0f}), 1.0f, -2.0f, -4.0f, 8.0f));
191197

192198
__m256i test_mm256_castsi128_si256(__m128i A) {
193199
// CHECK-LABEL: test_mm256_castsi128_si256
@@ -200,17 +206,20 @@ __m256d test_mm256_castsi256_pd(__m256i A) {
200206
// CHECK-LABEL: test_mm256_castsi256_pd
201207
return _mm256_castsi256_pd(A);
202208
}
209+
TEST_CONSTEXPR(match_m256d(_mm256_castsi256_pd((__m256i)(__v4du){0x4070000000000000ULL, 0xC000000000000000ULL, 0xBFF0000000000000ULL, 0xC008000000000000ULL}), 256.0, -2.0, -1.0, -3.0));
203210

204211
__m256 test_mm256_castsi256_ps(__m256i A) {
205212
// CHECK-LABEL: test_mm256_castsi256_ps
206213
return _mm256_castsi256_ps(A);
207214
}
215+
TEST_CONSTEXPR(match_m256(_mm256_castsi256_ps((__m256i)(__v4du){0x42000000c1800000ULL, 0x43000000c2800000ULL, 0x41000000c0800000ULL, 0xC00000003F800000ULL}), -16.0f, 32.0f, -64.0f, 128.0f, -4.0f, 8.0f, 1.0f, -2.0f));
208216

209217
__m128i test_mm256_castsi256_si128(__m256i A) {
210218
// CHECK-LABEL: test_mm256_castsi256_si128
211219
// CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <2 x i32> <i32 0, i32 1>
212220
return _mm256_castsi256_si128(A);
213221
}
222+
TEST_CONSTEXPR(match_m128i(_mm256_castsi256_si128((__m256i)(__v4du){0xBFF0000000000000ULL, 0x4070000000000000ULL, 0xC000000000000000ULL, 0xC008000000000000ULL}), 0xBFF0000000000000ULL, 0x4070000000000000ULL));
214223

215224
__m256d test_mm256_ceil_pd(__m256d x) {
216225
// CHECK-LABEL: test_mm256_ceil_pd

0 commit comments

Comments
 (0)