Skip to content

Commit 6abf4f3

Browse files
authored
[Headers][X86] Allow AVX movddup/movsldup/movshdup intrinsics to be used in constexpr (#152340)
Matches SSE3 handling
1 parent b83f7f1 commit 6abf4f3

File tree

4 files changed: 12 additions and 6 deletions.

clang/lib/Headers/avx512fintrin.h

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5303,7 +5303,7 @@ _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
53035303
(__mmask8) __U);
53045304
}
53055305

5306-
static __inline__ __m512d __DEFAULT_FN_ATTRS512
5306+
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
53075307
_mm512_movedup_pd (__m512d __A)
53085308
{
53095309
return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
@@ -8665,7 +8665,7 @@ _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
86658665
_mm512_setzero_si512());
86668666
}
86678667

8668-
static __inline__ __m512 __DEFAULT_FN_ATTRS512
8668+
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
86698669
_mm512_movehdup_ps (__m512 __A)
86708670
{
86718671
return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
@@ -8688,7 +8688,7 @@ _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
86888688
(__v16sf)_mm512_setzero_ps());
86898689
}
86908690

8691-
static __inline__ __m512 __DEFAULT_FN_ATTRS512
8691+
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
86928692
_mm512_moveldup_ps (__m512 __A)
86938693
{
86948694
return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,

clang/lib/Headers/avxintrin.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2392,7 +2392,7 @@ _mm256_cvtss_f32(__m256 __a)
23922392
/// return value.
23932393
/// \returns A 256-bit vector of [8 x float] containing the moved and duplicated
23942394
/// values.
2395-
static __inline __m256 __DEFAULT_FN_ATTRS
2395+
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
23962396
_mm256_movehdup_ps(__m256 __a)
23972397
{
23982398
return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 1, 1, 3, 3, 5, 5, 7, 7);
@@ -2417,7 +2417,7 @@ _mm256_movehdup_ps(__m256 __a)
24172417
/// return value.
24182418
/// \returns A 256-bit vector of [8 x float] containing the moved and duplicated
24192419
/// values.
2420-
static __inline __m256 __DEFAULT_FN_ATTRS
2420+
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
24212421
_mm256_moveldup_ps(__m256 __a)
24222422
{
24232423
return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 0, 2, 2, 4, 4, 6, 6);
@@ -2439,7 +2439,7 @@ _mm256_moveldup_ps(__m256 __a)
24392439
/// the return value.
24402440
/// \returns A 256-bit vector of [4 x double] containing the moved and
24412441
/// duplicated values.
2442-
static __inline __m256d __DEFAULT_FN_ATTRS
2442+
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
24432443
_mm256_movedup_pd(__m256d __a)
24442444
{
24452445
return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 0, 2, 2);

clang/test/CodeGen/X86/avx-builtins.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1262,18 +1262,21 @@ __m256d test_mm256_movedup_pd(__m256d A) {
12621262
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
12631263
return _mm256_movedup_pd(A);
12641264
}
1265+
TEST_CONSTEXPR(match_m256d(_mm256_movedup_pd((__m256d){+7.0, -7.0, -42.0, +42.0}), +7.0, +7.0, -42.0, -42.0));
12651266

12661267
__m256 test_mm256_movehdup_ps(__m256 A) {
12671268
// CHECK-LABEL: test_mm256_movehdup_ps
12681269
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
12691270
return _mm256_movehdup_ps(A);
12701271
}
1272+
TEST_CONSTEXPR(match_m256(_mm256_movehdup_ps((__m256){+1.0f,-1.0f,+2.0f,+4.0f,+8.0f,-8.0f,-3.0f,+3.0f}), -1.0f, -1.0f, +4.0f, +4.0f, -8.0f, -8.0f, +3.0f, +3.0f));
12711273

12721274
__m256 test_mm256_moveldup_ps(__m256 A) {
12731275
// CHECK-LABEL: test_mm256_moveldup_ps
12741276
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
12751277
return _mm256_moveldup_ps(A);
12761278
}
1279+
TEST_CONSTEXPR(match_m256(_mm256_moveldup_ps((__m256){+1.0f,-1.0f,+2.0f,+4.0f,+8.0f,-8.0f,-3.0f,+3.0f}), +1.0f, +1.0f, +2.0f, +2.0f, +8.0f, +8.0f, -3.0f, -3.0f));
12771280

12781281
int test_mm256_movemask_pd(__m256d A) {
12791282
// CHECK-LABEL: test_mm256_movemask_pd

clang/test/CodeGen/X86/avx512f-builtins.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4395,6 +4395,7 @@ __m512d test_mm512_movedup_pd(__m512d __A) {
43954395
// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
43964396
return _mm512_movedup_pd(__A);
43974397
}
4398+
TEST_CONSTEXPR(match_m512d(_mm512_movedup_pd((__m512d){-1.0, +2.0, +3.0, +4.0, -5.0, -6.0, +7.0, +8.0}), -1.0, -1.0, +3.0, +3.0, -5.0, -5.0, +7.0, +7.0));
43984399

43994400
__m512d test_mm512_mask_movedup_pd(__m512d __W, __mmask8 __U, __m512d __A) {
44004401
// CHECK-LABEL: test_mm512_mask_movedup_pd
@@ -8691,6 +8692,7 @@ __m512 test_mm512_movehdup_ps(__m512 __A) {
86918692
// CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
86928693
return _mm512_movehdup_ps(__A);
86938694
}
8695+
TEST_CONSTEXPR(match_m512(_mm512_movehdup_ps((__m512){+1.0f,-1.0f,+2.0f,-2.0f,+3.0f,-3.0f,+4.0f,-4.0f,+5.0f,-5.0f,+6.0f,-6.0f,+7.0f,-7.0f,+8.0f,-8.0f}), -1.0f, -1.0f, -2.0f, -2.0f, -3.0f, -3.0f, -4.0f, -4.0f, -5.0f, -5.0f, -6.0f, -6.0f, -7.0f, -7.0f, -8.0f, -8.0f));
86948696

86958697
__m512 test_mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A) {
86968698
// CHECK-LABEL: test_mm512_mask_movehdup_ps
@@ -8711,6 +8713,7 @@ __m512 test_mm512_moveldup_ps(__m512 __A) {
87118713
// CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
87128714
return _mm512_moveldup_ps(__A);
87138715
}
8716+
TEST_CONSTEXPR(match_m512(_mm512_moveldup_ps((__m512){+1.0f,-1.0f,+2.0f,-2.0f,+3.0f,-3.0f,+4.0f,-4.0f,+5.0f,-5.0f,+6.0f,-6.0f,+7.0f,-7.0f,+8.0f,-8.0f}), +1.0f, +1.0f, +2.0f, +2.0f, +3.0f, +3.0f, +4.0f, +4.0f, +5.0f, +5.0f, +6.0f, +6.0f, +7.0f, +7.0f, +8.0f, +8.0f));
87148717

87158718
__m512 test_mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A) {
87168719
// CHECK-LABEL: test_mm512_mask_moveldup_ps

0 commit comments

Comments
 (0)