Skip to content

[Headers][X86] Allow AVX movddup/movsldup/movshdup intrinsics to be used in constexpr #152340

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions clang/lib/Headers/avx512fintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -5303,7 +5303,7 @@ _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
(__mmask8) __U);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_movedup_pd (__m512d __A)
{
return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
Expand Down Expand Up @@ -8665,7 +8665,7 @@ _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
_mm512_setzero_si512());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_movehdup_ps (__m512 __A)
{
return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
Expand All @@ -8688,7 +8688,7 @@ _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
(__v16sf)_mm512_setzero_ps());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_moveldup_ps (__m512 __A)
{
return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
Expand Down
6 changes: 3 additions & 3 deletions clang/lib/Headers/avxintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -2392,7 +2392,7 @@ _mm256_cvtss_f32(__m256 __a)
/// return value.
/// \returns A 256-bit vector of [8 x float] containing the moved and duplicated
/// values.
static __inline __m256 __DEFAULT_FN_ATTRS
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_movehdup_ps(__m256 __a)
{
return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 1, 1, 3, 3, 5, 5, 7, 7);
Expand All @@ -2417,7 +2417,7 @@ _mm256_movehdup_ps(__m256 __a)
/// return value.
/// \returns A 256-bit vector of [8 x float] containing the moved and duplicated
/// values.
static __inline __m256 __DEFAULT_FN_ATTRS
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_moveldup_ps(__m256 __a)
{
return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 0, 2, 2, 4, 4, 6, 6);
Expand All @@ -2439,7 +2439,7 @@ _mm256_moveldup_ps(__m256 __a)
/// the return value.
/// \returns A 256-bit vector of [4 x double] containing the moved and
/// duplicated values.
static __inline __m256d __DEFAULT_FN_ATTRS
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_movedup_pd(__m256d __a)
{
return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 0, 2, 2);
Expand Down
3 changes: 3 additions & 0 deletions clang/test/CodeGen/X86/avx-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -1258,18 +1258,21 @@ __m256d test_mm256_movedup_pd(__m256d A) {
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
return _mm256_movedup_pd(A);
}
TEST_CONSTEXPR(match_m256d(_mm256_movedup_pd((__m256d){+7.0, -7.0, -42.0, +42.0}), +7.0, +7.0, -42.0, -42.0));

__m256 test_mm256_movehdup_ps(__m256 A) {
// CHECK-LABEL: test_mm256_movehdup_ps
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
return _mm256_movehdup_ps(A);
}
TEST_CONSTEXPR(match_m256(_mm256_movehdup_ps((__m256){+1.0f,-1.0f,+2.0f,+4.0f,+8.0f,-8.0f,-3.0f,+3.0f}), -1.0f, -1.0f, +4.0f, +4.0f, -8.0f, -8.0f, +3.0f, +3.0f));

__m256 test_mm256_moveldup_ps(__m256 A) {
// CHECK-LABEL: test_mm256_moveldup_ps
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
return _mm256_moveldup_ps(A);
}
TEST_CONSTEXPR(match_m256(_mm256_moveldup_ps((__m256){+1.0f,-1.0f,+2.0f,+4.0f,+8.0f,-8.0f,-3.0f,+3.0f}), +1.0f, +1.0f, +2.0f, +2.0f, +8.0f, +8.0f, -3.0f, -3.0f));

int test_mm256_movemask_pd(__m256d A) {
// CHECK-LABEL: test_mm256_movemask_pd
Expand Down
3 changes: 3 additions & 0 deletions clang/test/CodeGen/X86/avx512f-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -4386,6 +4386,7 @@ __m512d test_mm512_movedup_pd(__m512d __A) {
// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
return _mm512_movedup_pd(__A);
}
TEST_CONSTEXPR(match_m512d(_mm512_movedup_pd((__m512d){-1.0, +2.0, +3.0, +4.0, -5.0, -6.0, +7.0, +8.0}), -1.0, -1.0, +3.0, +3.0, -5.0, -5.0, +7.0, +7.0));

__m512d test_mm512_mask_movedup_pd(__m512d __W, __mmask8 __U, __m512d __A) {
// CHECK-LABEL: test_mm512_mask_movedup_pd
Expand Down Expand Up @@ -8682,6 +8683,7 @@ __m512 test_mm512_movehdup_ps(__m512 __A) {
// CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
return _mm512_movehdup_ps(__A);
}
TEST_CONSTEXPR(match_m512(_mm512_movehdup_ps((__m512){+1.0f,-1.0f,+2.0f,-2.0f,+3.0f,-3.0f,+4.0f,-4.0f,+5.0f,-5.0f,+6.0f,-6.0f,+7.0f,-7.0f,+8.0f,-8.0f}), -1.0f, -1.0f, -2.0f, -2.0f, -3.0f, -3.0f, -4.0f, -4.0f, -5.0f, -5.0f, -6.0f, -6.0f, -7.0f, -7.0f, -8.0f, -8.0f));

__m512 test_mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A) {
// CHECK-LABEL: test_mm512_mask_movehdup_ps
Expand All @@ -8702,6 +8704,7 @@ __m512 test_mm512_moveldup_ps(__m512 __A) {
// CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
return _mm512_moveldup_ps(__A);
}
TEST_CONSTEXPR(match_m512(_mm512_moveldup_ps((__m512){+1.0f,-1.0f,+2.0f,-2.0f,+3.0f,-3.0f,+4.0f,-4.0f,+5.0f,-5.0f,+6.0f,-6.0f,+7.0f,-7.0f,+8.0f,-8.0f}), +1.0f, +1.0f, +2.0f, +2.0f, +3.0f, +3.0f, +4.0f, +4.0f, +5.0f, +5.0f, +6.0f, +6.0f, +7.0f, +7.0f, +8.0f, +8.0f));

__m512 test_mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A) {
// CHECK-LABEL: test_mm512_mask_moveldup_ps
Expand Down
Loading