[Headers][X86] Allow AVX cast intrinsics to be used in constexpr #152730

RKSimon · 2025-08-08T14:15:11Z

Still missing the "extend to 256-bit" casts - _mm256_castpd128_pd256 etc. - due to constexpr not liking undefined/poison etc.

llvmbot · 2025-08-08T14:15:42Z

@llvm/pr-subscribers-backend-x86

@llvm/pr-subscribers-clang

Author: Simon Pilgrim (RKSimon)

Changes

Still missing the "extend to 256-bit" casts - _mm256_castpd128_pd256 etc. - due to constexpr not liking undefined/poison etc.

Full diff: https://github.com/llvm/llvm-project/pull/152730.diff

2 Files Affected:

(modified) clang/lib/Headers/avxintrin.h (+9-9)
(modified) clang/test/CodeGen/X86/avx-builtins.c (+9)

diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index 2be4f68067a5c..5a6d48bc246e3 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -4367,7 +4367,7 @@ _mm256_setzero_si256(void) {
 ///    A 256-bit floating-point vector of [4 x double].
 /// \returns A 256-bit floating-point vector of [8 x float] containing the same
 ///    bitwise pattern as the parameter.
-static __inline __m256 __DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_castpd_ps(__m256d __a)
 {
   return (__m256)__a;
@@ -4384,7 +4384,7 @@ _mm256_castpd_ps(__m256d __a)
 ///    A 256-bit floating-point vector of [4 x double].
 /// \returns A 256-bit integer vector containing the same bitwise pattern as the
 ///    parameter.
-static __inline __m256i __DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_castpd_si256(__m256d __a)
 {
   return (__m256i)__a;
@@ -4401,7 +4401,7 @@ _mm256_castpd_si256(__m256d __a)
 ///    A 256-bit floating-point vector of [8 x float].
 /// \returns A 256-bit floating-point vector of [4 x double] containing the same
 ///    bitwise pattern as the parameter.
-static __inline __m256d __DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_castps_pd(__m256 __a)
 {
   return (__m256d)__a;
@@ -4418,7 +4418,7 @@ _mm256_castps_pd(__m256 __a)
 ///    A 256-bit floating-point vector of [8 x float].
 /// \returns A 256-bit integer vector containing the same bitwise pattern as the
 ///    parameter.
-static __inline __m256i __DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_castps_si256(__m256 __a)
 {
   return (__m256i)__a;
@@ -4435,7 +4435,7 @@ _mm256_castps_si256(__m256 __a)
 ///    A 256-bit integer vector.
 /// \returns A 256-bit floating-point vector of [8 x float] containing the same
 ///    bitwise pattern as the parameter.
-static __inline __m256 __DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_castsi256_ps(__m256i __a)
 {
   return (__m256)__a;
@@ -4452,7 +4452,7 @@ _mm256_castsi256_ps(__m256i __a)
 ///    A 256-bit integer vector.
 /// \returns A 256-bit floating-point vector of [4 x double] containing the same
 ///    bitwise pattern as the parameter.
-static __inline __m256d __DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_castsi256_pd(__m256i __a)
 {
   return (__m256d)__a;
@@ -4469,7 +4469,7 @@ _mm256_castsi256_pd(__m256i __a)
 ///    A 256-bit floating-point vector of [4 x double].
 /// \returns A 128-bit floating-point vector of [2 x double] containing the
 ///    lower 128 bits of the parameter.
-static __inline __m128d __DEFAULT_FN_ATTRS
+static __inline __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_castpd256_pd128(__m256d __a)
 {
   return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 1);
@@ -4486,7 +4486,7 @@ _mm256_castpd256_pd128(__m256d __a)
 ///    A 256-bit floating-point vector of [8 x float].
 /// \returns A 128-bit floating-point vector of [4 x float] containing the
 ///    lower 128 bits of the parameter.
-static __inline __m128 __DEFAULT_FN_ATTRS
+static __inline __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_castps256_ps128(__m256 __a)
 {
   return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 1, 2, 3);
@@ -4502,7 +4502,7 @@ _mm256_castps256_ps128(__m256 __a)
 ///    A 256-bit integer vector.
 /// \returns A 128-bit integer vector containing the lower 128 bits of the
 ///    parameter.
-static __inline __m128i __DEFAULT_FN_ATTRS
+static __inline __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_castsi256_si128(__m256i __a)
 {
   return __builtin_shufflevector((__v4di)__a, (__v4di)__a, 0, 1);
diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index e2c9f96139fc0..28cad0041a21a 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -147,11 +147,13 @@ __m256 test_mm256_castpd_ps(__m256d A) {
   // CHECK-LABEL: test_mm256_castpd_ps
   return _mm256_castpd_ps(A);
 }
+TEST_CONSTEXPR(match_m256(_mm256_castpd_ps((__m256d){-1.0, +2.0, +4.0, -6.0}), +0.0f, -1.875f, +0.0f, +2.0f, +0.0f, +2.25f, 0.0f, -2.375f));
 
 __m256i test_mm256_castpd_si256(__m256d A) {
   // CHECK-LABEL: test_mm256_castpd_si256
   return _mm256_castpd_si256(A);
 }
+TEST_CONSTEXPR(match_m256i(_mm256_castpd_si256((__m256d){-1.0, +2.0, -3.0, +4.0}), 0xBFF0000000000000ULL, 0x4000000000000000ULL, 0xC008000000000000ULL, 0x4010000000000000ULL));
 
 __m256d test_mm256_castpd128_pd256(__m128d A) {
   // CHECK-LABEL: test_mm256_castpd128_pd256
@@ -165,16 +167,19 @@ __m128d test_mm256_castpd256_pd128(__m256d A) {
   // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <2 x i32> <i32 0, i32 1>
   return _mm256_castpd256_pd128(A);
 }
+TEST_CONSTEXPR(match_m128d(_mm256_castpd256_pd128((__m256d){-1.0, +2.0, -3.0, +4.0}), -1.0, +2.0));
 
 __m256d test_mm256_castps_pd(__m256 A) {
   // CHECK-LABEL: test_mm256_castps_pd
   return _mm256_castps_pd(A);
 }
+TEST_CONSTEXPR(match_m256d(_mm256_castps_pd((__m256){0.0f, -1.0f, 0.0f, 4.0f, 0.0f, -2.0f, 0.0f, 6.0f}), -0.0078125, 512.0, -2.0, +8192.0));
 
 __m256i test_mm256_castps_si256(__m256 A) {
   // CHECK-LABEL: test_mm256_castps_si256
   return _mm256_castps_si256(A);
 }
+TEST_CONSTEXPR(match_m256i(_mm256_castps_si256((__m256){1.0f, -2.0f, -4.0f, 8.0f, -16.0f, +16.0f, +32.0f, -32.0f}), 0xC00000003F800000ULL, 0x41000000c0800000ULL, 0x41800000C1800000ULL, 0xC200000042000000ULL));
 
 __m256 test_mm256_castps128_ps256(__m128 A) {
   // CHECK-LABEL: test_mm256_castps128_ps256
@@ -188,6 +193,7 @@ __m128 test_mm256_castps256_ps128(__m256 A) {
   // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   return _mm256_castps256_ps128(A);
 }
+TEST_CONSTEXPR(match_m128(_mm256_castps256_ps128((__m256){1.0f, -2.0f, -4.0f, 8.0f, -16.0f, +16.0f, +32.0f, -32.0f}), 1.0f, -2.0f, -4.0f, 8.0f));
 
 __m256i test_mm256_castsi128_si256(__m128i A) {
   // CHECK-LABEL: test_mm256_castsi128_si256
@@ -200,17 +206,20 @@ __m256d test_mm256_castsi256_pd(__m256i A) {
   // CHECK-LABEL: test_mm256_castsi256_pd
   return _mm256_castsi256_pd(A);
 }
+TEST_CONSTEXPR(match_m256d(_mm256_castsi256_pd((__m256i)(__v4du){0x4070000000000000ULL, 0xC000000000000000ULL, 0xBFF0000000000000ULL, 0xC008000000000000ULL}), 256.0, -2.0, -1.0, -3.0));
 
 __m256 test_mm256_castsi256_ps(__m256i A) {
   // CHECK-LABEL: test_mm256_castsi256_ps
   return _mm256_castsi256_ps(A);
 }
+TEST_CONSTEXPR(match_m256(_mm256_castsi256_ps((__m256i)(__v4du){0x42000000c1800000ULL, 0x43000000c2800000ULL, 0x41000000c0800000ULL, 0xC00000003F800000ULL}), -16.0f, 32.0f, -64.0f, 128.0f, -4.0f, 8.0f, 1.0f, -2.0f));
 
 __m128i test_mm256_castsi256_si128(__m256i A) {
   // CHECK-LABEL: test_mm256_castsi256_si128
   // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <2 x i32> <i32 0, i32 1>
   return _mm256_castsi256_si128(A);
 }
+TEST_CONSTEXPR(match_m128i(_mm256_castsi256_si128((__m256i)(__v4du){0xBFF0000000000000ULL, 0x4070000000000000ULL, 0xC000000000000000ULL, 0xC008000000000000ULL}), 0xBFF0000000000000ULL, 0x4070000000000000ULL));
 
 __m256d test_mm256_ceil_pd(__m256d x) {
   // CHECK-LABEL: test_mm256_ceil_pd

github-actions · 2025-08-08T14:18:26Z

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:

git-clang-format --diff HEAD~1 HEAD --extensions c,h -- clang/lib/Headers/avxintrin.h clang/test/CodeGen/X86/avx-builtins.c

View the diff from clang-format here.

diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index 5a6d48bc2..9a8249994 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -4368,8 +4368,7 @@ _mm256_setzero_si256(void) {
 /// \returns A 256-bit floating-point vector of [8 x float] containing the same
 ///    bitwise pattern as the parameter.
 static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm256_castpd_ps(__m256d __a)
-{
+_mm256_castpd_ps(__m256d __a) {
   return (__m256)__a;
 }
 
@@ -4385,8 +4384,7 @@ _mm256_castpd_ps(__m256d __a)
 /// \returns A 256-bit integer vector containing the same bitwise pattern as the
 ///    parameter.
 static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm256_castpd_si256(__m256d __a)
-{
+_mm256_castpd_si256(__m256d __a) {
   return (__m256i)__a;
 }
 
@@ -4402,8 +4400,7 @@ _mm256_castpd_si256(__m256d __a)
 /// \returns A 256-bit floating-point vector of [4 x double] containing the same
 ///    bitwise pattern as the parameter.
 static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm256_castps_pd(__m256 __a)
-{
+_mm256_castps_pd(__m256 __a) {
   return (__m256d)__a;
 }
 
@@ -4419,8 +4416,7 @@ _mm256_castps_pd(__m256 __a)
 /// \returns A 256-bit integer vector containing the same bitwise pattern as the
 ///    parameter.
 static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm256_castps_si256(__m256 __a)
-{
+_mm256_castps_si256(__m256 __a) {
   return (__m256i)__a;
 }
 
@@ -4436,8 +4432,7 @@ _mm256_castps_si256(__m256 __a)
 /// \returns A 256-bit floating-point vector of [8 x float] containing the same
 ///    bitwise pattern as the parameter.
 static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm256_castsi256_ps(__m256i __a)
-{
+_mm256_castsi256_ps(__m256i __a) {
   return (__m256)__a;
 }
 
@@ -4453,8 +4448,7 @@ _mm256_castsi256_ps(__m256i __a)
 /// \returns A 256-bit floating-point vector of [4 x double] containing the same
 ///    bitwise pattern as the parameter.
 static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm256_castsi256_pd(__m256i __a)
-{
+_mm256_castsi256_pd(__m256i __a) {
   return (__m256d)__a;
 }
 
@@ -4470,8 +4464,7 @@ _mm256_castsi256_pd(__m256i __a)
 /// \returns A 128-bit floating-point vector of [2 x double] containing the
 ///    lower 128 bits of the parameter.
 static __inline __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm256_castpd256_pd128(__m256d __a)
-{
+_mm256_castpd256_pd128(__m256d __a) {
   return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 1);
 }
 
@@ -4487,8 +4480,7 @@ _mm256_castpd256_pd128(__m256d __a)
 /// \returns A 128-bit floating-point vector of [4 x float] containing the
 ///    lower 128 bits of the parameter.
 static __inline __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm256_castps256_ps128(__m256 __a)
-{
+_mm256_castps256_ps128(__m256 __a) {
   return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 1, 2, 3);
 }
 
@@ -4503,8 +4495,7 @@ _mm256_castps256_ps128(__m256 __a)
 /// \returns A 128-bit integer vector containing the lower 128 bits of the
 ///    parameter.
 static __inline __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm256_castsi256_si128(__m256i __a)
-{
+_mm256_castsi256_si128(__m256i __a) {
   return __builtin_shufflevector((__v4di)__a, (__v4di)__a, 0, 1);
 }

[Headers][X86] Allow AVX cast intrinsics to be used in constexpr

aefead2

Still missing the "extend to 256-bit" casts - _mm256_castpd128_pd256 etc. - due to constexpr not liking undefined/poison etc.

llvmbot added clang Clang issues not falling into any other category backend:X86 clang:headers Headers provided by Clang, e.g. for intrinsics labels Aug 8, 2025

RKSimon merged commit e64224a into llvm:main Aug 8, 2025
12 of 13 checks passed

RKSimon deleted the x86-avx1-cast-constexpr branch August 8, 2025 14:39

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[Headers][X86] Allow AVX cast intrinsics to be used in constexpr #152730

[Headers][X86] Allow AVX cast intrinsics to be used in constexpr #152730

RKSimon commented Aug 8, 2025

Uh oh!

llvmbot commented Aug 8, 2025 •

edited

Loading

Uh oh!

github-actions bot commented Aug 8, 2025

Uh oh!

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

2 participants

[Headers][X86] Allow AVX cast intrinsics to be used in constexpr #152730

[Headers][X86] Allow AVX cast intrinsics to be used in constexpr #152730

Conversation

RKSimon commented Aug 8, 2025

Uh oh!

llvmbot commented Aug 8, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

github-actions bot commented Aug 8, 2025

Uh oh!

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

2 participants

llvmbot commented Aug 8, 2025 •

edited

Loading