-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[Clang][X86] Add avx512 __builtin_ia32_select* constexpr handling #152705
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
de0a2d3 to
d294c88
Compare
|
@llvm/pr-subscribers-clang @llvm/pr-subscribers-backend-x86 Author: Simon Pilgrim (RKSimon) ChangesThis should allow us to constexpr many avx512 predicated intrinsics where they wrap basic intrinsics that are already constexpr Fixes #152321 Patch is 45.67 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/152705.diff 14 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 3efc0be8fa698..4f1dfddb53402 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -4241,99 +4241,99 @@ let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVecto
def vfcmulcph512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Vector<16, float>, unsigned short, _Constant int)">;
}
-let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def selectb_128 : X86Builtin<"_Vector<16, char>(unsigned short, _Vector<16, char>, _Vector<16, char>)">;
}
-let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def selectb_256 : X86Builtin<"_Vector<32, char>(unsigned int, _Vector<32, char>, _Vector<32, char>)">;
}
-let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def selectb_512 : X86Builtin<"_Vector<64, char>(unsigned long long int, _Vector<64, char>, _Vector<64, char>)">;
}
-let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def selectw_128 : X86Builtin<"_Vector<8, short>(unsigned char, _Vector<8, short>, _Vector<8, short>)">;
}
-let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def selectw_256 : X86Builtin<"_Vector<16, short>(unsigned short, _Vector<16, short>, _Vector<16, short>)">;
}
-let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def selectw_512 : X86Builtin<"_Vector<32, short>(unsigned int, _Vector<32, short>, _Vector<32, short>)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def selectd_128 : X86Builtin<"_Vector<4, int>(unsigned char, _Vector<4, int>, _Vector<4, int>)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def selectd_256 : X86Builtin<"_Vector<8, int>(unsigned char, _Vector<8, int>, _Vector<8, int>)">;
}
-let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def selectd_512 : X86Builtin<"_Vector<16, int>(unsigned short, _Vector<16, int>, _Vector<16, int>)">;
}
-let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def selectph_128 : X86Builtin<"_Vector<8, _Float16>(unsigned char, _Vector<8, _Float16>, _Vector<8, _Float16>)">;
}
-let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def selectph_256 : X86Builtin<"_Vector<16, _Float16>(unsigned short, _Vector<16, _Float16>, _Vector<16, _Float16>)">;
}
-let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def selectph_512 : X86Builtin<"_Vector<32, _Float16>(unsigned int, _Vector<32, _Float16>, _Vector<32, _Float16>)">;
}
-let Features = "avx512bf16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512bf16,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def selectpbf_128 : X86Builtin<"_Vector<8, __bf16>(unsigned char, _Vector<8, __bf16>, _Vector<8, __bf16>)">;
}
-let Features = "avx512bf16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512bf16,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def selectpbf_256 : X86Builtin<"_Vector<16, __bf16>(unsigned short, _Vector<16, __bf16>, _Vector<16, __bf16>)">;
}
-let Features = "avx512bf16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512bf16,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def selectpbf_512 : X86Builtin<"_Vector<32, __bf16>(unsigned int, _Vector<32, __bf16>, _Vector<32, __bf16>)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def selectq_128 : X86Builtin<"_Vector<2, long long int>(unsigned char, _Vector<2, long long int>, _Vector<2, long long int>)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def selectq_256 : X86Builtin<"_Vector<4, long long int>(unsigned char, _Vector<4, long long int>, _Vector<4, long long int>)">;
}
-let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def selectq_512 : X86Builtin<"_Vector<8, long long int>(unsigned char, _Vector<8, long long int>, _Vector<8, long long int>)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def selectps_128 : X86Builtin<"_Vector<4, float>(unsigned char, _Vector<4, float>, _Vector<4, float>)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def selectps_256 : X86Builtin<"_Vector<8, float>(unsigned char, _Vector<8, float>, _Vector<8, float>)">;
}
-let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def selectps_512 : X86Builtin<"_Vector<16, float>(unsigned short, _Vector<16, float>, _Vector<16, float>)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def selectpd_128 : X86Builtin<"_Vector<2, double>(unsigned char, _Vector<2, double>, _Vector<2, double>)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def selectpd_256 : X86Builtin<"_Vector<4, double>(unsigned char, _Vector<4, double>, _Vector<4, double>)">;
}
-let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def selectpd_512 : X86Builtin<"_Vector<8, double>(unsigned char, _Vector<8, double>, _Vector<8, double>)">;
}
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 5cf2b9a0355ab..6595d8c69f0b2 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11688,6 +11688,51 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
+ case X86::BI__builtin_ia32_selectb_128:
+ case X86::BI__builtin_ia32_selectb_256:
+ case X86::BI__builtin_ia32_selectb_512:
+ case X86::BI__builtin_ia32_selectw_128:
+ case X86::BI__builtin_ia32_selectw_256:
+ case X86::BI__builtin_ia32_selectw_512:
+ case X86::BI__builtin_ia32_selectd_128:
+ case X86::BI__builtin_ia32_selectd_256:
+ case X86::BI__builtin_ia32_selectd_512:
+ case X86::BI__builtin_ia32_selectq_128:
+ case X86::BI__builtin_ia32_selectq_256:
+ case X86::BI__builtin_ia32_selectq_512:
+ case X86::BI__builtin_ia32_selectph_128:
+ case X86::BI__builtin_ia32_selectph_256:
+ case X86::BI__builtin_ia32_selectph_512:
+ case X86::BI__builtin_ia32_selectpbf_128:
+ case X86::BI__builtin_ia32_selectpbf_256:
+ case X86::BI__builtin_ia32_selectpbf_512:
+ case X86::BI__builtin_ia32_selectps_128:
+ case X86::BI__builtin_ia32_selectps_256:
+ case X86::BI__builtin_ia32_selectps_512:
+ case X86::BI__builtin_ia32_selectpd_128:
+ case X86::BI__builtin_ia32_selectpd_256:
+ case X86::BI__builtin_ia32_selectpd_512: {
+ // AVX512 predicated move: "Result = Mask[] ? LHS[] : RHS[]".
+ APValue SourceMask, SourceLHS, SourceRHS;
+ if (!EvaluateAsRValue(Info, E->getArg(0), SourceMask) ||
+ !EvaluateAsRValue(Info, E->getArg(1), SourceLHS) ||
+ !EvaluateAsRValue(Info, E->getArg(2), SourceRHS))
+ return false;
+
+ APSInt Mask = SourceMask.getInt();
+ QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
+ unsigned SourceLen = SourceLHS.getVectorLength();
+ SmallVector<APValue, 4> ResultElements;
+ ResultElements.reserve(SourceLen);
+
+ for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) {
+ const APValue &LHS = SourceLHS.getVectorElt(EltNum);
+ const APValue &RHS = SourceRHS.getVectorElt(EltNum);
+ ResultElements.push_back(Mask[EltNum] ? LHS : RHS);
+ }
+
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
}
}
diff --git a/clang/lib/Headers/avx512bitalgintrin.h b/clang/lib/Headers/avx512bitalgintrin.h
index 9a1ff8f39734f..76c1a158b223f 100644
--- a/clang/lib/Headers/avx512bitalgintrin.h
+++ b/clang/lib/Headers/avx512bitalgintrin.h
@@ -32,7 +32,7 @@ _mm512_popcnt_epi16(__m512i __A)
return (__m512i)__builtin_elementwise_popcount((__v32hu)__A);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B)
{
return (__m512i) __builtin_ia32_selectw_512((__mmask32) __U,
@@ -40,7 +40,7 @@ _mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B)
(__v32hi) __A);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B)
{
return _mm512_mask_popcnt_epi16((__m512i) _mm512_setzero_si512(),
@@ -54,7 +54,7 @@ _mm512_popcnt_epi8(__m512i __A)
return (__m512i)__builtin_elementwise_popcount((__v64qu)__A);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B)
{
return (__m512i) __builtin_ia32_selectb_512((__mmask64) __U,
@@ -62,7 +62,7 @@ _mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B)
(__v64qi) __A);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B)
{
return _mm512_mask_popcnt_epi8((__m512i) _mm512_setzero_si512(),
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 95b80cc59bb02..00ef9bb0ace9e 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -8962,7 +8962,7 @@ _mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A)
return (__m512d) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A));
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
@@ -8970,7 +8970,7 @@ _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
(__v8df) __W);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
{
return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
@@ -8978,7 +8978,7 @@ _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
(__v8df) _mm512_setzero_pd ());
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
@@ -8986,7 +8986,7 @@ _mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
(__v16sf) __W);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
{
return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
diff --git a/clang/lib/Headers/avx512vlbitalgintrin.h b/clang/lib/Headers/avx512vlbitalgintrin.h
index 739e78aab753d..e29a149df5f90 100644
--- a/clang/lib/Headers/avx512vlbitalgintrin.h
+++ b/clang/lib/Headers/avx512vlbitalgintrin.h
@@ -38,7 +38,7 @@ _mm256_popcnt_epi16(__m256i __A)
return (__m256i)__builtin_elementwise_popcount((__v16hu)__A);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B)
{
return (__m256i) __builtin_ia32_selectw_256((__mmask16) __U,
@@ -46,7 +46,7 @@ _mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B)
(__v16hi) __A);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B)
{
return _mm256_mask_popcnt_epi16((__m256i) _mm256_setzero_si256(),
@@ -60,7 +60,7 @@ _mm_popcnt_epi16(__m128i __A)
return (__m128i)__builtin_elementwise_popcount((__v8hu)__A);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B)
{
return (__m128i) __builtin_ia32_selectw_128((__mmask8) __U,
@@ -68,7 +68,7 @@ _mm_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B)
(__v8hi) __A);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __B)
{
return _mm_mask_popcnt_epi16((__m128i) _mm_setzero_si128(),
@@ -82,7 +82,7 @@ _mm256_popcnt_epi8(__m256i __A)
return (__m256i)__builtin_elementwise_popcount((__v32qu)__A);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B)
{
return (__m256i) __builtin_ia32_selectb_256((__mmask32) __U,
@@ -90,7 +90,7 @@ _mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B)
(__v32qi) __A);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B)
{
return _mm256_mask_popcnt_epi8((__m256i) _mm256_setzero_si256(),
@@ -104,7 +104,7 @@ _mm_popcnt_epi8(__m128i __A)
return (__m128i)__builtin_elementwise_popcount((__v16qu)__A);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B)
{
return (__m128i) __builtin_ia32_selectb_128((__mmask16) __U,
@@ -112,7 +112,7 @@ _mm_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B)
(__v16qi) __A);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_popcnt_epi8(__mmask16 __U, __m128i __B)
{
return _mm_mask_popcnt_epi8((__m128i) _mm_setzero_si128(),
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index cbad39acad84f..fd1bd291ecbcb 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -23,6 +23,14 @@
__target__("avx512vl,no-evex512"), \
__min_vector_width__(256)))
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
+#else
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
+#endif
+
typedef short __v2hi __attribute__((__vector_size__(4)));
typedef char __v4qi __attribute__((__vector_size__(4)));
typedef char __v2qi __attribute__((__vector_size__(2)));
@@ -8258,7 +8266,7 @@ _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
(__v4si)_mm_shuffle_epi32((A), (I)), \
(__v4si)_mm_setzero_si128()))
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
{
return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
@@ -8266,7 +8274,7 @@ _mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
(__v2df) __W);
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
{
return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
@@ -8274,7 +8282,7 @@ _mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
(__v2df) _mm_setzero_pd ());
}
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
{
return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
@@ -8282,7 +8290,7 @@ _mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
(__v4df) __W);
}
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
{
return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
@@ -8290,7 +8298,7 @@ _mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
(__v4df) _mm256_setzero_pd ());
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
{
return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
@@ -8298,7 +8306,7 @@ _mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
(__v4sf) __W);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
{
return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
@@ -8306,7 +8314,7 @@ _mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
(__v4sf) _mm_setzero_ps ());
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
{
return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
@@ -8314,7 +8322,7 @@ _mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
(__v8sf) __W);
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_mov_ps (__mmask8 __...
[truncated]
|
d294c88 to
ceff509
Compare
|
ping? |
|
Ping? |
phoebewang
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
|
This introduced an unused variable: |
This should allow us to constexpr many avx512 predicated intrinsics where they wrap basic intrinsics that are already constexpr
Fixes #152321