diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index dc9fc07314323..3c3a3d1453934 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -31,6 +31,14 @@ __min_vector_width__(128))) #endif +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr +#else +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 +#endif + /* SSE4 Multiple Packed Sums of Absolute Difference. */ /// Computes sixteen sum of absolute difference (SAD) operations on sets of /// four unsigned 8-bit integers from the 256-bit integer vectors \a X and @@ -460,7 +468,7 @@ _mm256_adds_epu16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_and_si256(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a & (__v4du)__b); @@ -478,7 +486,7 @@ _mm256_and_si256(__m256i __a, __m256i __b) /// \param __b /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_andnot_si256(__m256i __a, __m256i __b) { return (__m256i)(~(__v4du)__a & (__v4du)__b); @@ -1822,7 +1830,7 @@ _mm256_mul_epu32(__m256i __a, __m256i __b) /// \param __b /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_or_si256(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a | (__v4du)__b); @@ -2974,7 +2982,7 @@ _mm256_unpacklo_epi64(__m256i __a, __m256i __b) /// \param __b /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_xor_si256(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a ^ (__v4du)__b); @@ -5289,5 +5297,7 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y) #undef __DEFAULT_FN_ATTRS256 #undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256_CONSTEXPR +#undef __DEFAULT_FN_ATTRS128_CONSTEXPR #endif /* __AVX2INTRIN_H */ diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h index 88b48e3a32070..62325b96f202b 100644 --- a/clang/lib/Headers/avx512dqintrin.h +++ b/clang/lib/Headers/avx512dqintrin.h @@ -20,6 +20,14 @@ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512dq,no-evex512"))) +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 constexpr +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr +#else +#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS +#endif + static __inline __mmask8 __DEFAULT_FN_ATTRS _knot_mask8(__mmask8 __M) { @@ -167,7 +175,7 @@ _mm512_maskz_mullo_epi64(__mmask8 __U, __m512i __A, __m512i __B) { (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_pd(__m512d __A, __m512d __B) { return (__m512d)((__v8du)__A ^ (__v8du)__B); } @@ -186,7 +194,7 @@ _mm512_maskz_xor_pd(__mmask8 __U, __m512d __A, __m512d __B) { (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_ps (__m512 __A, __m512 __B) { return (__m512)((__v16su)__A ^ (__v16su)__B); } @@ -205,7 +213,7 @@ _mm512_maskz_xor_ps(__mmask16 __U, __m512 __A, __m512 __B) { (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_pd(__m512d __A, __m512d __B) { return (__m512d)((__v8du)__A | (__v8du)__B); } @@ -224,7 +232,7 @@ _mm512_maskz_or_pd(__mmask8 __U, __m512d __A, __m512d __B) { (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_ps(__m512 __A, __m512 __B) { return (__m512)((__v16su)__A | (__v16su)__B); } @@ -243,7 +251,7 @@ _mm512_maskz_or_ps(__mmask16 __U, __m512 __A, __m512 __B) { (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_pd(__m512d __A, __m512d __B) { return (__m512d)((__v8du)__A & (__v8du)__B); } @@ -262,7 +270,7 @@ _mm512_maskz_and_pd(__mmask8 __U, __m512d __A, __m512d __B) { (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_ps(__m512 __A, __m512 __B) { return (__m512)((__v16su)__A & (__v16su)__B); } @@ -281,7 +289,7 @@ _mm512_maskz_and_ps(__mmask16 __U, __m512 __A, __m512 __B) { (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_pd(__m512d __A, __m512d __B) { return (__m512d)(~(__v8du)__A & (__v8du)__B); } @@ -300,7 +308,7 @@ _mm512_maskz_andnot_pd(__mmask8 __U, __m512d __A, __m512d __B) { (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_ps(__m512 __A, __m512 __B) { return (__m512)(~(__v16su)__A & (__v16su)__B); } @@ -1375,5 +1383,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) #undef __DEFAULT_FN_ATTRS512 #undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS512_CONSTEXPR +#undef __DEFAULT_FN_ATTRS_CONSTEXPR #endif diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 45e7eeb5327d0..21809a90d4d26 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -645,7 +645,7 @@ _mm512_zextsi256_si512(__m256i __a) } /* Bitwise operators */ -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_epi32(__m512i __a, __m512i __b) { return (__m512i)((__v16su)__a & (__v16su)__b); @@ -666,7 +666,7 @@ _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b) __k, __a, __b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_epi64(__m512i __a, __m512i __b) { return (__m512i)((__v8du)__a & (__v8du)__b); @@ -687,13 +687,13 @@ _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b) __k, __a, __b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_si512 (__m512i __A, __m512i __B) { return (__m512i)(~(__v8du)__A & (__v8du)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_epi32 (__m512i __A, __m512i __B) { return (__m512i)(~(__v16su)__A & (__v16su)__B); @@ -714,7 +714,7 @@ _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B) __U, __A, __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_andnot_epi64(__m512i __A, __m512i __B) { return (__m512i)(~(__v8du)__A & (__v8du)__B); @@ -735,7 +735,7 @@ _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B) __U, __A, __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_epi32(__m512i __a, __m512i __b) { return (__m512i)((__v16su)__a | (__v16su)__b); @@ -755,7 +755,7 @@ _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b) return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_epi64(__m512i __a, __m512i __b) { return (__m512i)((__v8du)__a | (__v8du)__b); @@ -775,7 +775,7 @@ _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b) return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_epi32(__m512i __a, __m512i __b) { return (__m512i)((__v16su)__a ^ (__v16su)__b); @@ -795,7 +795,7 @@ _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b) return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_epi64(__m512i __a, __m512i __b) { return (__m512i)((__v8du)__a ^ (__v8du)__b); @@ -815,19 +815,19 @@ _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b) return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_and_si512(__m512i __a, __m512i __b) { return (__m512i)((__v8du)__a & (__v8du)__b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_or_si512(__m512i __a, __m512i __b) { return (__m512i)((__v8du)__a | (__v8du)__b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_xor_si512(__m512i __a, __m512i __b) { return (__m512i)((__v8du)__a ^ (__v8du)__b); diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index b9ca013c25c7a..5aa09300845bf 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -555,7 +555,7 @@ _mm256_rcp_ps(__m256 __a) /// A 256-bit vector of [4 x double] containing one of the source operands. /// \returns A 256-bit vector of [4 x double] containing the bitwise AND of the /// values between both operands. -static __inline __m256d __DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_and_pd(__m256d __a, __m256d __b) { return (__m256d)((__v4du)__a & (__v4du)__b); @@ -573,7 +573,7 @@ _mm256_and_pd(__m256d __a, __m256d __b) /// A 256-bit vector of [8 x float] containing one of the source operands. /// \returns A 256-bit vector of [8 x float] containing the bitwise AND of the /// values between both operands. -static __inline __m256 __DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_and_ps(__m256 __a, __m256 __b) { return (__m256)((__v8su)__a & (__v8su)__b); @@ -594,7 +594,7 @@ _mm256_and_ps(__m256 __a, __m256 __b) /// \returns A 256-bit vector of [4 x double] containing the bitwise AND of the /// values of the second operand and the one's complement of the first /// operand. -static __inline __m256d __DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_andnot_pd(__m256d __a, __m256d __b) { return (__m256d)(~(__v4du)__a & (__v4du)__b); @@ -615,7 +615,7 @@ _mm256_andnot_pd(__m256d __a, __m256d __b) /// \returns A 256-bit vector of [8 x float] containing the bitwise AND of the /// values of the second operand and the one's complement of the first /// operand. -static __inline __m256 __DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_andnot_ps(__m256 __a, __m256 __b) { return (__m256)(~(__v8su)__a & (__v8su)__b); @@ -633,7 +633,7 @@ _mm256_andnot_ps(__m256 __a, __m256 __b) /// A 256-bit vector of [4 x double] containing one of the source operands. /// \returns A 256-bit vector of [4 x double] containing the bitwise OR of the /// values between both operands. -static __inline __m256d __DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_or_pd(__m256d __a, __m256d __b) { return (__m256d)((__v4du)__a | (__v4du)__b); @@ -651,7 +651,7 @@ _mm256_or_pd(__m256d __a, __m256d __b) /// A 256-bit vector of [8 x float] containing one of the source operands. /// \returns A 256-bit vector of [8 x float] containing the bitwise OR of the /// values between both operands. -static __inline __m256 __DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_or_ps(__m256 __a, __m256 __b) { return (__m256)((__v8su)__a | (__v8su)__b); @@ -669,7 +669,7 @@ _mm256_or_ps(__m256 __a, __m256 __b) /// A 256-bit vector of [4 x double] containing one of the source operands. /// \returns A 256-bit vector of [4 x double] containing the bitwise XOR of the /// values between both operands. -static __inline __m256d __DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_xor_pd(__m256d __a, __m256d __b) { return (__m256d)((__v4du)__a ^ (__v4du)__b); @@ -687,7 +687,7 @@ _mm256_xor_pd(__m256d __a, __m256d __b) /// A 256-bit vector of [8 x float] containing one of the source operands. /// \returns A 256-bit vector of [8 x float] containing the bitwise XOR of the /// values between both operands. -static __inline __m256 __DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_xor_ps(__m256 __a, __m256 __b) { return (__m256)((__v8su)__a ^ (__v8su)__b); diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index 78e8a422db4c1..9f8f60abdbb35 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -2676,8 +2676,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu16(__m128i __a, /// A 128-bit integer vector containing one of the source operands. /// \returns A 128-bit integer vector containing the bitwise AND of the values /// in both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_and_si128(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_and_si128(__m128i __a, __m128i __b) { return (__m128i)((__v2du)__a & (__v2du)__b); } @@ -2695,8 +2695,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_and_si128(__m128i __a, /// A 128-bit vector containing the right source operand. /// \returns A 128-bit integer vector containing the bitwise AND of the one's /// complement of the first operand and the values in the second operand. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_andnot_si128(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_andnot_si128(__m128i __a, __m128i __b) { return (__m128i)(~(__v2du)__a & (__v2du)__b); } /// Performs a bitwise OR of two 128-bit integer vectors. @@ -2711,8 +2711,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_andnot_si128(__m128i __a, /// A 128-bit integer vector containing one of the source operands. /// \returns A 128-bit integer vector containing the bitwise OR of the values /// in both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_or_si128(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_or_si128(__m128i __a, __m128i __b) { return (__m128i)((__v2du)__a | (__v2du)__b); } @@ -2728,8 +2728,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_or_si128(__m128i __a, /// A 128-bit integer vector containing one of the source operands. /// \returns A 128-bit integer vector containing the bitwise exclusive OR of the /// values in both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_xor_si128(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_xor_si128(__m128i __a, __m128i __b) { return (__m128i)((__v2du)__a ^ (__v2du)__b); } diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index a6e70aae420ea..fc389b3a7790c 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -44,12 +44,14 @@ __m256d test_mm256_and_pd(__m256d A, __m256d B) { // CHECK: and <4 x i64> return _mm256_and_pd(A, B); } +TEST_CONSTEXPR(match_m256d(_mm256_and_pd((__m256d){-4.0, -5.0, +6.0, +7.0}, (__m256d){+0.0, -0.0, -0.0, +7.0}), -0.0, -0.0, +0.0, +7.0)); __m256 test_mm256_and_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_and_ps // CHECK: and <8 x i32> return _mm256_and_ps(A, B); } +TEST_CONSTEXPR(match_m256(_mm256_and_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){+0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f}), -0.0f, -0.0f, +0.0f, +7.0f, +7.0f, +0.0f, -0.0f, -0.0f)); __m256d test_mm256_andnot_pd(__m256d A, __m256d B) { // CHECK-LABEL: test_mm256_andnot_pd @@ -57,6 +59,7 @@ __m256d test_mm256_andnot_pd(__m256d A, __m256d B) { // CHECK: and <4 x i64> return _mm256_andnot_pd(A, B); } +TEST_CONSTEXPR(match_m256d(_mm256_andnot_pd((__m256d){-4.0, -5.0, +6.0, +7.0}, (__m256d){+0.0, -0.0, -0.0, +7.0}), +0.0, +0.0, +0.0, +0.0)); __m256 test_mm256_andnot_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_andnot_ps @@ -64,6 +67,7 @@ __m256 test_mm256_andnot_ps(__m256 A, __m256 B) { // CHECK: and <8 x i32> return _mm256_andnot_ps(A, B); } +TEST_CONSTEXPR(match_m256(_mm256_andnot_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){+0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f}), +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f)); __m256d test_mm256_blend_pd(__m256d A, __m256d B) { // CHECK-LABEL: test_mm256_blend_pd @@ -1300,12 +1304,14 @@ __m256d test_mm256_or_pd(__m256d A, __m256d B) { // CHECK: or <4 x i64> return _mm256_or_pd(A, B); } +TEST_CONSTEXPR(match_m256d(_mm256_or_pd((__m256d){-4.0, -5.0, +6.0, +7.0}, (__m256d){+0.0, -0.0, -0.0, +7.0}), -4.0, -5.0, -6.0, +7.0)); __m256 test_mm256_or_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_or_ps // CHECK: or <8 x i32> return _mm256_or_ps(A, B); } +TEST_CONSTEXPR(match_m256(_mm256_or_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){+0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f}), -4.0f, -5.0f, -6.0f, +7.0f, +7.0f, -6.0f, -5.0f, -4.0f)); __m128d test_mm_permute_pd(__m128d A) { // CHECK-LABEL: test_mm_permute_pd @@ -2077,12 +2083,14 @@ __m256d test_mm256_xor_pd(__m256d A, __m256d B) { // CHECK: xor <4 x i64> return _mm256_xor_pd(A, B); } +TEST_CONSTEXPR(match_m256d(_mm256_xor_pd((__m256d){-4.0, -5.0, +6.0, +7.0}, (__m256d){+0.0, -0.0, -0.0, +7.0}), -4.0, +5.0, -6.0, +0.0)); __m256 test_mm256_xor_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_xor_ps // CHECK: xor <8 x i32> return _mm256_xor_ps(A, B); } +TEST_CONSTEXPR(match_m256(_mm256_xor_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){+0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f}), -4.0f, +5.0f, -6.0f, +0.0f, +0.0f, -6.0f, +5.0f, -4.0f)); void test_mm256_zeroall(void) { // CHECK-LABEL: test_mm256_zeroall diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index 27da56fb75723..1ed624cf63ab2 100644 --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -9,6 +9,7 @@ #include +#include "builtin_test_helpers.h" // NOTE: This should match the tests in llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll @@ -97,6 +98,7 @@ __m256i test_mm256_and_si256(__m256i a, __m256i b) { // CHECK: and <4 x i64> return _mm256_and_si256(a, b); } +TEST_CONSTEXPR(match_v4di(_mm256_and_si256((__m256i)(__v4di){0, -1, 0, -1}, (__m256i)(__v4di){0, 0, -1, -1}), 0, 0, 0, -1)); __m256i test_mm256_andnot_si256(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_andnot_si256 @@ -104,6 +106,7 @@ __m256i test_mm256_andnot_si256(__m256i a, __m256i b) { // CHECK: and <4 x i64> return _mm256_andnot_si256(a, b); } +TEST_CONSTEXPR(match_v4di(_mm256_andnot_si256((__m256i)(__v4di){0, -1, 0, -1}, (__m256i)(__v4di){0, 0, -1, -1}), 0, 0, -1, 0)); __m256i test_mm256_avg_epu8(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_avg_epu8 @@ -920,6 +923,7 @@ __m256i test_mm256_or_si256(__m256i a, __m256i b) { // CHECK: or <4 x i64> return _mm256_or_si256(a, b); } +TEST_CONSTEXPR(match_v4di(_mm256_or_si256((__m256i)(__v4di){0, -1, 0, -1}, (__m256i)(__v4di){0, 0, -1, -1}), 0, -1, -1, -1)); __m256i test_mm256_packs_epi16(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_packs_epi16 @@ -1336,3 +1340,4 @@ __m256i test_mm256_xor_si256(__m256i a, __m256i b) { // CHECK: xor <4 x i64> return _mm256_xor_si256(a, b); } +TEST_CONSTEXPR(match_v4di(_mm256_xor_si256((__m256i)(__v4di){0, -1, 0, -1}, (__m256i)(__v4di){0, 0, -1, -1}), 0, -1, -1, 0)); diff --git a/clang/test/CodeGen/X86/avx512dq-builtins.c b/clang/test/CodeGen/X86/avx512dq-builtins.c index 1ebd3696bce54..39fb92d826f9c 100644 --- a/clang/test/CodeGen/X86/avx512dq-builtins.c +++ b/clang/test/CodeGen/X86/avx512dq-builtins.c @@ -2,6 +2,7 @@ #include +#include "builtin_test_helpers.h" __mmask8 test_knot_mask8(__mmask8 a) { // CHECK-LABEL: @test_knot_mask8 @@ -267,6 +268,7 @@ __m512d test_mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d // CHECK: select <8 x i1> %[[MASK]], <8 x double> %{{.*}}, <8 x double> %{{.*}} return (__m512d) _mm512_mask_xor_pd(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_m512d(_mm512_xor_pd((__m512d){-4.0, -5.0, +6.0, +7.0, +7.0, +6.0, -5.0, -4.0}, (__m512d){+0.0, -0.0, -0.0, +7.0, +7.0, -0.0, -0.0, +0.0}), -4.0, +5.0, -6.0, +0.0, +0.0, -6.0, +5.0, -4.0)); __m512d test_mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B) { // CHECK-LABEL: @test_mm512_maskz_xor_pd @@ -281,6 +283,7 @@ __m512 test_mm512_xor_ps (__m512 __A, __m512 __B) { // CHECK: xor <16 x i32> return (__m512) _mm512_xor_ps(__A, __B); } +TEST_CONSTEXPR(match_m512(_mm512_xor_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f, -4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){+0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f, +0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f}), -4.0f, +5.0f, -6.0f, +0.0f, +0.0f, -6.0f, +5.0f, -4.0f, -4.0f, +5.0f, -6.0f, +0.0f, +0.0f, -6.0f, +5.0f, -4.0f)); __m512 test_mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { // CHECK-LABEL: @test_mm512_mask_xor_ps @@ -303,6 +306,7 @@ __m512d test_mm512_or_pd (__m512d __A, __m512d __B) { // CHECK: or <8 x i64> return (__m512d) _mm512_or_pd(__A, __B); } +TEST_CONSTEXPR(match_m512d(_mm512_or_pd((__m512d){-4.0, -5.0, +6.0, +7.0, +7.0, +6.0, -5.0, -4.0}, (__m512d){+0.0, -0.0, -0.0, +7.0, +7.0, -0.0, -0.0, +0.0}), -4.0, -5.0, -6.0, +7.0, +7.0, -6.0, -5.0, -4.0)); __m512d test_mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { // CHECK-LABEL: @test_mm512_mask_or_pd @@ -325,6 +329,7 @@ __m512 test_mm512_or_ps (__m512 __A, __m512 __B) { // CHECK: or <16 x i32> return (__m512) _mm512_or_ps(__A, __B); } +TEST_CONSTEXPR(match_m512(_mm512_or_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f, -4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){+0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f, +0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f}), -4.0f, -5.0f, -6.0f, +7.0f, +7.0f, -6.0f, -5.0f, -4.0f, -4.0f, -5.0f, -6.0f, +7.0f, +7.0f, -6.0f, -5.0f, -4.0f)); __m512 test_mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { // CHECK-LABEL: @test_mm512_mask_or_ps @@ -347,6 +352,7 @@ __m512d test_mm512_and_pd (__m512d __A, __m512d __B) { // CHECK: and <8 x i64> return (__m512d) _mm512_and_pd(__A, __B); } +TEST_CONSTEXPR(match_m512d(_mm512_and_pd((__m512d){-4.0, -5.0, +6.0, +7.0, +7.0, +6.0, -5.0, -4.0}, (__m512d){+0.0, -0.0, -0.0, +7.0, +7.0, -0.0, -0.0, +0.0}), -0.0, -0.0, +0.0, +7.0, +7.0, +0.0, -0.0, -0.0)); __m512d test_mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { // CHECK-LABEL: @test_mm512_mask_and_pd @@ -369,6 +375,7 @@ __m512 test_mm512_and_ps (__m512 __A, __m512 __B) { // CHECK: and <16 x i32> return (__m512) _mm512_and_ps(__A, __B); } +TEST_CONSTEXPR(match_m512(_mm512_and_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f, -4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){+0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f, +0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f}), -0.0f, -0.0f, +0.0f, +7.0f, +7.0f, +0.0f, -0.0f, -0.0f, -0.0f, -0.0f, +0.0f, +7.0f, +7.0f, +0.0f, -0.0f, -0.0f)); __m512 test_mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { // CHECK-LABEL: @test_mm512_mask_and_ps @@ -392,6 +399,7 @@ __m512d test_mm512_andnot_pd (__m512d __A, __m512d __B) { // CHECK: and <8 x i64> return (__m512d) _mm512_andnot_pd(__A, __B); } +TEST_CONSTEXPR(match_m512d(_mm512_andnot_pd((__m512d){-4.0, -5.0, +6.0, +7.0, +7.0, +6.0, -5.0, -4.0}, (__m512d){+0.0, -0.0, -0.0, +7.0, +7.0, -0.0, -0.0, +0.0}), +0.0, +0.0, +0.0, +0.0, +0.0, +0.0, +0.0, +0.0)); __m512d test_mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { // CHECK-LABEL: @test_mm512_mask_andnot_pd @@ -415,6 +423,7 @@ __m512 test_mm512_andnot_ps (__m512 __A, __m512 __B) { // CHECK: and <16 x i32> return (__m512) _mm512_andnot_ps(__A, __B); } +TEST_CONSTEXPR(match_m512(_mm512_andnot_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f, -4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){+0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f, +0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f}), +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f)); __m512 test_mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { // CHECK-LABEL: @test_mm512_mask_andnot_ps diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index 84e700cfbd378..7a6f25885b112 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -2811,36 +2811,42 @@ __m512i test_mm512_and_epi32(__m512i __src,__mmask16 __k, __m512i __a, __m512i _ // CHECK: and <16 x i32> return _mm512_and_epi32(__a, __b); } +TEST_CONSTEXPR(match_v16si(_mm512_and_epi32((__m512i)(__v16si){0, -1, 0, -1, 0, 0, -1, -1, 0, -1, 0, -1, 0, 0, -1, -1}, (__m512i)(__v16si){0, 0, -1, -1, 0, -1, 0, -1, 0, 0, -1, -1, 0, -1, 0, -1}), 0, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1)); __m512i test_mm512_and_epi64(__m512i __src,__mmask8 __k, __m512i __a, __m512i __b) { // CHECK-LABEL: test_mm512_and_epi64 // CHECK: and <8 x i64> return _mm512_and_epi64(__a, __b); } +TEST_CONSTEXPR(match_v8di(_mm512_and_epi64((__m512i)(__v8di){0, -1, 0, -1, 0, 0, -1, -1}, (__m512i)(__v8di){0, 0, -1, -1, 0, -1, 0, -1}), 0, 0, 0, -1, 0, 0, 0, -1)); __m512i test_mm512_or_epi32(__m512i __src,__mmask16 __k, __m512i __a, __m512i __b) { // CHECK-LABEL: test_mm512_or_epi32 // CHECK: or <16 x i32> return _mm512_or_epi32(__a, __b); } +TEST_CONSTEXPR(match_v16si(_mm512_or_epi32((__m512i)(__v16si){0, -1, 0, -1, 0, 0, -1, -1, 0, -1, 0, -1, 0, 0, -1, -1}, (__m512i)(__v16si){0, 0, -1, -1, 0, -1, 0, -1, 0, 0, -1, -1, 0, -1, 0, -1}), 0, -1, -1, -1, 0, -1, -1, -1, 0, -1, -1, -1, 0, -1, -1, -1)); __m512i test_mm512_or_epi64(__m512i __src,__mmask8 __k, __m512i __a, __m512i __b) { // CHECK-LABEL: test_mm512_or_epi64 // CHECK: or <8 x i64> return _mm512_or_epi64(__a, __b); } +TEST_CONSTEXPR(match_v8di(_mm512_or_epi64((__m512i)(__v8di){0, -1, 0, -1, 0, 0, -1, -1}, (__m512i)(__v8di){0, 0, -1, -1, 0, -1, 0, -1}), 0, -1, -1, -1, 0, -1, -1, -1)); __m512i test_mm512_xor_epi32(__m512i __src,__mmask16 __k, __m512i __a, __m512i __b) { // CHECK-LABEL: test_mm512_xor_epi32 // CHECK: xor <16 x i32> return _mm512_xor_epi32(__a, __b); } +TEST_CONSTEXPR(match_v16si(_mm512_xor_epi32((__m512i)(__v16si){0, -1, 0, -1, 0, 0, -1, -1, 0, -1, 0, -1, 0, 0, -1, -1}, (__m512i)(__v16si){0, 0, -1, -1, 0, -1, 0, -1, 0, 0, -1, -1, 0, -1, 0, -1}), 0, -1, -1, 0, 0, -1, -1, 0, 0, -1, -1, 0, 0, -1, -1, 0)); __m512i test_mm512_xor_epi64(__m512i __src,__mmask8 __k, __m512i __a, __m512i __b) { // CHECK-LABEL: test_mm512_xor_epi64 // CHECK: xor <8 x i64> return _mm512_xor_epi64(__a, __b); } +TEST_CONSTEXPR(match_v8di(_mm512_xor_epi64((__m512i)(__v8di){0, -1, 0, -1, 0, 0, -1, -1}, (__m512i)(__v8di){0, 0, -1, -1, 0, -1, 0, -1}), 0, -1, -1, 0, 0, -1, -1, 0)); __m512i test_mm512_maskz_andnot_epi32 (__mmask16 __k,__m512i __A, __m512i __B){ // CHECK-LABEL: test_mm512_maskz_andnot_epi32 @@ -2869,6 +2875,7 @@ __m512i test_mm512_andnot_si512(__m512i __A, __m512i __B) return _mm512_andnot_si512(__A, __B); } +TEST_CONSTEXPR(match_v8di(_mm512_andnot_si512((__m512i)(__v8di){0, -1, 0, -1, 0, 0, -1, -1}, (__m512i)(__v8di){0, 0, -1, -1, 0, -1, 0, -1}), 0, 0, -1, 0, 0, -1, 0, 0)); __m512i test_mm512_andnot_epi32(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_andnot_epi32 @@ -2876,6 +2883,7 @@ __m512i test_mm512_andnot_epi32(__m512i __A, __m512i __B) { // CHECK: and <16 x i32> %{{.*}}, %{{.*}} return _mm512_andnot_epi32(__A,__B); } +TEST_CONSTEXPR(match_v16si(_mm512_andnot_epi32((__m512i)(__v16si){0, -1, 0, -1, 0, 0, -1, -1, 0, -1, 0, -1, 0, 0, -1, -1}, (__m512i)(__v16si){0, 0, -1, -1, 0, -1, 0, -1, 0, 0, -1, -1, 0, -1, 0, -1}), 0, 0, -1, 0, 0, -1, 0, 0, 0, 0, -1, 0, 0, -1, 0, 0)); __m512i test_mm512_maskz_andnot_epi64 (__mmask8 __k,__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_andnot_epi64 @@ -2900,6 +2908,7 @@ __m512i test_mm512_andnot_epi64(__m512i __A, __m512i __B) { // CHECK: and <8 x i64> %{{.*}}, %{{.*}} return _mm512_andnot_epi64(__A,__B); } +TEST_CONSTEXPR(match_v8di(_mm512_andnot_epi64((__m512i)(__v8di){0, -1, 0, -1, 0, 0, -1, -1}, (__m512i)(__v8di){0, 0, -1, -1, 0, -1, 0, -1}), 0, 0, -1, 0, 0, -1, 0, 0)); __m512i test_mm512_maskz_sub_epi32 (__mmask16 __k,__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_maskz_sub_epi32 diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c index 04df59ef331a5..affb6e7d3b7d9 100644 --- a/clang/test/CodeGen/X86/sse2-builtins.c +++ b/clang/test/CodeGen/X86/sse2-builtins.c @@ -96,6 +96,7 @@ __m128i test_mm_and_si128(__m128i A, __m128i B) { // CHECK: and <2 x i64> return _mm_and_si128(A, B); } +TEST_CONSTEXPR(match_v4si(_mm_and_si128((__m128i)(__v4si){0, -1, 0, -1}, (__m128i)(__v4si){0, 0, -1, -1}), 0, 0, 0, -1)); __m128d test_mm_andnot_pd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_andnot_pd @@ -111,6 +112,7 @@ __m128i test_mm_andnot_si128(__m128i A, __m128i B) { // CHECK: and <2 x i64> return _mm_andnot_si128(A, B); } +TEST_CONSTEXPR(match_v4si(_mm_andnot_si128((__m128i)(__v4si){0, -1, 0, -1}, (__m128i)(__v4si){0, 0, -1, -1}), 0, 0, -1, 0)); __m128i test_mm_avg_epu8(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_avg_epu8 @@ -963,6 +965,7 @@ __m128i test_mm_or_si128(__m128i A, __m128i B) { // CHECK: or <2 x i64> %{{.*}}, %{{.*}} return _mm_or_si128(A, B); } +TEST_CONSTEXPR(match_v4si(_mm_or_si128((__m128i)(__v4si){0, -1, 0, -1}, (__m128i)(__v4si){0, 0, -1, -1}), 0, -1, -1, -1)); __m128i test_mm_packs_epi16(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_packs_epi16 @@ -1831,3 +1834,4 @@ __m128i test_mm_xor_si128(__m128i A, __m128i B) { // CHECK: xor <2 x i64> %{{.*}}, %{{.*}} return _mm_xor_si128(A, B); } +TEST_CONSTEXPR(match_v4si(_mm_xor_si128((__m128i)(__v4si){0, -1, 0, -1}, (__m128i)(__v4si){0, 0, -1, -1}), 0, -1, -1, 0));