diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index 78e8a422db4c1..06d08a486a954 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -2127,8 +2127,9 @@ _mm_add_epi32(__m128i __a, __m128i __b) {
 /// \param __b
 ///    A 64-bit integer.
 /// \returns A 64-bit integer containing the sum of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_si64(__m64 __a, __m64 __b) {
-  return (__m64)(((unsigned long long)__a) + ((unsigned long long)__b));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_si64(__m64 __a,
+                                                                  __m64 __b) {
+  return (__m64)(((__v1du)__a)[0] + ((__v1du)__b)[0]);
 }
 
 /// Adds the corresponding elements of two 128-bit vectors of [2 x i64],
@@ -2557,8 +2558,9 @@ _mm_sub_epi32(__m128i __a, __m128i __b) {
 ///    A 64-bit integer vector containing the subtrahend.
 /// \returns A 64-bit integer vector containing the difference of the values in
 ///    the operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_si64(__m64 __a, __m64 __b) {
-  return (__m64)((unsigned long long)__a - (unsigned long long)__b);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_si64(__m64 __a,
+                                                                  __m64 __b) {
+  return (__m64)(((__v1du)__a)[0] - ((__v1du)__b)[0]);
 }
 
 /// Subtracts the corresponding elements of two [2 x i64] vectors.
diff --git a/clang/lib/Headers/mmintrin.h b/clang/lib/Headers/mmintrin.h
index dc0fa5c523eeb..5a02a45512345 100644
--- a/clang/lib/Headers/mmintrin.h
+++ b/clang/lib/Headers/mmintrin.h
@@ -85,7 +85,7 @@ _mm_empty(void) {
 ///    A 32-bit integer value.
 /// \returns A 64-bit integer vector. The lower 32 bits contain the value of the
 ///    parameter. The upper 32 bits are set to 0.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
 _mm_cvtsi32_si64(int __i)
 {
     return __extension__ (__m64)(__v2si){__i, 0};
@@ -102,7 +102,7 @@ _mm_cvtsi32_si64(int __i)
 ///    A 64-bit integer vector.
 /// \returns A 32-bit signed integer value containing the lower 32 bits of the
 ///    parameter.
-static __inline__ int __DEFAULT_FN_ATTRS_SSE2
+static __inline__ int __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
 _mm_cvtsi64_si32(__m64 __m)
 {
     return ((__v2si)__m)[0];
@@ -118,10 +118,10 @@ _mm_cvtsi64_si32(__m64 __m)
 ///    A 64-bit signed integer.
 /// \returns A 64-bit integer vector containing the same bitwise pattern as the
 ///    parameter.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
 _mm_cvtsi64_m64(long long __i)
 {
-    return (__m64)__i;
+    return __extension__ (__m64)(__v1di){__i};
 }
 
 /// Casts a 64-bit integer vector into a 64-bit signed integer value.
@@ -134,10 +134,10 @@ _mm_cvtsi64_m64(long long __i)
 ///    A 64-bit integer vector.
 /// \returns A 64-bit signed integer containing the same bitwise pattern as the
 ///    parameter.
-static __inline__ long long __DEFAULT_FN_ATTRS_SSE2
+static __inline__ long long __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
 _mm_cvtm64_si64(__m64 __m)
 {
-    return (long long)__m;
+    return ((__v1di)__m)[0];
 }
 
 /// Converts, with saturation, 16-bit signed integers from both 64-bit integer
@@ -379,7 +379,7 @@ _mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector of [8 x i8].
 /// \returns A 64-bit integer vector of [8 x i8] containing the sums of both
 ///    parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
 _mm_add_pi8(__m64 __m1, __m64 __m2)
 {
     return (__m64)(((__v8qu)__m1) + ((__v8qu)__m2));
@@ -400,7 +400,7 @@ _mm_add_pi8(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector of [4 x i16].
 /// \returns A 64-bit integer vector of [4 x i16] containing the sums of both
 ///    parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
 _mm_add_pi16(__m64 __m1, __m64 __m2)
 {
     return (__m64)(((__v4hu)__m1) + ((__v4hu)__m2));
@@ -421,7 +421,7 @@ _mm_add_pi16(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector of [2 x i32].
 /// \returns A 64-bit integer vector of [2 x i32] containing the sums of both
 ///    parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
 _mm_add_pi32(__m64 __m1, __m64 __m2)
 {
     return (__m64)(((__v2su)__m1) + ((__v2su)__m2));
@@ -536,7 +536,7 @@ _mm_adds_pu16(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector of [8 x i8] containing the subtrahends.
 /// \returns A 64-bit integer vector of [8 x i8] containing the differences of
 ///    both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
 _mm_sub_pi8(__m64 __m1, __m64 __m2)
 {
     return (__m64)(((__v8qu)__m1) - ((__v8qu)__m2));
@@ -557,7 +557,7 @@ _mm_sub_pi8(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector of [4 x i16] containing the subtrahends.
 /// \returns A 64-bit integer vector of [4 x i16] containing the differences of
 ///    both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
 _mm_sub_pi16(__m64 __m1, __m64 __m2)
 {
     return (__m64)(((__v4hu)__m1) - ((__v4hu)__m2));
@@ -578,7 +578,7 @@ _mm_sub_pi16(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector of [2 x i32] containing the subtrahends.
 /// \returns A 64-bit integer vector of [2 x i32] containing the differences of
 ///    both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
 _mm_sub_pi32(__m64 __m1, __m64 __m2)
 {
     return (__m64)(((__v2su)__m1) - ((__v2su)__m2));
@@ -745,7 +745,7 @@ _mm_mulhi_pi16(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector of [4 x i16].
 /// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits
 ///    of the products of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
 _mm_mullo_pi16(__m64 __m1, __m64 __m2)
 {
     return (__m64)(((__v4hu)__m1) * ((__v4hu)__m2));
@@ -1134,7 +1134,7 @@ _mm_srli_si64(__m64 __m, int __count)
 ///    A 64-bit integer vector.
 /// \returns A 64-bit integer vector containing the bitwise AND of both
 ///    parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
 _mm_and_si64(__m64 __m1, __m64 __m2)
 {
     return (__m64)(((__v1du)__m1) & ((__v1du)__m2));
@@ -1155,7 +1155,7 @@ _mm_and_si64(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector.
 /// \returns A 64-bit integer vector containing the bitwise AND of the second
 ///    parameter and the one's complement of the first parameter.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
 _mm_andnot_si64(__m64 __m1, __m64 __m2)
 {
     return (__m64)(~((__v1du)__m1) & ((__v1du)__m2));
@@ -1173,7 +1173,7 @@ _mm_andnot_si64(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector.
 /// \returns A 64-bit integer vector containing the bitwise OR of both
 ///    parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
 _mm_or_si64(__m64 __m1, __m64 __m2)
 {
     return (__m64)(((__v1du)__m1) | ((__v1du)__m2));
@@ -1191,7 +1191,7 @@ _mm_or_si64(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector.
 /// \returns A 64-bit integer vector containing the bitwise exclusive OR of both
 ///    parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
 _mm_xor_si64(__m64 __m1, __m64 __m2)
 {
     return (__m64)(((__v1du)__m1) ^ ((__v1du)__m2));
@@ -1213,7 +1213,7 @@ _mm_xor_si64(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector of [8 x i8].
 /// \returns A 64-bit integer vector of [8 x i8] containing the comparison
 ///    results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
 _mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
 {
     return (__m64)(((__v8qi)__m1) == ((__v8qi)__m2));
@@ -1235,7 +1235,7 @@ _mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector of [4 x i16].
 /// \returns A 64-bit integer vector of [4 x i16] containing the comparison
 ///    results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
 _mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
 {
     return (__m64)(((__v4hi)__m1) == ((__v4hi)__m2));
@@ -1257,7 +1257,7 @@ _mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector of [2 x i32].
 /// \returns A 64-bit integer vector of [2 x i32] containing the comparison
 ///    results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
 _mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
 {
     return (__m64)(((__v2si)__m1) == ((__v2si)__m2));
@@ -1279,7 +1279,7 @@ _mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector of [8 x i8].
 /// \returns A 64-bit integer vector of [8 x i8] containing the comparison
 ///    results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
 _mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
 {
     /* This function always performs a signed comparison, but __v8qi is a char
@@ -1303,7 +1303,7 @@ _mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector of [4 x i16].
 /// \returns A 64-bit integer vector of [4 x i16] containing the comparison
 ///    results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
 _mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
 {
     return (__m64)((__v4hi)__m1 > (__v4hi)__m2);
@@ -1325,7 +1325,7 @@ _mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
 ///    A 64-bit integer vector of [2 x i32].
 /// \returns A 64-bit integer vector of [2 x i32] containing the comparison
 ///    results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
 _mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
 {
     return (__m64)((__v2si)__m1 > (__v2si)__m2);
diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c
index 52cbe45ca238b..30e2cfeca1fd0 100644
--- a/clang/test/CodeGen/X86/mmx-builtins.c
+++ b/clang/test/CodeGen/X86/mmx-builtins.c
@@ -34,24 +34,28 @@ __m64 test_mm_add_pi8(__m64 a, __m64 b) {
   // CHECK: add <8 x i8> {{%.*}}, {{%.*}}
   return _mm_add_pi8(a, b);
 }
+TEST_CONSTEXPR(match_v8qi(_mm_add_pi8(_mm_setr_pi8(-3, +2, -1, 0, +1, -2, +3, -4), _mm_setr_pi8(-18, +16, -14, +12, -10, +8, +6, -4)), -21, +18, -15, +12, -9, +6, +9, -8));
 
 __m64 test_mm_add_pi16(__m64 a, __m64 b) {
   // CHECK-LABEL: test_mm_add_pi16
   // CHECK: add <4 x i16> {{%.*}}, {{%.*}}
   return _mm_add_pi16(a, b);
 }
+TEST_CONSTEXPR(match_v4hi(_mm_add_pi16((__m64)(__v4hi){+1, -2, +3, -4}, (__m64)(__v4hi){-10, +8, +6, -4}), -9, +6, +9, -8));
 
 __m64 test_mm_add_pi32(__m64 a, __m64 b) {
   // CHECK-LABEL: test_mm_add_pi32
   // CHECK: add <2 x i32> {{%.*}}, {{%.*}}
   return _mm_add_pi32(a, b);
 }
+TEST_CONSTEXPR(match_v2si(_mm_add_pi32((__m64)(__v2si){+5, -3}, (__m64)(__v2si){-9, +8}), -4, +5));
 
 __m64 test_mm_add_si64(__m64 a, __m64 b) {
   // CHECK-LABEL: test_mm_add_si64
   // CHECK: add i64 {{%.*}}, {{%.*}}
   return _mm_add_si64(a, b);
 }
+TEST_CONSTEXPR(match_v1di(_mm_add_si64((__m64)(__v1di){+42}, (__m64)(__v1di){-100}), -58));
 
 __m64 test_mm_adds_pi8(__m64 a, __m64 b) {
   // CHECK-LABEL: test_mm_adds_pi8
@@ -88,6 +92,7 @@ __m64 test_mm_and_si64(__m64 a, __m64 b) {
   // CHECK: and <1 x i64> {{%.*}}, {{%.*}}
   return _mm_and_si64(a, b);
 }
+TEST_CONSTEXPR(match_v4hi(_mm_and_si64((__m64)(__v4hi){0, -1, 0, -1}, (__m64)(__v4hi){0, 0, -1, -1}), 0, 0, 0, -1));
 
 __m64 test_mm_andnot_si64(__m64 a, __m64 b) {
   // CHECK-LABEL: test_mm_andnot_si64
@@ -95,6 +100,7 @@ __m64 test_mm_andnot_si64(__m64 a, __m64 b) {
   // CHECK: and <1 x i64> [[TMP]], {{%.*}}
   return _mm_andnot_si64(a, b);
 }
+TEST_CONSTEXPR(match_v4hi(_mm_andnot_si64((__m64)(__v4hi){0, -1, 0, -1}, (__m64)(__v4hi){0, 0, -1, -1}), 0, 0, -1, 0));
 
 __m64 test_mm_avg_pu8(__m64 a, __m64 b) {
   // CHECK-LABEL: test_mm_avg_pu8
@@ -114,6 +120,7 @@ __m64 test_mm_cmpeq_pi8(__m64 a, __m64 b) {
   // CHECK-NEXT: {{%.*}} = sext <8 x i1> [[CMP]] to <8 x i8>
   return _mm_cmpeq_pi8(a, b);
 }
+TEST_CONSTEXPR(match_v8qi(_mm_cmpeq_pi8(_mm_setr_pi8(-3, +2, -1, 0, +1, -2, +3, -4), _mm_setr_pi8(-3, -2, +1, 0, -1, -2, -3, -4)), -1, 0, 0, -1, 0, -1, 0, -1));
 
 __m64 test_mm_cmpeq_pi16(__m64 a, __m64 b) {
   // CHECK-LABEL: test_mm_cmpeq_pi16
@@ -121,6 +128,7 @@ __m64 test_mm_cmpeq_pi16(__m64 a, __m64 b) {
   // CHECK-NEXT: {{%.*}} = sext <4 x i1> [[CMP]] to <4 x i16>
   return _mm_cmpeq_pi16(a, b);
 }
+TEST_CONSTEXPR(match_v4hi(_mm_cmpeq_pi16((__m64)(__v4hi){+1, -2, +3, -4}, (__m64)(__v4hi){-1, -1, +3, +4}), 0, 0, -1, 0));
 
 __m64 test_mm_cmpeq_pi32(__m64 a, __m64 b) {
   // CHECK-LABEL: test_mm_cmpeq_pi32
@@ -128,6 +136,7 @@ __m64 test_mm_cmpeq_pi32(__m64 a, __m64 b) {
   // CHECK-NEXT: {{%.*}} = sext <2 x i1> [[CMP]] to <2 x i32>
   return _mm_cmpeq_pi32(a, b);
 }
+TEST_CONSTEXPR(match_v2si(_mm_cmpeq_pi32((__m64)(__v2si){+5, -3}, (__m64)(__v2si){-5, -3}), 0, -1));
 
 __m64 test_mm_cmpgt_pi8(__m64 a, __m64 b) {
   // CHECK-LABEL: test_mm_cmpgt_pi8
@@ -135,6 +144,7 @@ __m64 test_mm_cmpgt_pi8(__m64 a, __m64 b) {
   // CHECK-NEXT: {{%.*}} = sext <8 x i1> [[CMP]] to <8 x i8>
   return _mm_cmpgt_pi8(a, b);
 }
+TEST_CONSTEXPR(match_v8qi(_mm_cmpgt_pi8(_mm_setr_pi8(-3, +2, -1, 0, +1, -2, +3, -4), _mm_setr_pi8(-3, -2, +1, 0, -1, -2, -3, -4)), 0, -1, 0, 0, -1, 0, -1, 0));
 
 __m64 test_mm_cmpgt_pi16(__m64 a, __m64 b) {
   // CHECK-LABEL: test_mm_cmpgt_pi16
@@ -142,6 +152,7 @@ __m64 test_mm_cmpgt_pi16(__m64 a, __m64 b) {
   // CHECK-NEXT: {{%.*}} = sext <4 x i1> [[CMP]] to <4 x i16>
   return _mm_cmpgt_pi16(a, b);
 }
+TEST_CONSTEXPR(match_v4hi(_mm_cmpgt_pi16((__m64)(__v4hi){+1, -2, +3, -4}, (__m64)(__v4hi){-1, -1, +3, +4}), -1, 0, 0, 0));
 
 __m64 test_mm_cmpgt_pi32(__m64 a, __m64 b) {
   // CHECK-LABEL: test_mm_cmpgt_pi32
@@ -149,6 +160,7 @@ __m64 test_mm_cmpgt_pi32(__m64 a, __m64 b) {
   // CHECK-NEXT: {{%.*}} = sext <2 x i1> [[CMP]] to <2 x i32>
   return _mm_cmpgt_pi32(a, b);
 }
+TEST_CONSTEXPR(match_v2si(_mm_cmpgt_pi32((__m64)(__v2si){+5, -3}, (__m64)(__v2si){-5, -3}), -1, 0));
 
 __m128 test_mm_cvt_pi2ps(__m128 a, __m64 b) {
   // CHECK-LABEL: test_mm_cvt_pi2ps
@@ -210,12 +222,14 @@ __m64 test_mm_cvtsi32_si64(int a) {
   // CHECK: insertelement <2 x i32>
   return _mm_cvtsi32_si64(a);
 }
+TEST_CONSTEXPR(match_v2si(_mm_cvtsi32_si64(-127), -127, 0));
 
 int test_mm_cvtsi64_si32(__m64 a) {
   // CHECK-LABEL: test_mm_cvtsi64_si32
   // CHECK: extractelement <2 x i32>
   return _mm_cvtsi64_si32(a);
 }
+TEST_CONSTEXPR(_mm_cvtsi64_si32((__m64)(__v4hi){-2, 0, -1, -1}) == 65534);
 
 __m64 test_mm_cvttpd_pi32(__m128d a) {
   // CHECK-LABEL: test_mm_cvttpd_pi32
@@ -240,11 +254,13 @@ __m64 test_m_from_int(int a) {
   // CHECK: insertelement <2 x i32>
   return _m_from_int(a);
 }
+TEST_CONSTEXPR(match_v2si(_m_from_int(255), 255, 0));
 
 __m64 test_m_from_int64(long long a) {
   // CHECK-LABEL: test_m_from_int64
   return _m_from_int64(a);
 }
+TEST_CONSTEXPR(match_v1di(_m_from_int64(-65536), -65536LL));
 
 __m64 test_mm_hadd_pi16(__m64 a, __m64 b) {
   // CHECK-LABEL: test_mm_hadd_pi16
@@ -367,12 +383,14 @@ __m64 test_mm_mullo_pi16(__m64 a, __m64 b) {
   // CHECK: mul <4 x i16> {{%.*}}, {{%.*}}
   return _mm_mullo_pi16(a, b);
 }
+TEST_CONSTEXPR(match_v4hi(_mm_mullo_pi16((__m64)(__v4hi){+1, -2, +3, -4}, (__m64)(__v4hi){-10, +8, +6, -4}), -10, -16, +18, +16));
 
 __m64 test_mm_or_si64(__m64 a, __m64 b) {
   // CHECK-LABEL: test_mm_or_si64
   // CHECK: or <1 x i64> {{%.*}}, {{%.*}}
   return _mm_or_si64(a, b);
 }
+TEST_CONSTEXPR(match_v4hi(_mm_or_si64((__m64)(__v4hi){0, -1, 0, -1}, (__m64)(__v4hi){0, 0, -1, -1}), 0, -1, -1, -1));
 
 __m64 test_mm_packs_pi16(__m64 a, __m64 b) {
   // CHECK-LABEL: test_mm_packs_pi16
@@ -644,24 +662,28 @@ __m64 test_mm_sub_pi8(__m64 a, __m64 b) {
   // CHECK: sub <8 x i8> {{%.*}}, {{%.*}}
   return _mm_sub_pi8(a, b);
 }
+TEST_CONSTEXPR(match_v8qi(_mm_sub_pi8(_mm_setr_pi8(-3, +2, -1, 0, +1, -2, +3, -4), _mm_setr_pi8(-18, +16, -14, +12, -10, +8, +6, -4)), +15, -14, +13, -12, +11, -10, -3, 0));
 
 __m64 test_mm_sub_pi16(__m64 a, __m64 b) {
   // CHECK-LABEL: test_mm_sub_pi16
   // CHECK: sub <4 x i16> {{%.*}}, {{%.*}}
   return _mm_sub_pi16(a, b);
 }
+TEST_CONSTEXPR(match_v4hi(_mm_sub_pi16((__m64)(__v4hi){+1, -2, +3, -4}, (__m64)(__v4hi){-10, +8, +6, -4}), +11, -10, -3, 0));
 
 __m64 test_mm_sub_pi32(__m64 a, __m64 b) {
   // CHECK-LABEL: test_mm_sub_pi32
   // CHECK: sub <2 x i32> {{%.*}}, {{%.*}}
   return _mm_sub_pi32(a, b);
 }
+TEST_CONSTEXPR(match_v2si(_mm_sub_pi32((__m64)(__v2si){+5, -3}, (__m64)(__v2si){-9, +8}), +14, -11));
 
 __m64 test_mm_sub_si64(__m64 a, __m64 b) {
   // CHECK-LABEL: test_mm_sub_si64
   // CHECK: sub i64 {{%.*}}, {{%.*}}
   return _mm_sub_si64(a, b);
 }
+TEST_CONSTEXPR(match_v1di(_mm_sub_si64((__m64)(__v1di){+42}, (__m64)(__v1di){-100}), +142));
 
 __m64 test_mm_subs_pi8(__m64 a, __m64 b) {
   // CHECK-LABEL: test_mm_subs_pi8
@@ -692,11 +714,13 @@ int test_m_to_int(__m64 a) {
   // CHECK: extractelement <2 x i32>
   return _m_to_int(a);
 }
+TEST_CONSTEXPR(_m_to_int((__m64)(__v4hi){0, -2, -1, -1}) == -131072);
 
 long long test_m_to_int64(__m64 a) {
   // CHECK-LABEL: test_m_to_int64
   return _m_to_int64(a);
 }
+TEST_CONSTEXPR(_m_to_int64((__m64)(__v4hi){0, -2, 0, -1}) == -281470681874432LL);
 
 __m64 test_mm_unpackhi_pi8(__m64 a, __m64 b) {
   // CHECK-LABEL: test_mm_unpackhi_pi8
@@ -739,3 +763,4 @@ __m64 test_mm_xor_si64(__m64 a, __m64 b) {
   // CHECK: xor <1 x i64> {{%.*}}, {{%.*}}
   return _mm_xor_si64(a, b);
 }
+TEST_CONSTEXPR(match_v4hi(_mm_xor_si64((__m64)(__v4hi){0, -1, 0, -1}, (__m64)(__v4hi){0, 0, -1, -1}), 0, -1, -1, 0));
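
A minimal compile-time usage sketch (illustrative, not part of the patch): assuming clang with the constexpr-enabled headers above, an x86-64 target, and a C++11-or-later translation unit (where the __DEFAULT_FN_ATTRS*_CONSTEXPR macros expand to constexpr), the marked intrinsics can be evaluated in constant expressions, mirroring the TEST_CONSTEXPR checks added to mmx-builtins.c. The names lhs/rhs are placeholders, and __v4hi is the helper vector typedef that mmintrin.h itself defines.

    #include <mmintrin.h>

    // Round-trip a 64-bit value through __m64 entirely at compile time.
    static_assert(_mm_cvtm64_si64(_mm_cvtsi64_m64(0x0123456789ABCDEFLL)) ==
                      0x0123456789ABCDEFLL,
                  "_mm_cvtsi64_m64 / _mm_cvtm64_si64 fold in a constant expression");

    // Lane-wise 16-bit add folded at compile time: {1, 2, 3, 4} + {10, 20, 30, 40}
    // gives {11, 22, 33, 44}, i.e. the little-endian 64-bit pattern checked below.
    constexpr __v4hi lhs = {1, 2, 3, 4};
    constexpr __v4hi rhs = {10, 20, 30, 40};
    static_assert(_mm_cvtm64_si64(_mm_add_pi16((__m64)lhs, (__m64)rhs)) ==
                      ((44LL << 48) | (33LL << 32) | (22LL << 16) | 11LL),
                  "_mm_add_pi16 folds in a constant expression");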