diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 3ba21cd73888c..74343c33ba76f 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -835,45 +835,38 @@ _mm512_xor_si512(__m512i __a, __m512i __b) /* Arithmetic */ -static __inline __m512d __DEFAULT_FN_ATTRS512 -_mm512_add_pd(__m512d __a, __m512d __b) -{ +static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_add_pd(__m512d __a, __m512d __b) { return (__m512d)((__v8df)__a + (__v8df)__b); } -static __inline __m512 __DEFAULT_FN_ATTRS512 -_mm512_add_ps(__m512 __a, __m512 __b) -{ +static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_add_ps(__m512 __a, __m512 __b) { return (__m512)((__v16sf)__a + (__v16sf)__b); } -static __inline __m512d __DEFAULT_FN_ATTRS512 -_mm512_mul_pd(__m512d __a, __m512d __b) -{ +static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mul_pd(__m512d __a, __m512d __b) { return (__m512d)((__v8df)__a * (__v8df)__b); } -static __inline __m512 __DEFAULT_FN_ATTRS512 -_mm512_mul_ps(__m512 __a, __m512 __b) -{ +static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mul_ps(__m512 __a, __m512 __b) { return (__m512)((__v16sf)__a * (__v16sf)__b); } -static __inline __m512d __DEFAULT_FN_ATTRS512 -_mm512_sub_pd(__m512d __a, __m512d __b) -{ +static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_sub_pd(__m512d __a, __m512d __b) { return (__m512d)((__v8df)__a - (__v8df)__b); } -static __inline __m512 __DEFAULT_FN_ATTRS512 -_mm512_sub_ps(__m512 __a, __m512 __b) -{ +static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_sub_ps(__m512 __a, __m512 __b) { return (__m512)((__v16sf)__a - (__v16sf)__b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_add_epi64 (__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_add_epi64(__m512i __A, __m512i __B) { return (__m512i) ((__v8du) __A + (__v8du) __B); } @@ -2315,9 +2308,8 @@ _mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) { (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(R))) -static __inline __m512d __DEFAULT_FN_ATTRS512 -_mm512_div_pd(__m512d __a, __m512d __b) -{ +static __inline __m512d + __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_div_pd(__m512d __a, __m512d __b) { return (__m512d)((__v8df)__a/(__v8df)__b); } @@ -2335,9 +2327,8 @@ _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) { (__v8df)_mm512_setzero_pd()); } -static __inline __m512 __DEFAULT_FN_ATTRS512 -_mm512_div_ps(__m512 __a, __m512 __b) -{ +static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_div_ps(__m512 __a, __m512 __b) { return (__m512)((__v16sf)__a/(__v16sf)__b); } @@ -4123,9 +4114,8 @@ _mm512_cvtss_f32(__m512 __a) /* Unpack and Interleave */ -static __inline __m512d __DEFAULT_FN_ATTRS512 -_mm512_unpackhi_pd(__m512d __a, __m512d __b) -{ +static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_unpackhi_pd(__m512d __a, __m512d __b) { return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b, 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6); } @@ -4146,9 +4136,8 @@ _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B) (__v8df)_mm512_setzero_pd()); } -static __inline __m512d __DEFAULT_FN_ATTRS512 -_mm512_unpacklo_pd(__m512d __a, __m512d __b) -{ +static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_unpacklo_pd(__m512d __a, __m512d __b) { return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b, 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6); } @@ -4169,9 +4158,8 @@ _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B) (__v8df)_mm512_setzero_pd()); } -static __inline __m512 __DEFAULT_FN_ATTRS512 -_mm512_unpackhi_ps(__m512 __a, __m512 __b) -{ +static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_unpackhi_ps(__m512 __a, __m512 __b) { return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b, 2, 18, 3, 19, 2+4, 18+4, 3+4, 19+4, @@ -4195,9 +4183,8 @@ _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B) (__v16sf)_mm512_setzero_ps()); } -static __inline __m512 __DEFAULT_FN_ATTRS512 -_mm512_unpacklo_ps(__m512 __a, __m512 __b) -{ +static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_unpacklo_ps(__m512 __a, __m512 __b) { return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b, 0, 16, 1, 17, 0+4, 16+4, 1+4, 17+4, diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 1da50f0211954..2be4f68067a5c 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -87,9 +87,8 @@ typedef __bf16 __m256bh __attribute__((__vector_size__(32), __aligned__(32))); /// A 256-bit vector of [4 x double] containing one of the source operands. /// \returns A 256-bit vector of [4 x double] containing the sums of both /// operands. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_add_pd(__m256d __a, __m256d __b) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_add_pd(__m256d __a, __m256d __b) { return (__m256d)((__v4df)__a+(__v4df)__b); } @@ -105,9 +104,8 @@ _mm256_add_pd(__m256d __a, __m256d __b) /// A 256-bit vector of [8 x float] containing one of the source operands. /// \returns A 256-bit vector of [8 x float] containing the sums of both /// operands. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_add_ps(__m256 __a, __m256 __b) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_add_ps(__m256 __a, + __m256 __b) { return (__m256)((__v8sf)__a+(__v8sf)__b); } @@ -123,9 +121,8 @@ _mm256_add_ps(__m256 __a, __m256 __b) /// A 256-bit vector of [4 x double] containing the subtrahend. /// \returns A 256-bit vector of [4 x double] containing the differences between /// both operands. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_sub_pd(__m256d __a, __m256d __b) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_sub_pd(__m256d __a, __m256d __b) { return (__m256d)((__v4df)__a-(__v4df)__b); } @@ -141,9 +138,8 @@ _mm256_sub_pd(__m256d __a, __m256d __b) /// A 256-bit vector of [8 x float] containing the subtrahend. /// \returns A 256-bit vector of [8 x float] containing the differences between /// both operands. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_sub_ps(__m256 __a, __m256 __b) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_sub_ps(__m256 __a, + __m256 __b) { return (__m256)((__v8sf)__a-(__v8sf)__b); } @@ -197,9 +193,8 @@ _mm256_addsub_ps(__m256 __a, __m256 __b) /// A 256-bit vector of [4 x double] containing the divisor. /// \returns A 256-bit vector of [4 x double] containing the quotients of both /// operands. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_div_pd(__m256d __a, __m256d __b) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_div_pd(__m256d __a, __m256d __b) { return (__m256d)((__v4df)__a/(__v4df)__b); } @@ -215,9 +210,8 @@ _mm256_div_pd(__m256d __a, __m256d __b) /// A 256-bit vector of [8 x float] containing the divisor. /// \returns A 256-bit vector of [8 x float] containing the quotients of both /// operands. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_div_ps(__m256 __a, __m256 __b) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_div_ps(__m256 __a, + __m256 __b) { return (__m256)((__v8sf)__a/(__v8sf)__b); } @@ -317,9 +311,8 @@ _mm256_min_ps(__m256 __a, __m256 __b) /// A 256-bit vector of [4 x double] containing one of the operands. /// \returns A 256-bit vector of [4 x double] containing the products of both /// operands. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_mul_pd(__m256d __a, __m256d __b) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_mul_pd(__m256d __a, __m256d __b) { return (__m256d)((__v4df)__a * (__v4df)__b); } @@ -335,9 +328,8 @@ _mm256_mul_pd(__m256d __a, __m256d __b) /// A 256-bit vector of [8 x float] containing one of the operands. /// \returns A 256-bit vector of [8 x float] containing the products of both /// operands. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_mul_ps(__m256 __a, __m256 __b) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_mul_ps(__m256 __a, + __m256 __b) { return (__m256)((__v8sf)__a * (__v8sf)__b); } @@ -2462,9 +2454,8 @@ _mm256_movedup_pd(__m256d __a) /// Bits [127:64] are written to bits [127:64] of the return value. \n /// Bits [255:192] are written to bits [255:192] of the return value. \n /// \returns A 256-bit vector of [4 x double] containing the interleaved values. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_unpackhi_pd(__m256d __a, __m256d __b) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_unpackhi_pd(__m256d __a, __m256d __b) { return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 1, 5, 1+2, 5+2); } @@ -2484,9 +2475,8 @@ _mm256_unpackhi_pd(__m256d __a, __m256d __b) /// Bits [63:0] are written to bits [127:64] of the return value. \n /// Bits [191:128] are written to bits [255:192] of the return value. \n /// \returns A 256-bit vector of [4 x double] containing the interleaved values. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_unpacklo_pd(__m256d __a, __m256d __b) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_unpacklo_pd(__m256d __a, __m256d __b) { return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 0, 4, 0+2, 4+2); } @@ -2511,9 +2501,8 @@ _mm256_unpacklo_pd(__m256d __a, __m256d __b) /// Bits [223:192] are written to bits [191:160] of the return value. \n /// Bits [255:224] are written to bits [255:224] of the return value. /// \returns A 256-bit vector of [8 x float] containing the interleaved values. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_unpackhi_ps(__m256 __a, __m256 __b) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_unpackhi_ps(__m256 __a, __m256 __b) { return __builtin_shufflevector((__v8sf)__a, (__v8sf)__b, 2, 10, 2+1, 10+1, 6, 14, 6+1, 14+1); } @@ -2538,9 +2527,8 @@ _mm256_unpackhi_ps(__m256 __a, __m256 __b) /// Bits [159:128] are written to bits [191:160] of the return value. \n /// Bits [191:160] are written to bits [255:224] of the return value. /// \returns A 256-bit vector of [8 x float] containing the interleaved values. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_unpacklo_ps(__m256 __a, __m256 __b) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_unpacklo_ps(__m256 __a, __m256 __b) { return __builtin_shufflevector((__v8sf)__a, (__v8sf)__b, 0, 8, 0+1, 8+1, 4, 12, 4+1, 12+1); } diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index 2d437649f75c2..e2c9f96139fc0 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -20,12 +20,14 @@ __m256d test_mm256_add_pd(__m256d A, __m256d B) { // CHECK: fadd <4 x double> return _mm256_add_pd(A, B); } +TEST_CONSTEXPR(match_m256d( _mm256_add_pd((__m256d){-4.0, -5.0, +6.0, +7.0}, (__m256d){-4.0, -5.0, +6.0, +7.0}), -8.0, -10.0, +12.0, +14.0)); __m256 test_mm256_add_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_add_ps // CHECK: fadd <8 x float> return _mm256_add_ps(A, B); } +TEST_CONSTEXPR(match_m256(_mm256_add_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}), -8.0f, -10.0f, +12.0f, +14.0f, +14.0f, +12.0f, -10.0f, -8.0f)); __m256d test_mm256_addsub_pd(__m256d A, __m256d B) { // CHECK-LABEL: test_mm256_addsub_pd @@ -977,12 +979,14 @@ __m256d test_mm256_div_pd(__m256d A, __m256d B) { // CHECK: fdiv <4 x double> return _mm256_div_pd(A, B); } +TEST_CONSTEXPR(match_m256d( _mm256_div_pd((__m256d){-4.0, -5.0, +6.0, +7.0}, (__m256d){-1.0, +1.0, -1.0, +1.0}), +4.0, -5.0, -6.0, +7.0)); __m256 test_mm256_div_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_div_ps // CHECK: fdiv <8 x float> return _mm256_div_ps(A, B); } +TEST_CONSTEXPR(match_m256( _mm256_div_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){-1.0f, +1.0f, -1.0f, +1.0f, +1.0f, -1.0f, +1.0f, -1.0f}), +4.0f, -5.0f, -6.0f, +7.0f, +7.0f, -6.0f, -5.0f, +4.0f)); __m256 test_mm256_dp_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_dp_ps @@ -1295,12 +1299,14 @@ __m256d test_mm256_mul_pd(__m256d A, __m256d B) { // CHECK: fmul <4 x double> return _mm256_mul_pd(A, B); } +TEST_CONSTEXPR(match_m256d( _mm256_mul_pd((__m256d){-4.0, -5.0, +6.0, +7.0}, (__m256d){-4.0, -5.0, +6.0, +7.0}), +16.0, +25.0, +36.0, +49.0)); __m256 test_mm256_mul_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_mul_ps // CHECK: fmul <8 x float> return _mm256_mul_ps(A, B); } +TEST_CONSTEXPR(match_m256( _mm256_mul_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}), +16.0f, +25.0f, +36.0f, +49.0f, +49.0f, +36.0f, +25.0f, +16.0f)); __m256d test_mm256_or_pd(__m256d A, __m256d B) { // CHECK-LABEL: test_mm256_or_pd @@ -1933,12 +1939,14 @@ __m256d test_mm256_sub_pd(__m256d A, __m256d B) { // CHECK: fsub <4 x double> return _mm256_sub_pd(A, B); } +TEST_CONSTEXPR(match_m256d( _mm256_sub_pd((__m256d){-4.0, -5.0, +6.0, +7.0}, (__m256d){-0.0, +0.0, +2.0, -1.0}), -4.0, -5.0, 4.0, 8.0)); __m256 test_mm256_sub_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_sub_ps // CHECK: fsub <8 x float> return _mm256_sub_ps(A, B); } +TEST_CONSTEXPR(match_m256( _mm256_sub_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){-0.0f, +0.0f, +2.0f, -1.0f, -1.0f, +2.0f, +0.0f, -0.0f}), -4.0f, -5.0f, 4.0f, 8.0f, 8.0f, 4.0f, -5.0f, -4.0f)); int test_mm_testc_pd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_testc_pd @@ -2062,24 +2070,28 @@ __m256d test_mm256_unpackhi_pd(__m256d A, __m256d B) { // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> return _mm256_unpackhi_pd(A, B); } +TEST_CONSTEXPR(match_m256d(_mm256_unpackhi_pd((__m256d){+1.0, +2.0, +3.0, +4.0}, (__m256d){+5.0, +6.0, +7.0, +8.0}), +2.0, +6.0, +4.0, +8.0)); __m256 test_mm256_unpackhi_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_unpackhi_ps // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> return _mm256_unpackhi_ps(A, B); } +TEST_CONSTEXPR(match_m256(_mm256_unpackhi_ps((__m256){+0.0f, +1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f}, (__m256){+10.0f, +11.0f, +12.0f, +13.0f, +14.0f, +15.0f, +16.0f, +17.0f}), +2.0f, +12.0f, +3.0f, +13.0f, +6.0f, +16.0f, +7.0f, +17.0f)); __m256d test_mm256_unpacklo_pd(__m256d A, __m256d B) { // CHECK-LABEL: test_mm256_unpacklo_pd // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> return _mm256_unpacklo_pd(A, B); } +TEST_CONSTEXPR(match_m256d(_mm256_unpacklo_pd((__m256d){+1.0, +2.0, +3.0, +4.0}, (__m256d){+5.0, +6.0, +7.0, +8.0}), +1.0, +5.0, +3.0, +7.0)); __m256 test_mm256_unpacklo_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_unpacklo_ps // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> return _mm256_unpacklo_ps(A, B); } +TEST_CONSTEXPR(match_m256(_mm256_unpacklo_ps((__m256){+0.0f, +1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f}, (__m256){+10.0f, +11.0f, +12.0f, +13.0f, +14.0f, +15.0f, +16.0f, +17.0f}), +0.0f, +10.0f, +1.0f, +11.0f, +4.0f, +14.0f, +5.0f, +15.0f)); __m256d test_mm256_xor_pd(__m256d A, __m256d B) { // CHECK-LABEL: test_mm256_xor_pd diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index 54470358a94e1..8c14c571d96a7 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -154,6 +154,7 @@ __m512 test_mm512_add_ps(__m512 a, __m512 b) // CHECK: fadd <16 x float> return _mm512_add_ps(a, b); } +TEST_CONSTEXPR(match_m512(_mm512_add_ps((__m512){-1.0f, -2.0f, -3.0f, -4.0f, -5.0f, -6.0f, -7.0f, -8.0f, +1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f, +8.0f}, (__m512){-1.0f, -2.0f, -3.0f, -4.0f, -5.0f, -6.0f, -7.0f, -8.0f, +1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f, +8.0f}), -2.0f, -4.0f, -6.0f, -8.0f, -10.0f, -12.0f, -14.0f, -16.0f, +2.0f, +4.0f, +6.0f, +8.0f, +10.0f, +12.0f, +14.0f, +16.0f)); __m512d test_mm512_add_pd(__m512d a, __m512d b) { @@ -161,6 +162,7 @@ __m512d test_mm512_add_pd(__m512d a, __m512d b) // CHECK: fadd <8 x double> return _mm512_add_pd(a, b); } +TEST_CONSTEXPR(match_m512d(_mm512_add_pd((__m512d){-1.0, -2.0, -3.0, -4.0, +1.0, +2.0, +3.0, +4.0}, (__m512d){-1.0, -2.0, -3.0, -4.0, +1.0, +2.0, +3.0, +4.0}), -2.0, -4.0, -6.0, -8.0, +2.0, +4.0, +6.0, +8.0)); __m512 test_mm512_mul_ps(__m512 a, __m512 b) { @@ -168,6 +170,7 @@ __m512 test_mm512_mul_ps(__m512 a, __m512 b) // CHECK: fmul <16 x float> return _mm512_mul_ps(a, b); } +TEST_CONSTEXPR(match_m512(_mm512_mul_ps((__m512){-1.0f, -2.0f, -3.0f, -4.0f, -5.0f, -6.0f, -7.0f, -8.0f, +1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f, +8.0f}, (__m512){-1.0f, -2.0f, -3.0f, -4.0f, -5.0f, -6.0f, -7.0f, -8.0f, +1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f, +8.0f}), +1.0f, +4.0f, +9.0f, +16.0f, +25.0f, +36.0f, +49.0f, +64.0f, +1.0f, +4.0f, +9.0f, +16.0f, +25.0f, +36.0f, +49.0f, +64.0f)); __m512d test_mm512_mul_pd(__m512d a, __m512d b) { @@ -175,6 +178,7 @@ __m512d test_mm512_mul_pd(__m512d a, __m512d b) // CHECK: fmul <8 x double> return _mm512_mul_pd(a, b); } +TEST_CONSTEXPR(match_m512d(_mm512_mul_pd((__m512d){-1.0, -2.0, -3.0, -4.0, +1.0, +2.0, +3.0, +4.0}, (__m512d){-1.0, -2.0, -3.0, -4.0, +1.0, +2.0, +3.0, +4.0}), +1.0, +4.0, +9.0, +16.0, +1.0, +4.0, +9.0, +16.0)); void test_mm512_storeu_si512 (void *__P, __m512i __A) { @@ -1261,6 +1265,7 @@ __m512d test_mm512_unpackhi_pd(__m512d a, __m512d b) // CHECK: shufflevector <8 x double> {{.*}} return _mm512_unpackhi_pd(a, b); } +TEST_CONSTEXPR(match_m512d(_mm512_unpackhi_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}), +2.0, +10.0, +4.0, +12.0, +6.0, +14.0, +8.0, +16.0)); __m512d test_mm512_unpacklo_pd(__m512d a, __m512d b) { @@ -1268,6 +1273,7 @@ __m512d test_mm512_unpacklo_pd(__m512d a, __m512d b) // CHECK: shufflevector <8 x double> {{.*}} return _mm512_unpacklo_pd(a, b); } +TEST_CONSTEXPR(match_m512d(_mm512_unpacklo_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}), +1.0, +9.0, +3.0, +11.0, +5.0, +13.0, +7.0, +15.0)); __m512 test_mm512_unpackhi_ps(__m512 a, __m512 b) { @@ -1275,6 +1281,7 @@ __m512 test_mm512_unpackhi_ps(__m512 a, __m512 b) // CHECK: shufflevector <16 x float> {{.*}} return _mm512_unpackhi_ps(a, b); } +TEST_CONSTEXPR(match_m512(_mm512_unpackhi_ps((__m512){0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f}, (__m512){16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f}), +2.0f, +18.0f, +3.0f, +19.0f, +6.0f, +22.0f, +7.0f, +23.0f, +10.0f, +26.0f, +11.0f, +27.0f, +14.0f, +30.0f, +15.0f, +31.0f)); __m512 test_mm512_unpacklo_ps(__m512 a, __m512 b) { @@ -1282,6 +1289,7 @@ __m512 test_mm512_unpacklo_ps(__m512 a, __m512 b) // CHECK: shufflevector <16 x float> {{.*}} return _mm512_unpacklo_ps(a, b); } +TEST_CONSTEXPR(match_m512(_mm512_unpacklo_ps((__m512){0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f}, (__m512){16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f}), +0.0f, +16.0f, +1.0f, +17.0f, +4.0f, +20.0f, +5.0f, +21.0f, +8.0f, +24.0f, +9.0f, +25.0f, +12.0f, +28.0f, +13.0f, +29.0f)); __mmask16 test_mm512_cmp_round_ps_mask(__m512 a, __m512 b) { // CHECK-LABEL: test_mm512_cmp_round_ps_mask @@ -3551,6 +3559,7 @@ __m512d test_mm512_div_pd(__m512d __a, __m512d __b) { // CHECK: fdiv <8 x double> return _mm512_div_pd(__a,__b); } +TEST_CONSTEXPR(match_m512d(_mm512_div_pd((__m512d){+8.0, +6.0, +4.0, +2.0, -8.0, -6.0, -4.0, -2.0}, (__m512d){+2.0, +2.0, +2.0, +2.0, -2.0, -2.0, -2.0, -2.0}), +4.0, +3.0, +2.0, +1.0, +4.0, +3.0, +2.0, +1.0)); __m512d test_mm512_mask_div_pd(__m512d __w, __mmask8 __u, __m512d __a, __m512d __b) { // CHECK-LABEL: test_mm512_mask_div_pd // CHECK: fdiv <8 x double> %{{.*}}, %{{.*}} @@ -3585,6 +3594,7 @@ __m512 test_mm512_div_ps(__m512 __A, __m512 __B) { // CHECK: fdiv <16 x float> return _mm512_div_ps(__A,__B); } +TEST_CONSTEXPR(match_m512(_mm512_div_ps((__m512){+16.0f, +14.0f, +12.0f, +10.0f, +8.0f, +6.0f, +4.0f, +2.0f, -16.0f, -14.0f, -12.0f, -10.0f, -8.0f, -6.0f, -4.0f, -2.0f}, (__m512){+2.0f, +2.0f, +2.0f, +2.0f, +2.0f, +2.0f, +2.0f, +2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f}), +8.0f, +7.0f, +6.0f, +5.0f, +4.0f, +3.0f, +2.0f, +1.0f, +8.0f, +7.0f, +6.0f, +5.0f, +4.0f, +3.0f, +2.0f, +1.0f)); __m512 test_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { // CHECK-LABEL: test_mm512_mask_div_ps // CHECK: fdiv <16 x float> %{{.*}}, %{{.*}}