From f0f99274e8b5d48ac12fafbc70b99cbf3b4fdf99 Mon Sep 17 00:00:00 2001 From: donneypr Date: Mon, 8 Sep 2025 19:54:43 -0400 Subject: [PATCH 01/15] [clang][x86][headers] Make SSE2 add/sub intrinsics constexpr --- clang/lib/Headers/emmintrin.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index e4fbe011239d6..ec87180667234 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -2059,7 +2059,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_pd(double *__dp, /// A 128-bit vector of [16 x i8]. /// \returns A 128-bit vector of [16 x i8] containing the sums of both /// parameters. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8(__m128i __a, +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8_CONSTEXPR(__m128i __a, __m128i __b) { return (__m128i)((__v16qu)__a + (__v16qu)__b); } @@ -2080,7 +2080,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8(__m128i __a, /// A 128-bit vector of [8 x i16]. /// \returns A 128-bit vector of [8 x i16] containing the sums of both /// parameters. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16(__m128i __a, +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16_CONSTEXPR(__m128i __a, __m128i __b) { return (__m128i)((__v8hu)__a + (__v8hu)__b); } @@ -2498,7 +2498,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sad_epu8(__m128i __a, /// A 128-bit integer vector containing the subtrahends. /// \returns A 128-bit integer vector containing the differences of the values /// in the operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8(__m128i __a, +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8_CONSTEXPR(__m128i __a, __m128i __b) { return (__m128i)((__v16qu)__a - (__v16qu)__b); } @@ -2515,7 +2515,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8(__m128i __a, /// A 128-bit integer vector containing the subtrahends. /// \returns A 128-bit integer vector containing the differences of the values /// in the operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16(__m128i __a, +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16_CONSTEXPR(__m128i __a, __m128i __b) { return (__m128i)((__v8hu)__a - (__v8hu)__b); } From 590c0e1a3824140efcc209b3eac480fa2f1fe9a4 Mon Sep 17 00:00:00 2001 From: donneypr Date: Mon, 8 Sep 2025 20:05:11 -0400 Subject: [PATCH 02/15] [clang][x86][headers] Make AVX2 add/sub intrinsics constexpr --- clang/lib/Headers/avx2intrin.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index fc12a9bf15e57..37e08b0e48493 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -279,7 +279,7 @@ _mm256_packus_epi32(__m256i __V1, __m256i __V2) /// \param __b /// A 256-bit integer vector containing one of the source operands. /// \returns A 256-bit integer vector containing the sums. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_add_epi8(__m256i __a, __m256i __b) { return (__m256i)((__v32qu)__a + (__v32qu)__b); @@ -298,7 +298,7 @@ _mm256_add_epi8(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the sums. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_add_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hu)__a + (__v16hu)__b); @@ -317,7 +317,7 @@ _mm256_add_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [8 x i32] containing one of the source operands. /// \returns A 256-bit vector of [8 x i32] containing the sums. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_add_epi32(__m256i __a, __m256i __b) { return (__m256i)((__v8su)__a + (__v8su)__b); @@ -336,7 +336,7 @@ _mm256_add_epi32(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [4 x i64] containing one of the source operands. /// \returns A 256-bit vector of [4 x i64] containing the sums. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_add_epi64(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a + (__v4du)__b); @@ -2460,7 +2460,7 @@ _mm256_srl_epi64(__m256i __a, __m128i __count) /// \param __b /// A 256-bit integer vector containing the subtrahends. /// \returns A 256-bit integer vector containing the differences. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_sub_epi8(__m256i __a, __m256i __b) { return (__m256i)((__v32qu)__a - (__v32qu)__b); @@ -2487,7 +2487,7 @@ _mm256_sub_epi8(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing the subtrahends. /// \returns A 256-bit vector of [16 x i16] containing the differences. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_sub_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hu)__a - (__v16hu)__b); @@ -2513,7 +2513,7 @@ _mm256_sub_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [8 x i32] containing the subtrahends. /// \returns A 256-bit vector of [8 x i32] containing the differences. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_sub_epi32(__m256i __a, __m256i __b) { return (__m256i)((__v8su)__a - (__v8su)__b); @@ -2539,7 +2539,7 @@ _mm256_sub_epi32(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [4 x i64] containing the subtrahends. /// \returns A 256-bit vector of [4 x i64] containing the differences. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_sub_epi64(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a - (__v4du)__b); From dca479e3bdf392f36856b9a7bc79b2bfd270875f Mon Sep 17 00:00:00 2001 From: donneypr Date: Mon, 8 Sep 2025 20:19:28 -0400 Subject: [PATCH 03/15] [clang][x86][headers] Make AVX-512 epi64 add/sub intrinsics constexpr Fixes #152490 --- clang/lib/Headers/avx512fintrin.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 7ba09039cd826..345d0e0e3898a 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -859,7 +859,7 @@ _mm512_add_epi64(__m512i __A, __m512i __B) { return (__m512i) ((__v8du) __A + (__v8du) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -867,7 +867,7 @@ _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -875,13 +875,13 @@ _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B) (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_epi64 (__m512i __A, __m512i __B) { return (__m512i) ((__v8du) __A - (__v8du) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -889,7 +889,7 @@ _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -897,7 +897,7 @@ _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B) (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_epi32 (__m512i __A, __m512i __B) { return (__m512i) ((__v16su) __A + (__v16su) __B); @@ -919,7 +919,7 @@ _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B) (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_epi32 (__m512i __A, __m512i __B) { return (__m512i) ((__v16su) __A - (__v16su) __B); From 0d4b8005b3b336aa035f779c097289a5ded656d0 Mon Sep 17 00:00:00 2001 From: donneypr Date: Mon, 8 Sep 2025 21:48:53 -0400 Subject: [PATCH 04/15] [clang][x86][headers] Make AVX-512F masked epi32 add/sub constexpr Fixes llvm/llvm-project#152490 --- clang/lib/Headers/avx512fintrin.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 345d0e0e3898a..ff7b03b20df6f 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -903,7 +903,7 @@ _mm512_add_epi32 (__m512i __A, __m512i __B) return (__m512i) ((__v16su) __A + (__v16su) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -911,7 +911,7 @@ _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -925,7 +925,7 @@ _mm512_sub_epi32 (__m512i __A, __m512i __B) return (__m512i) ((__v16su) __A - (__v16su) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -933,7 +933,7 @@ _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, From c70762ab4ac729f18ba548bb3fd3ccefd5b20be1 Mon Sep 17 00:00:00 2001 From: donneypr Date: Mon, 8 Sep 2025 21:53:07 -0400 Subject: [PATCH 05/15] [clang][x86][headers] Make AVX-512BW masked add/sub (epi8/epi16) constexpr Fixes llvm/llvm-project#152490. --- clang/lib/Headers/avx512bwintrin.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index 42fce7d89e1bb..0fc2f5bf2eda7 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -369,76 +369,76 @@ static __inline__ void __DEFAULT_FN_ATTRS _store_mask64(__mmask64 *__A, #define _mm512_mask_cmpneq_epu16_mask(k, A, B) \ _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE) -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_epi8 (__m512i __A, __m512i __B) { return (__m512i) ((__v64qu) __A + (__v64qu) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_add_epi8(__A, __B), (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_epi8(__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_add_epi8(__A, __B), (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_epi8 (__m512i __A, __m512i __B) { return (__m512i) ((__v64qu) __A - (__v64qu) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_sub_epi8(__A, __B), (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_epi8(__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_sub_epi8(__A, __B), (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_epi16 (__m512i __A, __m512i __B) { return (__m512i) ((__v32hu) __A + (__v32hu) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_add_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_add_epi16(__A, __B), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_add_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_add_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_sub_epi16 (__m512i __A, __m512i __B) { return (__m512i) ((__v32hu) __A - (__v32hu) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_mask_sub_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_sub_epi16(__A, __B), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_maskz_sub_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_sub_epi16(__A, __B), From 8b22bd7631aec621189d34f97bcf667141aeded3 Mon Sep 17 00:00:00 2001 From: donneypr Date: Tue, 9 Sep 2025 07:51:26 -0400 Subject: [PATCH 06/15] clang-format: touched lines in x86 add/sub headers for #152490 --- clang/lib/Headers/avx2intrin.h | 24 ++++++++-------------- clang/lib/Headers/avx512bwintrin.h | 10 ++++----- clang/lib/Headers/avx512fintrin.h | 33 ++++++++++-------------------- clang/lib/Headers/emmintrin.h | 16 +++++++-------- 4 files changed, 32 insertions(+), 51 deletions(-) diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index 37e08b0e48493..49ae71539381a 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -280,8 +280,7 @@ _mm256_packus_epi32(__m256i __V1, __m256i __V2) /// A 256-bit integer vector containing one of the source operands. /// \returns A 256-bit integer vector containing the sums. static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_add_epi8(__m256i __a, __m256i __b) -{ +_mm256_add_epi8(__m256i __a, __m256i __b) { return (__m256i)((__v32qu)__a + (__v32qu)__b); } @@ -299,8 +298,7 @@ _mm256_add_epi8(__m256i __a, __m256i __b) /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the sums. static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_add_epi16(__m256i __a, __m256i __b) -{ +_mm256_add_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hu)__a + (__v16hu)__b); } @@ -318,8 +316,7 @@ _mm256_add_epi16(__m256i __a, __m256i __b) /// A 256-bit vector of [8 x i32] containing one of the source operands. /// \returns A 256-bit vector of [8 x i32] containing the sums. static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_add_epi32(__m256i __a, __m256i __b) -{ +_mm256_add_epi32(__m256i __a, __m256i __b) { return (__m256i)((__v8su)__a + (__v8su)__b); } @@ -337,8 +334,7 @@ _mm256_add_epi32(__m256i __a, __m256i __b) /// A 256-bit vector of [4 x i64] containing one of the source operands. /// \returns A 256-bit vector of [4 x i64] containing the sums. static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_add_epi64(__m256i __a, __m256i __b) -{ +_mm256_add_epi64(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a + (__v4du)__b); } @@ -2461,8 +2457,7 @@ _mm256_srl_epi64(__m256i __a, __m128i __count) /// A 256-bit integer vector containing the subtrahends. /// \returns A 256-bit integer vector containing the differences. static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_sub_epi8(__m256i __a, __m256i __b) -{ +_mm256_sub_epi8(__m256i __a, __m256i __b) { return (__m256i)((__v32qu)__a - (__v32qu)__b); } @@ -2488,8 +2483,7 @@ _mm256_sub_epi8(__m256i __a, __m256i __b) /// A 256-bit vector of [16 x i16] containing the subtrahends. /// \returns A 256-bit vector of [16 x i16] containing the differences. static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_sub_epi16(__m256i __a, __m256i __b) -{ +_mm256_sub_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hu)__a - (__v16hu)__b); } @@ -2514,8 +2508,7 @@ _mm256_sub_epi16(__m256i __a, __m256i __b) /// A 256-bit vector of [8 x i32] containing the subtrahends. /// \returns A 256-bit vector of [8 x i32] containing the differences. static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_sub_epi32(__m256i __a, __m256i __b) -{ +_mm256_sub_epi32(__m256i __a, __m256i __b) { return (__m256i)((__v8su)__a - (__v8su)__b); } @@ -2540,8 +2533,7 @@ _mm256_sub_epi32(__m256i __a, __m256i __b) /// A 256-bit vector of [4 x i64] containing the subtrahends. /// \returns A 256-bit vector of [4 x i64] containing the differences. static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_sub_epi64(__m256i __a, __m256i __b) -{ +_mm256_sub_epi64(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a - (__v4du)__b); } diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index 0fc2f5bf2eda7..c5e20baf38723 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -369,8 +369,8 @@ static __inline__ void __DEFAULT_FN_ATTRS _store_mask64(__mmask64 *__A, #define _mm512_mask_cmpneq_epu16_mask(k, A, B) \ _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE) -static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_add_epi8 (__m512i __A, __m512i __B) { +static __inline__ __m512i + __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_epi8(__m512i __A, __m512i __B) { return (__m512i) ((__v64qu) __A + (__v64qu) __B); } @@ -389,7 +389,7 @@ _mm512_maskz_add_epi8(__mmask64 __U, __m512i __A, __m512i __B) { } static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_sub_epi8 (__m512i __A, __m512i __B) { +_mm512_sub_epi8(__m512i __A, __m512i __B) { return (__m512i) ((__v64qu) __A - (__v64qu) __B); } @@ -408,7 +408,7 @@ _mm512_maskz_sub_epi8(__mmask64 __U, __m512i __A, __m512i __B) { } static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_add_epi16 (__m512i __A, __m512i __B) { +_mm512_add_epi16(__m512i __A, __m512i __B) { return (__m512i) ((__v32hu) __A + (__v32hu) __B); } @@ -427,7 +427,7 @@ _mm512_maskz_add_epi16(__mmask32 __U, __m512i __A, __m512i __B) { } static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_sub_epi16 (__m512i __A, __m512i __B) { +_mm512_sub_epi16(__m512i __A, __m512i __B) { return (__m512i) ((__v32hu) __A - (__v32hu) __B); } diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index ff7b03b20df6f..7f6cebbe3bc7c 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -860,82 +860,71 @@ _mm512_add_epi64(__m512i __A, __m512i __B) { } static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) -{ +_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_add_epi64(__A, __B), (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B) -{ +_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_add_epi64(__A, __B), (__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_sub_epi64 (__m512i __A, __m512i __B) -{ +_mm512_sub_epi64(__m512i __A, __m512i __B) { return (__m512i) ((__v8du) __A - (__v8du) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) -{ +_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_sub_epi64(__A, __B), (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B) -{ +_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_sub_epi64(__A, __B), (__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_add_epi32 (__m512i __A, __m512i __B) -{ +_mm512_add_epi32(__m512i __A, __m512i __B) { return (__m512i) ((__v16su) __A + (__v16su) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) -{ +_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_add_epi32(__A, __B), (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B) -{ +_mm512_maskz_add_epi32(__mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_add_epi32(__A, __B), (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_sub_epi32 (__m512i __A, __m512i __B) -{ +_mm512_sub_epi32(__m512i __A, __m512i __B) { return (__m512i) ((__v16su) __A - (__v16su) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) -{ +_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_sub_epi32(__A, __B), (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR -_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B) -{ +_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_sub_epi32(__A, __B), (__v16si)_mm512_setzero_si512()); diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index ec87180667234..9345b72ab5da4 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -2059,8 +2059,8 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_pd(double *__dp, /// A 128-bit vector of [16 x i8]. /// \returns A 128-bit vector of [16 x i8] containing the sums of both /// parameters. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8_CONSTEXPR(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_add_epi8_CONSTEXPR(__m128i __a, __m128i __b) { return (__m128i)((__v16qu)__a + (__v16qu)__b); } @@ -2080,8 +2080,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8_CONSTEXPR(__m128i __a, /// A 128-bit vector of [8 x i16]. /// \returns A 128-bit vector of [8 x i16] containing the sums of both /// parameters. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16_CONSTEXPR(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_add_epi16_CONSTEXPR(__m128i __a, __m128i __b) { return (__m128i)((__v8hu)__a + (__v8hu)__b); } @@ -2498,8 +2498,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sad_epu8(__m128i __a, /// A 128-bit integer vector containing the subtrahends. /// \returns A 128-bit integer vector containing the differences of the values /// in the operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8_CONSTEXPR(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_sub_epi8_CONSTEXPR(__m128i __a, __m128i __b) { return (__m128i)((__v16qu)__a - (__v16qu)__b); } @@ -2515,8 +2515,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8_CONSTEXPR(__m128i __a, /// A 128-bit integer vector containing the subtrahends. /// \returns A 128-bit integer vector containing the differences of the values /// in the operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16_CONSTEXPR(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_sub_epi16_CONSTEXPR(__m128i __a, __m128i __b) { return (__m128i)((__v8hu)__a - (__v8hu)__b); } From 287254a8b1b91eb0b183ed62ed29dbaa6a6aaa1d Mon Sep 17 00:00:00 2001 From: donneypr Date: Mon, 15 Sep 2025 17:09:49 -0400 Subject: [PATCH 07/15] formatted only touched lines --- clang/lib/Headers/avx512bwintrin.h | 4 ++-- clang/lib/Headers/emmintrin.h | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index c5e20baf38723..e82ae2fb20bdc 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -369,8 +369,8 @@ static __inline__ void __DEFAULT_FN_ATTRS _store_mask64(__mmask64 *__A, #define _mm512_mask_cmpneq_epu16_mask(k, A, B) \ _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE) -static __inline__ __m512i - __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_epi8(__m512i __A, __m512i __B) { +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR + _mm512_add_epi8(__m512i __A, __m512i __B) { return (__m512i) ((__v64qu) __A + (__v64qu) __B); } diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index 9345b72ab5da4..4864ac5a22234 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -2059,8 +2059,8 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_pd(double *__dp, /// A 128-bit vector of [16 x i8]. /// \returns A 128-bit vector of [16 x i8] containing the sums of both /// parameters. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_add_epi8_CONSTEXPR(__m128i __a, __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_add_epi8(__m128i __a, __m128i __b) { return (__m128i)((__v16qu)__a + (__v16qu)__b); } @@ -2080,8 +2080,8 @@ _mm_add_epi8_CONSTEXPR(__m128i __a, __m128i __b) { /// A 128-bit vector of [8 x i16]. /// \returns A 128-bit vector of [8 x i16] containing the sums of both /// parameters. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_add_epi16_CONSTEXPR(__m128i __a, __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_add_epi16(__m128i __a, __m128i __b) { return (__m128i)((__v8hu)__a + (__v8hu)__b); } @@ -2515,8 +2515,8 @@ _mm_sub_epi8_CONSTEXPR(__m128i __a, __m128i __b) { /// A 128-bit integer vector containing the subtrahends. /// \returns A 128-bit integer vector containing the differences of the values /// in the operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_sub_epi16_CONSTEXPR(__m128i __a, __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_sub_epi16(__m128i __a, __m128i __b) { return (__m128i)((__v8hu)__a - (__v8hu)__b); } From 25a40771ddc1dee78ce6959f1f7d7c835a9b1011 Mon Sep 17 00:00:00 2001 From: donneypr Date: Mon, 15 Sep 2025 17:17:43 -0400 Subject: [PATCH 08/15] made _mm_sub_epi8 CONSTEXPR --- clang/lib/Headers/avx512bwintrin.h | 4 ++-- clang/lib/Headers/emmintrin.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index e82ae2fb20bdc..c5e20baf38723 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -369,8 +369,8 @@ static __inline__ void __DEFAULT_FN_ATTRS _store_mask64(__mmask64 *__A, #define _mm512_mask_cmpneq_epu16_mask(k, A, B) \ _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE) -static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR - _mm512_add_epi8(__m512i __A, __m512i __B) { +static __inline__ __m512i + __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_add_epi8(__m512i __A, __m512i __B) { return (__m512i) ((__v64qu) __A + (__v64qu) __B); } diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index 4864ac5a22234..6c835bc3771fc 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -2498,8 +2498,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sad_epu8(__m128i __a, /// A 128-bit integer vector containing the subtrahends. /// \returns A 128-bit integer vector containing the differences of the values /// in the operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_sub_epi8_CONSTEXPR(__m128i __a, __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_sub_epi8(__m128i __a, __m128i __b) { return (__m128i)((__v16qu)__a - (__v16qu)__b); } From 873f6a092aff3689abeca188e09e4b8362fab04c Mon Sep 17 00:00:00 2001 From: donneypr Date: Fri, 19 Sep 2025 14:40:58 -0400 Subject: [PATCH 09/15] added tests for all add arithmitics llvm#152490 --- clang/test/CodeGen/X86/avx2-builtins.c | 8 ++++++++ clang/test/CodeGen/X86/avx512bw-builtins.c | 12 ++++++++++++ clang/test/CodeGen/X86/avx512f-builtins.c | 12 ++++++++++++ clang/test/CodeGen/X86/avx512vl-builtins.c | 15 +++++++++++++++ clang/test/CodeGen/X86/avx512vlbw-builtins.c | 17 +++++++++++++++++ clang/test/CodeGen/X86/sse2-builtins.c | 10 ++++++++-- 6 files changed, 72 insertions(+), 2 deletions(-) diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index aeb1aee4ea946..0437403b14c20 100644 --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -48,24 +48,32 @@ __m256i test_mm256_add_epi8(__m256i a, __m256i b) { return _mm256_add_epi8(a, b); } +TEST_CONSTEXPR(match_v32qi(_mm256_add_epi8((__m256i)(__v32qi){ -64, -65, 66, 67, 68, -69, -70, -71, -72, 73, 74, -75, 76, 77, -78, 79, -80, 81, 82, -83, 84, -85, 86, 87, -88, -89, -90, 91, -92, 93, 94, 95}, (__m256i)(__v32qi){ -1, -2, 3, -4, 5, -6, 7, 8, 9, -10, 11, -12, 13, 14, 15, 16, -17, 18, 19, 20, -21, 22, -23, -24, 25, 26, -27, -28, -29, 30, -31, 32}), -65, -67, 69, 63, 73, -75, -63, -63, -63, 63, 85, -87, 89, 91, -63, 95, -97, 99, 101, -63, 63, -63, 63, 63, -63, -63, -117, 63, -121, 123, 63, 127)); + __m256i test_mm256_add_epi16(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_add_epi16 // CHECK: add <16 x i16> return _mm256_add_epi16(a, b); } +TEST_CONSTEXPR(match_v16hi(_mm256_add_epi16((__m256i)(__v16hi){ -32, -33, 34, -35, -36, 37, -38, -39, -40, -41, -42, 43, -44, -45, 46, 47}, (__m256i)(__v16hi){ -1, 2, 3, -4, 5, -6, 7, 8, 9, 10, -11, -12, -13, 14, -15, -16}), -33, -31, 37, -39, -31, 31, -31, -31, -31, -31, -53, 31, -57, -31, 31, 31)); + __m256i test_mm256_add_epi32(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_add_epi32 // CHECK: add <8 x i32> return _mm256_add_epi32(a, b); } +TEST_CONSTEXPR(match_v8si(_mm256_add_epi32((__m256i)(__v8si){ 16, 17, 18, -19, 20, -21, 22, 23}, (__m256i)(__v8si){ -1, 2, 3, 4, 5, 6, 7, 8}), 15, 19, 21, -15, 25, -15, 29, 31)); + __m256i test_mm256_add_epi64(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_add_epi64 // CHECK: add <4 x i64> return _mm256_add_epi64(a, b); } +TEST_CONSTEXPR(match_v4di(_mm256_add_epi64((__m256i)(__v4di){ 8, -9, 10, 11}, (__m256i)(__v4di){ -1, -2, 3, 4}), 7, -11, 13, 15)); + __m256i test_mm256_adds_epi8(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_adds_epi8 // CHECK: call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index 9d605efcbd758..4cffb38d6205a 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -752,6 +752,8 @@ __m512i test_mm512_add_epi8 (__m512i __A, __m512i __B) { return _mm512_add_epi8(__A,__B); } +TEST_CONSTEXPR(match_v64qi(_mm512_add_epi8((__m512i)(__v64qi){ -128, 127, 126, -125, -124, -123, 122, 121, -120, -119, 118, -117, 116, 115, -114, 113, -112, 111, -110, -109, -108, -107, -106, -105, 104, -103, -102, 101, 100, -99, 98, -97, -96, 95, 94, -93, 92, 91, 90, -89, 88, 87, -86, -85, -84, -83, -82, 81, -80, 79, -78, 77, -76, 75, 74, -73, -72, 71, -70, -69, -68, 67, -66, 65}, (__m512i)(__v64qi){ 1, -2, 3, -4, -5, -6, -7, -8, 9, 10, 11, 12, -13, -14, 15, 16, -17, -18, -19, 20, -21, -22, 23, 24, 25, -26, 27, -28, -29, -30, -31, -32, -33, -34, -35, -36, -37, 38, -39, -40, 41, -42, -43, 44, -45, -46, 47, 48, -49, -50, 51, -52, -53, 54, 55, -56, -57, 58, 59, 60, 61, -62, 63, -64}), -127, 125, -127, 127, 127, 127, 115, 113, -111, -109, -127, -105, 103, 101, -99, -127, 127, 93, 127, -89, 127, 127, -83, -81, -127, 127, -75, 73, 71, 127, 67, 127, 127, 61, 59, 127, 55, -127, 51, 127, -127, 45, 127, -41, 127, 127, -35, -127, 127, 29, -27, 25, 127, -127, -127, 127, 127, -127, -11, -9, -7, 5, -3, 1)); + __m512i test_mm512_mask_add_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_mask_add_epi8 //CHECK: add <64 x i8> %{{.*}}, %{{.*}} @@ -759,6 +761,8 @@ __m512i test_mm512_mask_add_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m51 return _mm512_mask_add_epi8(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v64qi(_mm512_mask_add_epi8((__m512i)(__v64qi){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x2488D358BA7928B1, (__m512i)(__v64qi){ -128, 127, 126, -125, -124, -123, 122, 121, -120, -119, 118, -117, 116, 115, -114, 113, -112, 111, -110, -109, -108, -107, -106, -105, 104, -103, -102, 101, 100, -99, 98, -97, -96, 95, 94, -93, 92, 91, 90, -89, 88, 87, -86, -85, -84, -83, -82, 81, -80, 79, -78, 77, -76, 75, 74, -73, -72, 71, -70, -69, -68, 67, -66, 65}, (__m512i)(__v64qi){ 1, -2, 3, -4, -5, -6, -7, -8, 9, 10, 11, 12, -13, -14, 15, 16, -17, -18, -19, 20, -21, -22, 23, 24, 25, -26, 27, -28, -29, -30, -31, -32, -33, -34, -35, -36, -37, 38, -39, -40, 41, -42, -43, 44, -45, -46, 47, 48, -49, -50, 51, -52, -53, 54, 55, -56, -57, 58, 59, 60, 61, -62, 63, -64}), -127, 99, 99, 99, 127, 127, 99, 113, 99, 99, 99, -105, 99, 101, 99, 99, 127, 99, 99, -89, 127, 127, -83, 99, 99, 127, 99, 73, 71, 127, 99, 127, 99, 99, 99, 127, 55, 99, 51, 99, -127, 45, 99, 99, 127, 99, -35, -127, 99, 99, 99, 25, 99, 99, 99, 127, 99, 99, -11, 99, 99, 5, 99, 99)); + __m512i test_mm512_maskz_add_epi8 (__mmask64 __U, __m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_maskz_add_epi8 //CHECK: add <64 x i8> %{{.*}}, %{{.*}} @@ -766,6 +770,8 @@ __m512i test_mm512_maskz_add_epi8 (__mmask64 __U, __m512i __A, __m512i __B) { return _mm512_maskz_add_epi8(__U, __A, __B); } +TEST_CONSTEXPR(match_v64qi(_mm512_maskz_add_epi8(0x2488D358BA7928B1, (__m512i)(__v64qi){ -128, 127, 126, -125, -124, -123, 122, 121, -120, -119, 118, -117, 116, 115, -114, 113, -112, 111, -110, -109, -108, -107, -106, -105, 104, -103, -102, 101, 100, -99, 98, -97, -96, 95, 94, -93, 92, 91, 90, -89, 88, 87, -86, -85, -84, -83, -82, 81, -80, 79, -78, 77, -76, 75, 74, -73, -72, 71, -70, -69, -68, 67, -66, 65}, (__m512i)(__v64qi){ 1, -2, 3, -4, -5, -6, -7, -8, 9, 10, 11, 12, -13, -14, 15, 16, -17, -18, -19, 20, -21, -22, 23, 24, 25, -26, 27, -28, -29, -30, -31, -32, -33, -34, -35, -36, -37, 38, -39, -40, 41, -42, -43, 44, -45, -46, 47, 48, -49, -50, 51, -52, -53, 54, 55, -56, -57, 58, 59, 60, 61, -62, 63, -64}), -127, 0, 0, 0, 127, 127, 0, 113, 0, 0, 0, -105, 0, 101, 0, 0, 127, 0, 0, -89, 127, 127, -83, 0, 0, 127, 0, 73, 71, 127, 0, 127, 0, 0, 0, 127, 55, 0, 51, 0, -127, 45, 0, 0, 127, 0, -35, -127, 0, 0, 0, 25, 0, 0, 0, 127, 0, 0, -11, 0, 0, 5, 0, 0)); + __m512i test_mm512_sub_epi8 (__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_sub_epi8 //CHECK: sub <64 x i8> @@ -792,6 +798,8 @@ __m512i test_mm512_add_epi16 (__m512i __A, __m512i __B) { return _mm512_add_epi16(__A, __B); } +TEST_CONSTEXPR(match_v32hi(_mm512_add_epi16((__m512i)(__v32hi){ 64, 65, 66, 67, -68, 69, -70, 71, 72, -73, 74, -75, 76, 77, -78, -79, 80, 81, 82, -83, -84, -85, 86, 87, -88, 89, -90, 91, 92, -93, 94, 95}, (__m512i)(__v32hi){ 1, 2, -3, -4, 5, 6, -7, -8, -9, -10, -11, 12, 13, -14, 15, 16, -17, -18, 19, -20, -21, 22, -23, -24, 25, -26, -27, -28, -29, 30, -31, -32}), 65, 67, 63, 63, -63, 75, -77, 63, 63, -83, 63, -63, 89, 63, -63, -63, 63, 63, 101, -103, -105, -63, 63, 63, -63, 63, -117, 63, 63, -63, 63, 63)); + __m512i test_mm512_mask_add_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_mask_add_epi16 //CHECK: add <32 x i16> %{{.*}}, %{{.*}} @@ -799,6 +807,8 @@ __m512i test_mm512_mask_add_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m5 return _mm512_mask_add_epi16(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v32hi(_mm512_mask_add_epi16((__m512i)(__v32hi){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0xB885E123, (__m512i)(__v32hi){ 64, 65, 66, 67, -68, 69, -70, 71, 72, -73, 74, -75, 76, 77, -78, -79, 80, 81, 82, -83, -84, -85, 86, 87, -88, 89, -90, 91, 92, -93, 94, 95}, (__m512i)(__v32hi){ 1, 2, -3, -4, 5, 6, -7, -8, -9, -10, -11, 12, 13, -14, 15, 16, -17, -18, 19, -20, -21, 22, -23, -24, 25, -26, -27, -28, -29, 30, -31, -32}), 65, 67, 99, 99, 99, 75, 99, 99, 63, 99, 99, 99, 99, 63, -63, -63, 63, 99, 101, 99, 99, 99, 99, 63, 99, 99, 99, 63, 63, -63, 99, 63)); + __m512i test_mm512_maskz_add_epi16 (__mmask32 __U, __m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_maskz_add_epi16 //CHECK: add <32 x i16> %{{.*}}, %{{.*}} @@ -806,6 +816,8 @@ __m512i test_mm512_maskz_add_epi16 (__mmask32 __U, __m512i __A, __m512i __B) { return _mm512_maskz_add_epi16(__U, __A, __B); } +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_add_epi16(0xB885E123, (__m512i)(__v32hi){ 64, 65, 66, 67, -68, 69, -70, 71, 72, -73, 74, -75, 76, 77, -78, -79, 80, 81, 82, -83, -84, -85, 86, 87, -88, 89, -90, 91, 92, -93, 94, 95}, (__m512i)(__v32hi){ 1, 2, -3, -4, 5, 6, -7, -8, -9, -10, -11, 12, 13, -14, 15, 16, -17, -18, 19, -20, -21, 22, -23, -24, 25, -26, -27, -28, -29, 30, -31, -32}), 65, 67, 0, 0, 0, 75, 0, 0, 63, 0, 0, 0, 0, 63, -63, -63, 63, 0, 101, 0, 0, 0, 0, 63, 0, 0, 0, 63, 63, -63, 0, 63)); + __m512i test_mm512_sub_epi16 (__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_sub_epi16 //CHECK: sub <32 x i16> diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index f93216e546a63..ed4824cbbfcbe 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -3048,6 +3048,8 @@ __m512i test_mm512_maskz_add_epi32 (__mmask16 __k,__m512i __A, __m512i __B) { return _mm512_maskz_add_epi32(__k,__A,__B); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_add_epi32(0xB3F3, (__m512i)(__v16si){ -32, -33, -34, 35, 36, 37, -38, -39, 40, 41, -42, 43, -44, -45, 46, -47}, (__m512i)(__v16si){ 1, 2, -3, -4, -5, -6, -7, 8, 9, 10, 11, -12, 13, 14, -15, -16}), -31, -31, 0, 0, 31, 31, -45, -31, 49, 51, 0, 0, -31, -31, 0, -63)); + __m512i test_mm512_mask_add_epi32 (__mmask16 __k,__m512i __A, __m512i __B, __m512i __src) { //CHECK-LABEL: test_mm512_mask_add_epi32 @@ -3056,12 +3058,16 @@ __m512i test_mm512_mask_add_epi32 (__mmask16 __k,__m512i __A, __m512i __B, return _mm512_mask_add_epi32(__src,__k,__A,__B); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_add_epi32((__m512i)(__v16si){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0xB3F3, (__m512i)(__v16si){ -32, -33, -34, 35, 36, 37, -38, -39, 40, 41, -42, 43, -44, -45, 46, -47}, (__m512i)(__v16si){ 1, 2, -3, -4, -5, -6, -7, 8, 9, 10, 11, -12, 13, 14, -15, -16}), -31, -31, 99, 99, 31, 31, -45, -31, 49, 51, 99, 99, -31, -31, 99, -63)); + __m512i test_mm512_add_epi32(__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_add_epi32 //CHECK: add <16 x i32> return _mm512_add_epi32(__A,__B); } +TEST_CONSTEXPR(match_v16si(_mm512_add_epi32((__m512i)(__v16si){ -32, -33, -34, 35, 36, 37, -38, -39, 40, 41, -42, 43, -44, -45, 46, -47}, (__m512i)(__v16si){ 1, 2, -3, -4, -5, -6, -7, 8, 9, 10, 11, -12, 13, 14, -15, -16}), -31, -31, -37, 31, 31, 31, -45, -31, 49, 51, -31, 31, -31, -31, 31, -63)); + __m512i test_mm512_maskz_add_epi64 (__mmask8 __k,__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_maskz_add_epi64 //CHECK: add <8 x i64> %{{.*}}, %{{.*}} @@ -3069,6 +3075,8 @@ __m512i test_mm512_maskz_add_epi64 (__mmask8 __k,__m512i __A, __m512i __B) { return _mm512_maskz_add_epi64(__k,__A,__B); } +TEST_CONSTEXPR(match_v8di(_mm512_maskz_add_epi64(0x6A, (__m512i)(__v8di){ -16, 17, -18, 19, -20, 21, 22, 23}, (__m512i)(__v8di){ -1, -2, -3, 4, 5, -6, -7, 8}), 0, 15, 0, 23, 0, 15, 15, 0)); + __m512i test_mm512_mask_add_epi64 (__mmask8 __k,__m512i __A, __m512i __B, __m512i __src) { //CHECK-LABEL: test_mm512_mask_add_epi64 @@ -3077,12 +3085,16 @@ __m512i test_mm512_mask_add_epi64 (__mmask8 __k,__m512i __A, __m512i __B, return _mm512_mask_add_epi64(__src,__k,__A,__B); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_add_epi64((__m512i)(__v8di){ 99, 99, 99, 99, 99, 99, 99, 99}, 0x6A, (__m512i)(__v8di){ -16, 17, -18, 19, -20, 21, 22, 23}, (__m512i)(__v8di){ -1, -2, -3, 4, 5, -6, -7, 8}), 99, 15, 99, 23, 99, 15, 15, 99)); + __m512i test_mm512_add_epi64(__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_add_epi64 //CHECK: add <8 x i64> return _mm512_add_epi64(__A,__B); } +TEST_CONSTEXPR(match_v8di(_mm512_add_epi64((__m512i)(__v8di){ -16, 17, -18, 19, -20, 21, 22, 23}, (__m512i)(__v8di){ -1, -2, -3, 4, 5, -6, -7, 8}), -17, 15, -21, 23, -15, 15, 15, 31)); + __m512i test_mm512_mul_epi32(__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_mul_epi32 //CHECK: shl <8 x i64> %{{.*}}, splat (i64 32) diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c index 9daecd0d9875f..893187350b61b 100644 --- a/clang/test/CodeGen/X86/avx512vl-builtins.c +++ b/clang/test/CodeGen/X86/avx512vl-builtins.c @@ -615,6 +615,8 @@ __m256i test_mm256_mask_add_epi32 (__m256i __W, __mmask8 __U, __m256i __A, return _mm256_mask_add_epi32(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v8si(_mm256_mask_add_epi32((__m256i)(__v8si){ 99, 99, 99, 99, 99, 99, 99, 99}, 0xA1, (__m256i)(__v8si){ 16, 17, 18, -19, 20, -21, 22, 23}, (__m256i)(__v8si){ -1, 2, 3, 4, 5, 6, 7, 8}), 15, 99, 99, 99, 99, -15, 99, 31)); + __m256i test_mm256_maskz_add_epi32 (__mmask8 __U, __m256i __A, __m256i __B) { //CHECK-LABEL: test_mm256_maskz_add_epi32 //CHECK: add <8 x i32> %{{.*}}, %{{.*}} @@ -622,6 +624,8 @@ __m256i test_mm256_maskz_add_epi32 (__mmask8 __U, __m256i __A, __m256i __B) { return _mm256_maskz_add_epi32(__U, __A, __B); } +TEST_CONSTEXPR(match_v8si(_mm256_maskz_add_epi32(0xA1, (__m256i)(__v8si){ 16, 17, 18, -19, 20, -21, 22, 23}, (__m256i)(__v8si){ -1, 2, 3, 4, 5, 6, 7, 8}), 15, 0, 0, 0, 0, -15, 0, 31)); + __m256i test_mm256_mask_add_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { //CHECK-LABEL: test_mm256_mask_add_epi64 @@ -630,6 +634,8 @@ __m256i test_mm256_mask_add_epi64 (__m256i __W, __mmask8 __U, __m256i __A, return _mm256_mask_add_epi64(__W,__U,__A,__B); } +TEST_CONSTEXPR(match_v4di(_mm256_mask_add_epi64((__m256i)(__v4di){ 99, 99, 99, 99}, 0x0, (__m256i)(__v4di){ 8, -9, 10, 11}, (__m256i)(__v4di){ -1, -2, 3, 4}), 99, 99, 99, 99)); + __m256i test_mm256_maskz_add_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { //CHECK-LABEL: test_mm256_maskz_add_epi64 //CHECK: add <4 x i64> %{{.*}}, %{{.*}} @@ -637,6 +643,8 @@ __m256i test_mm256_maskz_add_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { return _mm256_maskz_add_epi64 (__U,__A,__B); } +TEST_CONSTEXPR(match_v4di(_mm256_maskz_add_epi64(0x0, (__m256i)(__v4di){ 8, -9, 10, 11}, (__m256i)(__v4di){ -1, -2, 3, 4}), 0, 0, 0, 0)); + __m256i test_mm256_mask_sub_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { //CHECK-LABEL: test_mm256_mask_sub_epi32 @@ -675,6 +683,7 @@ __m128i test_mm_mask_add_epi32 (__m128i __W, __mmask8 __U, __m128i __A, return _mm_mask_add_epi32(__W,__U,__A,__B); } +TEST_CONSTEXPR(match_v4si(_mm_mask_add_epi32((__m128i)(__v4si){ 99, 99, 99, 99}, 0xA, (__m128i)(__v4si){ 8, 9, 10, 11}, (__m128i)(__v4si){ -1, -2, 3, 4}), 99, 7, 99, 15)); __m128i test_mm_maskz_add_epi32 (__mmask8 __U, __m128i __A, __m128i __B) { //CHECK-LABEL: test_mm_maskz_add_epi32 @@ -683,6 +692,8 @@ __m128i test_mm_maskz_add_epi32 (__mmask8 __U, __m128i __A, __m128i __B) { return _mm_maskz_add_epi32 (__U,__A,__B); } +TEST_CONSTEXPR(match_v4si(_mm_maskz_add_epi32(0xA, (__m128i)(__v4si){ 8, 9, 10, 11}, (__m128i)(__v4si){ -1, -2, 3, 4}), 0, 7, 0, 15)); + __m128i test_mm_mask_add_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { //CHECK-LABEL: test_mm_mask_add_epi64 @@ -691,6 +702,8 @@ __m128i test_mm_mask_add_epi64 (__m128i __W, __mmask8 __U, __m128i __A, return _mm_mask_add_epi64 (__W,__U,__A,__B); } +TEST_CONSTEXPR(match_v2di(_mm_mask_add_epi64((__m128i)(__v2di){ 99, 99}, 0x3, (__m128i)(__v2di){ 4, -5}, (__m128i)(__v2di){ 1, -2}), 5, -7)); + __m128i test_mm_maskz_add_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { //CHECK-LABEL: test_mm_maskz_add_epi64 //CHECK: add <2 x i64> %{{.*}}, %{{.*}} @@ -698,6 +711,8 @@ __m128i test_mm_maskz_add_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { return _mm_maskz_add_epi64 (__U,__A,__B); } +TEST_CONSTEXPR(match_v2di(_mm_maskz_add_epi64(0x3, (__m128i)(__v2di){ 4, -5}, (__m128i)(__v2di){ 1, -2}), 5, -7)); + __m128i test_mm_mask_sub_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { //CHECK-LABEL: test_mm_mask_sub_epi32 diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c index d62235a630fd8..e5e0df7bca5ed 100644 --- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c @@ -751,12 +751,17 @@ __m256i test_mm256_mask_add_epi8 (__m256i __W, __mmask32 __U, __m256i __A, __m25 return _mm256_mask_add_epi8(__W, __U , __A, __B); } +TEST_CONSTEXPR(match_v32qi(_mm256_mask_add_epi8((__m256i)(__v32qi){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x20676BFF, (__m256i)(__v32qi){ -64, -65, 66, 67, 68, -69, -70, -71, -72, 73, 74, -75, 76, 77, -78, 79, -80, 81, 82, -83, 84, -85, 86, 87, -88, -89, -90, 91, -92, 93, 94, 95}, (__m256i)(__v32qi){ -1, -2, 3, -4, 5, -6, 7, 8, 9, -10, 11, -12, 13, 14, 15, 16, -17, 18, 19, 20, -21, 22, -23, -24, 25, 26, -27, -28, -29, 30, -31, 32}), -65, -67, 69, 63, 73, -75, -63, -63, -63, 63, 99, -87, 99, 91, -63, 99, -97, 99, 101, 99, 99, -63, 63, 99, 99, 99, 99, 99, 99, 123, 99, 99)); + __m256i test_mm256_maskz_add_epi8 (__mmask32 __U, __m256i __A, __m256i __B) { //CHECK-LABEL: test_mm256_maskz_add_epi8 //CHECK: add <32 x i8> %{{.*}}, %{{.*}} //CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} return _mm256_maskz_add_epi8(__U , __A, __B); } + +TEST_CONSTEXPR(match_v32qi(_mm256_maskz_add_epi8(0x20676BFF, (__m256i)(__v32qi){ -64, -65, 66, 67, 68, -69, -70, -71, -72, 73, 74, -75, 76, 77, -78, 79, -80, 81, 82, -83, 84, -85, 86, 87, -88, -89, -90, 91, -92, 93, 94, 95}, (__m256i)(__v32qi){ -1, -2, 3, -4, 5, -6, 7, 8, 9, -10, 11, -12, 13, 14, 15, 16, -17, 18, 19, 20, -21, 22, -23, -24, 25, 26, -27, -28, -29, 30, -31, 32}), -65, -67, 69, 63, 73, -75, -63, -63, -63, 63, 0, -87, 0, 91, -63, 0, -97, 99, 101, 0, 0, -63, 63, 0, 0, 0, 0, 0, 0, 123, 0, 0)); + __m256i test_mm256_mask_add_epi16 (__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { //CHECK-LABEL: test_mm256_mask_add_epi16 //CHECK: add <16 x i16> %{{.*}}, %{{.*}} @@ -764,6 +769,8 @@ __m256i test_mm256_mask_add_epi16 (__m256i __W, __mmask16 __U, __m256i __A, __m2 return _mm256_mask_add_epi16(__W, __U , __A, __B); } +TEST_CONSTEXPR(match_v16hi(_mm256_mask_add_epi16((__m256i)(__v16hi){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x5A21, (__m256i)(__v16hi){ -32, -33, 34, -35, -36, 37, -38, -39, -40, -41, -42, 43, -44, -45, 46, 47}, (__m256i)(__v16hi){ -1, 2, 3, -4, 5, -6, 7, 8, 9, 10, -11, -12, -13, 14, -15, -16}), -33, 99, 99, 99, 99, 31, 99, 99, 99, -31, 99, 31, -57, 99, 31, 99)); + __m256i test_mm256_maskz_add_epi16 (__mmask16 __U, __m256i __A, __m256i __B) { //CHECK-LABEL: test_mm256_maskz_add_epi16 //CHECK: add <16 x i16> %{{.*}}, %{{.*}} @@ -771,6 +778,8 @@ __m256i test_mm256_maskz_add_epi16 (__mmask16 __U, __m256i __A, __m256i __B) { return _mm256_maskz_add_epi16(__U , __A, __B); } +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_add_epi16(0x5A21, (__m256i)(__v16hi){ -32, -33, 34, -35, -36, 37, -38, -39, -40, -41, -42, 43, -44, -45, 46, 47}, (__m256i)(__v16hi){ -1, 2, 3, -4, 5, -6, 7, 8, 9, 10, -11, -12, -13, 14, -15, -16}), -33, 0, 0, 0, 0, 31, 0, 0, 0, -31, 0, 31, -57, 0, 31, 0)); + __m256i test_mm256_mask_sub_epi8 (__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { //CHECK-LABEL: test_mm256_mask_sub_epi8 //CHECK: sub <32 x i8> %{{.*}}, %{{.*}} @@ -806,6 +815,8 @@ __m128i test_mm_mask_add_epi8 (__m128i __W, __mmask16 __U, __m128i __A, __m128i return _mm_mask_add_epi8(__W, __U , __A, __B); } +TEST_CONSTEXPR(match_v16qi(_mm_mask_add_epi8((__m128i)(__v16qi){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x5693, (__m128i)(__v16qi){ 32, 33, 34, -35, -36, -37, 38, 39, -40, 41, -42, 43, -44, -45, 46, -47}, (__m128i)(__v16qi){ 1, 2, -3, -4, 5, -6, -7, 8, 9, -10, 11, 12, 13, 14, -15, 16}), 33, 35, 99, 99, -31, 99, 99, 47, 99, 31, -31, 99, -31, 99, 31, 99)); + __m128i test_mm_maskz_add_epi8 (__mmask16 __U, __m128i __A, __m128i __B) { //CHECK-LABEL: test_mm_maskz_add_epi8 //CHECK: add <16 x i8> %{{.*}}, %{{.*}} @@ -813,6 +824,8 @@ __m128i test_mm_maskz_add_epi8 (__mmask16 __U, __m128i __A, __m128i __B) { return _mm_maskz_add_epi8(__U , __A, __B); } +TEST_CONSTEXPR(match_v16qi(_mm_maskz_add_epi8(0x5693, (__m128i)(__v16qi){ 32, 33, 34, -35, -36, -37, 38, 39, -40, 41, -42, 43, -44, -45, 46, -47}, (__m128i)(__v16qi){ 1, 2, -3, -4, 5, -6, -7, 8, 9, -10, 11, 12, 13, 14, -15, 16}), 33, 35, 0, 0, -31, 0, 0, 47, 0, 31, -31, 0, -31, 0, 31, 0)); + __m128i test_mm_mask_add_epi16 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { //CHECK-LABEL: test_mm_mask_add_epi16 //CHECK: add <8 x i16> %{{.*}}, %{{.*}} @@ -820,6 +833,8 @@ __m128i test_mm_mask_add_epi16 (__m128i __W, __mmask8 __U, __m128i __A, __m128i return _mm_mask_add_epi16(__W, __U , __A, __B); } +TEST_CONSTEXPR(match_v8hi(_mm_mask_add_epi16((__m128i)(__v8hi){ 99, 99, 99, 99, 99, 99, 99, 99}, 0x8D, (__m128i)(__v8hi){ -16, -17, -18, -19, -20, 21, 22, 23}, (__m128i)(__v8hi){ 1, 2, 3, -4, -5, -6, 7, -8}), -15, 99, -15, -23, 99, 99, 99, 15)); + __m128i test_mm_maskz_add_epi16 (__mmask8 __U, __m128i __A, __m128i __B) { //CHECK-LABEL: test_mm_maskz_add_epi16 //CHECK: add <8 x i16> %{{.*}}, %{{.*}} @@ -827,6 +842,8 @@ __m128i test_mm_maskz_add_epi16 (__mmask8 __U, __m128i __A, __m128i __B) { return _mm_maskz_add_epi16(__U , __A, __B); } +TEST_CONSTEXPR(match_v8hi(_mm_maskz_add_epi16(0x8D, (__m128i)(__v8hi){ -16, -17, -18, -19, -20, 21, 22, 23}, (__m128i)(__v8hi){ 1, 2, 3, -4, -5, -6, 7, -8}), -15, 0, -15, -23, 0, 0, 0, 15)); + __m128i test_mm_mask_sub_epi8 (__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { //CHECK-LABEL: test_mm_mask_sub_epi8 //CHECK: sub <16 x i8> %{{.*}}, %{{.*}} diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c index 0ba32bb230cdd..6f9975fa24939 100644 --- a/clang/test/CodeGen/X86/sse2-builtins.c +++ b/clang/test/CodeGen/X86/sse2-builtins.c @@ -32,25 +32,31 @@ __m128i test_mm_add_epi8(__m128i A, __m128i B) { return _mm_add_epi8(A, B); } +TEST_CONSTEXPR(match_v16qi(_mm_add_epi8((__m128i)(__v16qi){ 32, 33, 34, -35, -36, -37, 38, 39, -40, 41, -42, 43, -44, -45, 46, -47}, (__m128i)(__v16qi){ 1, 2, -3, -4, 5, -6, -7, 8, 9, -10, 11, 12, 13, 14, -15, 16}), 33, 35, 31, -39, -31, -43, 31, 47, -31, 31, -31, 55, -31, -31, 31, -31)); + __m128i test_mm_add_epi16(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_add_epi16 // CHECK: add <8 x i16> return _mm_add_epi16(A, B); } +TEST_CONSTEXPR(match_v8hi(_mm_add_epi16((__m128i)(__v8hi){ -16, -17, -18, -19, -20, 21, 22, 23}, (__m128i)(__v8hi){ 1, 2, 3, -4, -5, -6, 7, -8}), -15, -15, -15, -23, -25, 15, 29, 15)); + __m128i test_mm_add_epi32(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_add_epi32 // CHECK: add <4 x i32> return _mm_add_epi32(A, B); } -TEST_CONSTEXPR(match_v4si(_mm_add_epi32((__m128i)(__v4si){+1, -2, +3, -4}, (__m128i)(__v4si){-10, +8, +6, -4}), -9, +6, +9, -8)); + +TEST_CONSTEXPR(match_v4si(_mm_add_epi32((__m128i)(__v4si){ 8, 9, 10, 11}, (__m128i)(__v4si){ -1, -2, 3, 4}), 7, 7, 13, 15)); __m128i test_mm_add_epi64(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_add_epi64 // CHECK: add <2 x i64> return _mm_add_epi64(A, B); } -TEST_CONSTEXPR(match_v2di(_mm_add_epi64((__m128i)(__v2di){+5, -3}, (__m128i)(__v2di){-9, +8}), -4, +5)); + +TEST_CONSTEXPR(match_v2di(_mm_add_epi64((__m128i)(__v2di){ 4, -5}, (__m128i)(__v2di){ 1, -2}), 5, -7)); __m128d test_mm_add_pd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_add_pd From 01a003a5d34ca63d9ea010dc29b7bee691d68ed1 Mon Sep 17 00:00:00 2001 From: donneypr Date: Fri, 19 Sep 2025 15:02:39 -0400 Subject: [PATCH 10/15] added tests for all sub arithmitics llvm#152490 --- clang/test/CodeGen/X86/avx2-builtins.c | 8 ++++++++ clang/test/CodeGen/X86/avx512bw-builtins.c | 12 ++++++++++++ clang/test/CodeGen/X86/avx512f-builtins.c | 12 ++++++++++++ clang/test/CodeGen/X86/avx512vl-builtins.c | 16 ++++++++++++++++ clang/test/CodeGen/X86/avx512vlbw-builtins.c | 16 ++++++++++++++++ clang/test/CodeGen/X86/sse2-builtins.c | 10 ++++++++-- 6 files changed, 72 insertions(+), 2 deletions(-) diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index 0437403b14c20..7000089db885b 100644 --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -1368,24 +1368,32 @@ __m256i test_mm256_sub_epi8(__m256i a, __m256i b) { return _mm256_sub_epi8(a, b); } +TEST_CONSTEXPR(match_v32qi(_mm256_sub_epi8((__m256i)(__v32qi){ -64, -65, 66, 67, 68, -69, -70, -71, -72, 73, 74, -75, 76, 77, -78, 79, -80, 81, 82, -83, 84, -85, 86, 87, -88, -89, -90, 91, -92, 93, 94, 95}, (__m256i)(__v32qi){ -1, -2, 3, -4, 5, -6, 7, 8, 9, -10, 11, -12, 13, 14, 15, 16, -17, 18, 19, 20, -21, 22, -23, -24, 25, 26, -27, -28, -29, 30, -31, 32}), -63, -63, 63, 71, 63, -63, -77, -79, -81, 83, 63, -63, 63, 63, -93, 63, -63, 63, 63, -103, 105, -107, 109, 111, -113, -115, -63, 119, -63, 63, 125, 63)); + __m256i test_mm256_sub_epi16(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_sub_epi16 // CHECK: sub <16 x i16> return _mm256_sub_epi16(a, b); } +TEST_CONSTEXPR(match_v16hi(_mm256_sub_epi16((__m256i)(__v16hi){ -32, -33, 34, -35, -36, 37, -38, -39, -40, -41, -42, 43, -44, -45, 46, 47}, (__m256i)(__v16hi){ -1, 2, 3, -4, 5, -6, 7, 8, 9, 10, -11, -12, -13, 14, -15, -16}), -31, -35, 31, -31, -41, 43, -45, -47, -49, -51, -31, 55, -31, -59, 61, 63)); + __m256i test_mm256_sub_epi32(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_sub_epi32 // CHECK: sub <8 x i32> return _mm256_sub_epi32(a, b); } +TEST_CONSTEXPR(match_v8si(_mm256_sub_epi32((__m256i)(__v8si){ 16, 17, 18, -19, 20, -21, 22, 23}, (__m256i)(__v8si){ -1, 2, 3, 4, 5, 6, 7, 8}), 17, 15, 15, -23, 15, -27, 15, 15)); + __m256i test_mm256_sub_epi64(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_sub_epi64 // CHECK: sub <4 x i64> return _mm256_sub_epi64(a, b); } +TEST_CONSTEXPR(match_v4di(_mm256_sub_epi64((__m256i)(__v4di){ 8, -9, 10, 11}, (__m256i)(__v4di){ -1, -2, 3, 4}), 9, -7, 7, 7)); + __m256i test_mm256_subs_epi8(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_subs_epi8 // CHECK: call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index 4cffb38d6205a..bf5534c0e5155 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -778,6 +778,8 @@ __m512i test_mm512_sub_epi8 (__m512i __A, __m512i __B) { return _mm512_sub_epi8(__A, __B); } +TEST_CONSTEXPR(match_v64qi(_mm512_sub_epi8((__m512i)(__v64qi){ -128, 127, 126, -125, -124, -123, 122, 121, -120, -119, 118, -117, 116, 115, -114, 113, -112, 111, -110, -109, -108, -107, -106, -105, 104, -103, -102, 101, 100, -99, 98, -97, -96, 95, 94, -93, 92, 91, 90, -89, 88, 87, -86, -85, -84, -83, -82, 81, -80, 79, -78, 77, -76, 75, 74, -73, -72, 71, -70, -69, -68, 67, -66, 65}, (__m512i)(__v64qi){ 1, -2, 3, -4, -5, -6, -7, -8, 9, 10, 11, 12, -13, -14, 15, 16, -17, -18, -19, 20, -21, -22, 23, 24, 25, -26, 27, -28, -29, -30, -31, -32, -33, -34, -35, -36, -37, 38, -39, -40, 41, -42, -43, 44, -45, -46, 47, 48, -49, -50, 51, -52, -53, 54, 55, -56, -57, 58, 59, 60, 61, -62, 63, -64}), 127, -127, 123, -121, -119, -117, -127, -127, 127, 127, 107, 127, -127, -127, 127, 97, -95, -127, -91, 127, -87, -85, 127, 127, 79, -77, 127, -127, -127, -69, -127, -65, -63, -127, -127, -57, -127, 53, -127, -49, 47, -127, -43, 127, -39, -37, 127, 33, -31, -127, 127, -127, -23, 21, 19, -17, -15, 13, 127, 127, 127, -127, 127, -127)); + __m512i test_mm512_mask_sub_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_mask_sub_epi8 //CHECK: sub <64 x i8> %{{.*}}, %{{.*}} @@ -785,6 +787,8 @@ __m512i test_mm512_mask_sub_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m51 return _mm512_mask_sub_epi8(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v64qi(_mm512_mask_sub_epi8((__m512i)(__v64qi){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x2488D358BA7928B1, (__m512i)(__v64qi){ -128, 127, 126, -125, -124, -123, 122, 121, -120, -119, 118, -117, 116, 115, -114, 113, -112, 111, -110, -109, -108, -107, -106, -105, 104, -103, -102, 101, 100, -99, 98, -97, -96, 95, 94, -93, 92, 91, 90, -89, 88, 87, -86, -85, -84, -83, -82, 81, -80, 79, -78, 77, -76, 75, 74, -73, -72, 71, -70, -69, -68, 67, -66, 65}, (__m512i)(__v64qi){ 1, -2, 3, -4, -5, -6, -7, -8, 9, 10, 11, 12, -13, -14, 15, 16, -17, -18, -19, 20, -21, -22, 23, 24, 25, -26, 27, -28, -29, -30, -31, -32, -33, -34, -35, -36, -37, 38, -39, -40, 41, -42, -43, 44, -45, -46, 47, 48, -49, -50, 51, -52, -53, 54, 55, -56, -57, 58, 59, 60, 61, -62, 63, -64}), 127, 99, 99, 99, -119, -117, 99, -127, 99, 99, 99, 127, 99, -127, 99, 99, -95, 99, 99, 127, -87, -85, 127, 99, 99, -77, 99, -127, -127, -69, 99, -65, 99, 99, 99, -57, -127, 99, -127, 99, 47, -127, 99, 99, -39, 99, 127, 33, 99, 99, 99, -127, 99, 99, 99, -17, 99, 99, 127, 99, 99, -127, 99, 99)); + __m512i test_mm512_maskz_sub_epi8 (__mmask64 __U, __m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_maskz_sub_epi8 //CHECK: sub <64 x i8> %{{.*}}, %{{.*}} @@ -792,6 +796,8 @@ __m512i test_mm512_maskz_sub_epi8 (__mmask64 __U, __m512i __A, __m512i __B) { return _mm512_maskz_sub_epi8(__U, __A, __B); } +TEST_CONSTEXPR(match_v64qi(_mm512_maskz_sub_epi8(0x2488D358BA7928B1, (__m512i)(__v64qi){ -128, 127, 126, -125, -124, -123, 122, 121, -120, -119, 118, -117, 116, 115, -114, 113, -112, 111, -110, -109, -108, -107, -106, -105, 104, -103, -102, 101, 100, -99, 98, -97, -96, 95, 94, -93, 92, 91, 90, -89, 88, 87, -86, -85, -84, -83, -82, 81, -80, 79, -78, 77, -76, 75, 74, -73, -72, 71, -70, -69, -68, 67, -66, 65}, (__m512i)(__v64qi){ 1, -2, 3, -4, -5, -6, -7, -8, 9, 10, 11, 12, -13, -14, 15, 16, -17, -18, -19, 20, -21, -22, 23, 24, 25, -26, 27, -28, -29, -30, -31, -32, -33, -34, -35, -36, -37, 38, -39, -40, 41, -42, -43, 44, -45, -46, 47, 48, -49, -50, 51, -52, -53, 54, 55, -56, -57, 58, 59, 60, 61, -62, 63, -64}), 127, 0, 0, 0, -119, -117, 0, -127, 0, 0, 0, 127, 0, -127, 0, 0, -95, 0, 0, 127, -87, -85, 127, 0, 0, -77, 0, -127, -127, -69, 0, -65, 0, 0, 0, -57, -127, 0, -127, 0, 47, -127, 0, 0, -39, 0, 127, 33, 0, 0, 0, -127, 0, 0, 0, -17, 0, 0, 127, 0, 0, -127, 0, 0)); + __m512i test_mm512_add_epi16 (__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_add_epi16 //CHECK: add <32 x i16> @@ -824,6 +830,8 @@ __m512i test_mm512_sub_epi16 (__m512i __A, __m512i __B) { return _mm512_sub_epi16(__A, __B); } +TEST_CONSTEXPR(match_v32hi(_mm512_sub_epi16((__m512i)(__v32hi){ 64, 65, 66, 67, -68, 69, -70, 71, 72, -73, 74, -75, 76, 77, -78, -79, 80, 81, 82, -83, -84, -85, 86, 87, -88, 89, -90, 91, 92, -93, 94, 95}, (__m512i)(__v32hi){ 1, 2, -3, -4, 5, 6, -7, -8, -9, -10, -11, 12, 13, -14, 15, 16, -17, -18, 19, -20, -21, 22, -23, -24, 25, -26, -27, -28, -29, 30, -31, -32}), 63, 63, 69, 71, -73, 63, -63, 79, 81, -63, 85, -87, 63, 91, -93, -95, 97, 99, 63, -63, -63, -107, 109, 111, -113, 115, -63, 119, 121, -123, 125, 127)); + __m512i test_mm512_mask_sub_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_mask_sub_epi16 //CHECK: sub <32 x i16> %{{.*}}, %{{.*}} @@ -831,6 +839,8 @@ __m512i test_mm512_mask_sub_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m5 return _mm512_mask_sub_epi16(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v32hi(_mm512_mask_sub_epi16((__m512i)(__v32hi){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0xB885E123, (__m512i)(__v32hi){ 64, 65, 66, 67, -68, 69, -70, 71, 72, -73, 74, -75, 76, 77, -78, -79, 80, 81, 82, -83, -84, -85, 86, 87, -88, 89, -90, 91, 92, -93, 94, 95}, (__m512i)(__v32hi){ 1, 2, -3, -4, 5, 6, -7, -8, -9, -10, -11, 12, 13, -14, 15, 16, -17, -18, 19, -20, -21, 22, -23, -24, 25, -26, -27, -28, -29, 30, -31, -32}), 63, 63, 99, 99, 99, 63, 99, 99, 81, 99, 99, 99, 99, 91, -93, -95, 97, 99, 63, 99, 99, 99, 99, 111, 99, 99, 99, 119, 121, -123, 99, 127)); + __m512i test_mm512_maskz_sub_epi16 (__mmask32 __U, __m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_maskz_sub_epi16 //CHECK: sub <32 x i16> %{{.*}}, %{{.*}} @@ -838,6 +848,8 @@ __m512i test_mm512_maskz_sub_epi16 (__mmask32 __U, __m512i __A, __m512i __B) { return _mm512_maskz_sub_epi16(__U, __A, __B); } +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_sub_epi16(0xB885E123, (__m512i)(__v32hi){ 64, 65, 66, 67, -68, 69, -70, 71, 72, -73, 74, -75, 76, 77, -78, -79, 80, 81, 82, -83, -84, -85, 86, 87, -88, 89, -90, 91, 92, -93, 94, 95}, (__m512i)(__v32hi){ 1, 2, -3, -4, 5, 6, -7, -8, -9, -10, -11, 12, 13, -14, 15, 16, -17, -18, 19, -20, -21, 22, -23, -24, 25, -26, -27, -28, -29, 30, -31, -32}), 63, 63, 0, 0, 0, 63, 0, 0, 81, 0, 0, 0, 0, 91, -93, -95, 97, 0, 63, 0, 0, 0, 0, 111, 0, 0, 0, 119, 121, -123, 0, 127)); + __m512i test_mm512_mullo_epi16 (__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_mullo_epi16 //CHECK: mul <32 x i16> diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index ed4824cbbfcbe..14beb88dba6f4 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -3006,6 +3006,8 @@ __m512i test_mm512_maskz_sub_epi32 (__mmask16 __k,__m512i __A, __m512i __B) { return _mm512_maskz_sub_epi32(__k,__A,__B); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_sub_epi32(0xB3F3, (__m512i)(__v16si){ -32, -33, -34, 35, 36, 37, -38, -39, 40, 41, -42, 43, -44, -45, 46, -47}, (__m512i)(__v16si){ 1, 2, -3, -4, -5, -6, -7, 8, 9, 10, 11, -12, 13, 14, -15, -16}), -33, -35, 0, 0, 41, 43, -31, -47, 31, 31, 0, 0, -57, -59, 0, -31)); + __m512i test_mm512_mask_sub_epi32 (__mmask16 __k,__m512i __A, __m512i __B, __m512i __src) { //CHECK-LABEL: test_mm512_mask_sub_epi32 @@ -3014,12 +3016,16 @@ __m512i test_mm512_mask_sub_epi32 (__mmask16 __k,__m512i __A, __m512i __B, return _mm512_mask_sub_epi32(__src,__k,__A,__B); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_sub_epi32((__m512i)(__v16si){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0xB3F3, (__m512i)(__v16si){ -32, -33, -34, 35, 36, 37, -38, -39, 40, 41, -42, 43, -44, -45, 46, -47}, (__m512i)(__v16si){ 1, 2, -3, -4, -5, -6, -7, 8, 9, 10, 11, -12, 13, 14, -15, -16}), -33, -35, 99, 99, 41, 43, -31, -47, 31, 31, 99, 99, -57, -59, 99, -31)); + __m512i test_mm512_sub_epi32(__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_sub_epi32 //CHECK: sub <16 x i32> return _mm512_sub_epi32(__A,__B); } +TEST_CONSTEXPR(match_v16si(_mm512_sub_epi32((__m512i)(__v16si){ -32, -33, -34, 35, 36, 37, -38, -39, 40, 41, -42, 43, -44, -45, 46, -47}, (__m512i)(__v16si){ 1, 2, -3, -4, -5, -6, -7, 8, 9, 10, 11, -12, 13, 14, -15, -16}), -33, -35, -31, 39, 41, 43, -31, -47, 31, 31, -53, 55, -57, -59, 61, -31)); + __m512i test_mm512_maskz_sub_epi64 (__mmask8 __k,__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_maskz_sub_epi64 //CHECK: sub <8 x i64> %{{.*}}, %{{.*}} @@ -3027,6 +3033,8 @@ __m512i test_mm512_maskz_sub_epi64 (__mmask8 __k,__m512i __A, __m512i __B) { return _mm512_maskz_sub_epi64(__k,__A,__B); } +TEST_CONSTEXPR(match_v8di(_mm512_maskz_sub_epi64(0x6A, (__m512i)(__v8di){ -16, 17, -18, 19, -20, 21, 22, 23}, (__m512i)(__v8di){ -1, -2, -3, 4, 5, -6, -7, 8}), 0, 19, 0, 15, 0, 27, 29, 0)); + __m512i test_mm512_mask_sub_epi64 (__mmask8 __k,__m512i __A, __m512i __B, __m512i __src) { //CHECK-LABEL: test_mm512_mask_sub_epi64 @@ -3035,12 +3043,16 @@ __m512i test_mm512_mask_sub_epi64 (__mmask8 __k,__m512i __A, __m512i __B, return _mm512_mask_sub_epi64(__src,__k,__A,__B); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_sub_epi64((__m512i)(__v8di){ 99, 99, 99, 99, 99, 99, 99, 99}, 0x6A, (__m512i)(__v8di){ -16, 17, -18, 19, -20, 21, 22, 23}, (__m512i)(__v8di){ -1, -2, -3, 4, 5, -6, -7, 8}), 99, 19, 99, 15, 99, 27, 29, 99)); + __m512i test_mm512_sub_epi64(__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_sub_epi64 //CHECK: sub <8 x i64> return _mm512_sub_epi64(__A,__B); } +TEST_CONSTEXPR(match_v8di(_mm512_sub_epi64((__m512i)(__v8di){ -16, 17, -18, 19, -20, 21, 22, 23}, (__m512i)(__v8di){ -1, -2, -3, 4, 5, -6, -7, 8}), -15, 19, -15, 15, -25, 27, 29, 15)); + __m512i test_mm512_maskz_add_epi32 (__mmask16 __k,__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_maskz_add_epi32 //CHECK: add <16 x i32> %{{.*}}, %{{.*}} diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c index 893187350b61b..2416468f9c430 100644 --- a/clang/test/CodeGen/X86/avx512vl-builtins.c +++ b/clang/test/CodeGen/X86/avx512vl-builtins.c @@ -653,6 +653,8 @@ __m256i test_mm256_mask_sub_epi32 (__m256i __W, __mmask8 __U, __m256i __A, return _mm256_mask_sub_epi32 (__W,__U,__A,__B); } +TEST_CONSTEXPR(match_v8si(_mm256_mask_sub_epi32((__m256i)(__v8si){ 99, 99, 99, 99, 99, 99, 99, 99}, 0xA1, (__m256i)(__v8si){ 16, 17, 18, -19, 20, -21, 22, 23}, (__m256i)(__v8si){ -1, 2, 3, 4, 5, 6, 7, 8}), 17, 99, 99, 99, 99, -27, 99, 15)); + __m256i test_mm256_maskz_sub_epi32 (__mmask8 __U, __m256i __A, __m256i __B) { //CHECK-LABEL: test_mm256_maskz_sub_epi32 //CHECK: sub <8 x i32> %{{.*}}, %{{.*}} @@ -660,6 +662,8 @@ __m256i test_mm256_maskz_sub_epi32 (__mmask8 __U, __m256i __A, __m256i __B) { return _mm256_maskz_sub_epi32 (__U,__A,__B); } +TEST_CONSTEXPR(match_v8si(_mm256_maskz_sub_epi32(0xA1, (__m256i)(__v8si){ 16, 17, 18, -19, 20, -21, 22, 23}, (__m256i)(__v8si){ -1, 2, 3, 4, 5, 6, 7, 8}), 17, 0, 0, 0, 0, -27, 0, 15)); + __m256i test_mm256_mask_sub_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { //CHECK-LABEL: test_mm256_mask_sub_epi64 @@ -668,6 +672,8 @@ __m256i test_mm256_mask_sub_epi64 (__m256i __W, __mmask8 __U, __m256i __A, return _mm256_mask_sub_epi64 (__W,__U,__A,__B); } +TEST_CONSTEXPR(match_v4di(_mm256_mask_sub_epi64((__m256i)(__v4di){ 99, 99, 99, 99}, 0x0, (__m256i)(__v4di){ 8, -9, 10, 11}, (__m256i)(__v4di){ -1, -2, 3, 4}), 99, 99, 99, 99)); + __m256i test_mm256_maskz_sub_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { //CHECK-LABEL: test_mm256_maskz_sub_epi64 //CHECK: sub <4 x i64> %{{.*}}, %{{.*}} @@ -675,6 +681,8 @@ __m256i test_mm256_maskz_sub_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { return _mm256_maskz_sub_epi64 (__U,__A,__B); } +TEST_CONSTEXPR(match_v4di(_mm256_maskz_sub_epi64(0x0, (__m256i)(__v4di){ 8, -9, 10, 11}, (__m256i)(__v4di){ -1, -2, 3, 4}), 0, 0, 0, 0)); + __m128i test_mm_mask_add_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { //CHECK-LABEL: test_mm_mask_add_epi32 @@ -721,6 +729,8 @@ __m128i test_mm_mask_sub_epi32 (__m128i __W, __mmask8 __U, __m128i __A, return _mm_mask_sub_epi32(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v4si(_mm_mask_sub_epi32((__m128i)(__v4si){ 99, 99, 99, 99}, 0xA, (__m128i)(__v4si){ 8, 9, 10, 11}, (__m128i)(__v4si){ -1, -2, 3, 4}), 99, 11, 99, 7)); + __m128i test_mm_maskz_sub_epi32 (__mmask8 __U, __m128i __A, __m128i __B) { //CHECK-LABEL: test_mm_maskz_sub_epi32 //CHECK: sub <4 x i32> %{{.*}}, %{{.*}} @@ -728,6 +738,8 @@ __m128i test_mm_maskz_sub_epi32 (__mmask8 __U, __m128i __A, __m128i __B) { return _mm_maskz_sub_epi32(__U, __A, __B); } +TEST_CONSTEXPR(match_v4si(_mm_maskz_sub_epi32(0xA, (__m128i)(__v4si){ 8, 9, 10, 11}, (__m128i)(__v4si){ -1, -2, 3, 4}), 0, 11, 0, 7)); + __m128i test_mm_mask_sub_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { //CHECK-LABEL: test_mm_mask_sub_epi64 @@ -736,6 +748,8 @@ __m128i test_mm_mask_sub_epi64 (__m128i __W, __mmask8 __U, __m128i __A, return _mm_mask_sub_epi64 (__W, __U, __A, __B); } +TEST_CONSTEXPR(match_v2di(_mm_mask_sub_epi64((__m128i)(__v2di){ 99, 99}, 0x3, (__m128i)(__v2di){ 4, -5}, (__m128i)(__v2di){ 1, -2}), 3, -3)); + __m128i test_mm_maskz_sub_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { //CHECK-LABEL: test_mm_maskz_sub_epi64 //CHECK: sub <2 x i64> %{{.*}}, %{{.*}} @@ -743,6 +757,8 @@ __m128i test_mm_maskz_sub_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { return _mm_maskz_sub_epi64 (__U, __A, __B); } +TEST_CONSTEXPR(match_v2di(_mm_maskz_sub_epi64(0x3, (__m128i)(__v2di){ 4, -5}, (__m128i)(__v2di){ 1, -2}), 3, -3)); + __m256i test_mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { //CHECK-LABEL: test_mm256_mask_mul_epi32 diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c index e5e0df7bca5ed..8f615fea6b017 100644 --- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c @@ -787,6 +787,8 @@ __m256i test_mm256_mask_sub_epi8 (__m256i __W, __mmask32 __U, __m256i __A, __m25 return _mm256_mask_sub_epi8(__W, __U , __A, __B); } +TEST_CONSTEXPR(match_v32qi(_mm256_mask_sub_epi8((__m256i)(__v32qi){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x20676BFF, (__m256i)(__v32qi){ -64, -65, 66, 67, 68, -69, -70, -71, -72, 73, 74, -75, 76, 77, -78, 79, -80, 81, 82, -83, 84, -85, 86, 87, -88, -89, -90, 91, -92, 93, 94, 95}, (__m256i)(__v32qi){ -1, -2, 3, -4, 5, -6, 7, 8, 9, -10, 11, -12, 13, 14, 15, 16, -17, 18, 19, 20, -21, 22, -23, -24, 25, 26, -27, -28, -29, 30, -31, 32}), -63, -63, 63, 71, 63, -63, -77, -79, -81, 83, 99, -63, 99, 63, -93, 99, -63, 63, 63, 99, 99, -107, 109, 99, 99, 99, 99, 99, 99, 63, 99, 99)); + __m256i test_mm256_maskz_sub_epi8 (__mmask32 __U, __m256i __A, __m256i __B) { //CHECK-LABEL: test_mm256_maskz_sub_epi8 //CHECK: sub <32 x i8> %{{.*}}, %{{.*}} @@ -794,6 +796,8 @@ __m256i test_mm256_maskz_sub_epi8 (__mmask32 __U, __m256i __A, __m256i __B) { return _mm256_maskz_sub_epi8(__U , __A, __B); } +TEST_CONSTEXPR(match_v32qi(_mm256_maskz_sub_epi8(0x20676BFF, (__m256i)(__v32qi){ -64, -65, 66, 67, 68, -69, -70, -71, -72, 73, 74, -75, 76, 77, -78, 79, -80, 81, 82, -83, 84, -85, 86, 87, -88, -89, -90, 91, -92, 93, 94, 95}, (__m256i)(__v32qi){ -1, -2, 3, -4, 5, -6, 7, 8, 9, -10, 11, -12, 13, 14, 15, 16, -17, 18, 19, 20, -21, 22, -23, -24, 25, 26, -27, -28, -29, 30, -31, 32}), -63, -63, 63, 71, 63, -63, -77, -79, -81, 83, 0, -63, 0, 63, -93, 0, -63, 63, 63, 0, 0, -107, 109, 0, 0, 0, 0, 0, 0, 63, 0, 0)); + __m256i test_mm256_mask_sub_epi16 (__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { //CHECK-LABEL: test_mm256_mask_sub_epi16 //CHECK: sub <16 x i16> %{{.*}}, %{{.*}} @@ -801,6 +805,8 @@ __m256i test_mm256_mask_sub_epi16 (__m256i __W, __mmask16 __U, __m256i __A, __m2 return _mm256_mask_sub_epi16(__W, __U , __A, __B); } +TEST_CONSTEXPR(match_v16hi(_mm256_mask_sub_epi16((__m256i)(__v16hi){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x5A21, (__m256i)(__v16hi){ -32, -33, 34, -35, -36, 37, -38, -39, -40, -41, -42, 43, -44, -45, 46, 47}, (__m256i)(__v16hi){ -1, 2, 3, -4, 5, -6, 7, 8, 9, 10, -11, -12, -13, 14, -15, -16}), -31, 99, 99, 99, 99, 43, 99, 99, 99, -51, 99, 55, -31, 99, 61, 99)); + __m256i test_mm256_maskz_sub_epi16 (__mmask16 __U, __m256i __A, __m256i __B) { //CHECK-LABEL: test_mm256_maskz_sub_epi16 //CHECK: sub <16 x i16> %{{.*}}, %{{.*}} @@ -808,6 +814,8 @@ __m256i test_mm256_maskz_sub_epi16 (__mmask16 __U, __m256i __A, __m256i __B) { return _mm256_maskz_sub_epi16(__U , __A, __B); } +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_sub_epi16(0x5A21, (__m256i)(__v16hi){ -32, -33, 34, -35, -36, 37, -38, -39, -40, -41, -42, 43, -44, -45, 46, 47}, (__m256i)(__v16hi){ -1, 2, 3, -4, 5, -6, 7, 8, 9, 10, -11, -12, -13, 14, -15, -16}), -31, 0, 0, 0, 0, 43, 0, 0, 0, -51, 0, 55, -31, 0, 61, 0)); + __m128i test_mm_mask_add_epi8 (__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { //CHECK-LABEL: test_mm_mask_add_epi8 //CHECK: add <16 x i8> %{{.*}}, %{{.*}} @@ -851,6 +859,8 @@ __m128i test_mm_mask_sub_epi8 (__m128i __W, __mmask16 __U, __m128i __A, __m128i return _mm_mask_sub_epi8(__W, __U , __A, __B); } +TEST_CONSTEXPR(match_v16qi(_mm_mask_sub_epi8((__m128i)(__v16qi){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x5693, (__m128i)(__v16qi){ 32, 33, 34, -35, -36, -37, 38, 39, -40, 41, -42, 43, -44, -45, 46, -47}, (__m128i)(__v16qi){ 1, 2, -3, -4, 5, -6, -7, 8, 9, -10, 11, 12, 13, 14, -15, 16}), 31, 31, 99, 99, -41, 99, 99, 31, 99, 51, -53, 99, -57, 99, 61, 99)); + __m128i test_mm_maskz_sub_epi8 (__mmask16 __U, __m128i __A, __m128i __B) { //CHECK-LABEL: test_mm_maskz_sub_epi8 //CHECK: sub <16 x i8> %{{.*}}, %{{.*}} @@ -858,6 +868,8 @@ __m128i test_mm_maskz_sub_epi8 (__mmask16 __U, __m128i __A, __m128i __B) { return _mm_maskz_sub_epi8(__U , __A, __B); } +TEST_CONSTEXPR(match_v16qi(_mm_maskz_sub_epi8(0x5693, (__m128i)(__v16qi){ 32, 33, 34, -35, -36, -37, 38, 39, -40, 41, -42, 43, -44, -45, 46, -47}, (__m128i)(__v16qi){ 1, 2, -3, -4, 5, -6, -7, 8, 9, -10, 11, 12, 13, 14, -15, 16}), 31, 31, 0, 0, -41, 0, 0, 31, 0, 51, -53, 0, -57, 0, 61, 0)); + __m128i test_mm_mask_sub_epi16 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { //CHECK-LABEL: test_mm_mask_sub_epi16 //CHECK: sub <8 x i16> %{{.*}}, %{{.*}} @@ -865,6 +877,8 @@ __m128i test_mm_mask_sub_epi16 (__m128i __W, __mmask8 __U, __m128i __A, __m128i return _mm_mask_sub_epi16(__W, __U , __A, __B); } +TEST_CONSTEXPR(match_v8hi(_mm_mask_sub_epi16((__m128i)(__v8hi){ 99, 99, 99, 99, 99, 99, 99, 99}, 0x8D, (__m128i)(__v8hi){ -16, -17, -18, -19, -20, 21, 22, 23}, (__m128i)(__v8hi){ 1, 2, 3, -4, -5, -6, 7, -8}), -17, 99, -21, -15, 99, 99, 99, 31)); + __m128i test_mm_maskz_sub_epi16 (__mmask8 __U, __m128i __A, __m128i __B) { //CHECK-LABEL: test_mm_maskz_sub_epi16 //CHECK: sub <8 x i16> %{{.*}}, %{{.*}} @@ -872,6 +886,8 @@ __m128i test_mm_maskz_sub_epi16 (__mmask8 __U, __m128i __A, __m128i __B) { return _mm_maskz_sub_epi16(__U , __A, __B); } +TEST_CONSTEXPR(match_v8hi(_mm_maskz_sub_epi16(0x8D, (__m128i)(__v8hi){ -16, -17, -18, -19, -20, 21, 22, 23}, (__m128i)(__v8hi){ 1, 2, 3, -4, -5, -6, 7, -8}), -17, 0, -21, -15, 0, 0, 0, 31)); + __m256i test_mm256_mask_mullo_epi16 (__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { //CHECK-LABEL: test_mm256_mask_mullo_epi16 //CHECK: mul <16 x i16> %{{.*}}, %{{.*}} diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c index 6f9975fa24939..628ebedfb6c56 100644 --- a/clang/test/CodeGen/X86/sse2-builtins.c +++ b/clang/test/CodeGen/X86/sse2-builtins.c @@ -1721,25 +1721,31 @@ __m128i test_mm_sub_epi8(__m128i A, __m128i B) { return _mm_sub_epi8(A, B); } +TEST_CONSTEXPR(match_v16qi(_mm_sub_epi8((__m128i)(__v16qi){ 32, 33, 34, -35, -36, -37, 38, 39, -40, 41, -42, 43, -44, -45, 46, -47}, (__m128i)(__v16qi){ 1, 2, -3, -4, 5, -6, -7, 8, 9, -10, 11, 12, 13, 14, -15, 16}), 31, 31, 37, -31, -41, -31, 45, 31, -49, 51, -53, 31, -57, -59, 61, -63)); + __m128i test_mm_sub_epi16(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_sub_epi16 // CHECK: sub <8 x i16> return _mm_sub_epi16(A, B); } +TEST_CONSTEXPR(match_v8hi(_mm_sub_epi16((__m128i)(__v8hi){ -16, -17, -18, -19, -20, 21, 22, 23}, (__m128i)(__v8hi){ 1, 2, 3, -4, -5, -6, 7, -8}), -17, -19, -21, -15, -15, 27, 15, 31)); + __m128i test_mm_sub_epi32(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_sub_epi32 // CHECK: sub <4 x i32> return _mm_sub_epi32(A, B); } -TEST_CONSTEXPR(match_v4si(_mm_sub_epi32((__m128i)(__v4si){+1, -2, +3, -4}, (__m128i)(__v4si){-10, +8, +6, -4}), +11, -10, -3, 0)); + +TEST_CONSTEXPR(match_v4si(_mm_sub_epi32((__m128i)(__v4si){ 8, 9, 10, 11}, (__m128i)(__v4si){ -1, -2, 3, 4}), 9, 11, 7, 7)); __m128i test_mm_sub_epi64(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_sub_epi64 // CHECK: sub <2 x i64> return _mm_sub_epi64(A, B); } -TEST_CONSTEXPR(match_v2di(_mm_sub_epi64((__m128i)(__v2di){+5, -3}, (__m128i)(__v2di){-9, +8}), +14, -11)); + +TEST_CONSTEXPR(match_v2di(_mm_sub_epi64((__m128i)(__v2di){ 4, -5}, (__m128i)(__v2di){ 1, -2}), 3, -3)); __m128d test_mm_sub_pd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_sub_pd From 1df56c5add51534108bb64691b00606c885cb8bf Mon Sep 17 00:00:00 2001 From: donneypr Date: Fri, 19 Sep 2025 18:08:43 -0400 Subject: [PATCH 11/15] mask and maskz (sub) in CONSTEXPR llvm#152490 --- clang/lib/Headers/avx512vlbwintrin.h | 24 ++++++++++++------------ clang/lib/Headers/avx512vlintrin.h | 16 ++++++++-------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h index 6e3efa7b3562c..76eb6761a9cd9 100644 --- a/clang/lib/Headers/avx512vlbwintrin.h +++ b/clang/lib/Headers/avx512vlbwintrin.h @@ -340,84 +340,84 @@ _mm256_maskz_add_epi16(__mmask16 __U, __m256i __A, __m256i __B) { (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_sub_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_sub_epi8(__A, __B), (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_sub_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_sub_epi8(__A, __B), (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_sub_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_sub_epi16(__A, __B), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_sub_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_sub_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_add_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_add_epi8(__A, __B), (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_add_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_add_epi8(__A, __B), (__v16qi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_add_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_add_epi16(__A, __B), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_add_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_add_epi16(__A, __B), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sub_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_sub_epi8(__A, __B), (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sub_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_sub_epi8(__A, __B), (__v16qi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sub_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_sub_epi16(__A, __B), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sub_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_sub_epi16(__A, __B), diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index d85ea23d5ee5a..091af1887e36a 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -267,7 +267,7 @@ _mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B) (__v4di)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -275,7 +275,7 @@ _mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -283,7 +283,7 @@ _mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B) (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -291,7 +291,7 @@ _mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -331,7 +331,7 @@ _mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B) (__v2di)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -339,7 +339,7 @@ _mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -347,7 +347,7 @@ _mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B) (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -355,7 +355,7 @@ _mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, From bb2c2e3dc5e97ca05500374262ad8b0cc392750a Mon Sep 17 00:00:00 2001 From: donneypr Date: Fri, 19 Sep 2025 18:19:02 -0400 Subject: [PATCH 12/15] mask and maskz (add) in CONSTEXPR llvm#152490 --- clang/lib/Headers/avx512vlbwintrin.h | 8 +++--- clang/lib/Headers/avx512vlintrin.h | 40 +++++++++++----------------- 2 files changed, 20 insertions(+), 28 deletions(-) diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h index 76eb6761a9cd9..5b4a0cb3e5174 100644 --- a/clang/lib/Headers/avx512vlbwintrin.h +++ b/clang/lib/Headers/avx512vlbwintrin.h @@ -312,28 +312,28 @@ #define _mm256_mask_cmpneq_epu16_mask(k, A, B) \ _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE) -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_add_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B){ return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_add_epi8(__A, __B), (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_add_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_add_epi8(__A, __B), (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_add_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_add_epi16(__A, __B), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_add_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_add_epi16(__A, __B), diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index 091af1887e36a..ca4733972b75a 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -235,7 +235,7 @@ typedef char __v2qi __attribute__((__vector_size__(2))); #define _mm256_mask_cmpneq_epu64_mask(k, A, B) \ _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE) -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -243,7 +243,7 @@ _mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -251,7 +251,7 @@ _mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B) (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -259,7 +259,7 @@ _mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -268,38 +268,34 @@ _mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B) } static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) -{ +_mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_sub_epi32(__A, __B), (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B) -{ +_mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_sub_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) -{ +_mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_sub_epi64(__A, __B), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B) -{ +_mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_sub_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -307,7 +303,7 @@ _mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -315,7 +311,7 @@ _mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B) (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -323,7 +319,7 @@ _mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -332,32 +328,28 @@ _mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B) } static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) -{ +_mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_sub_epi32(__A, __B), (__v4si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B) -{ +_mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_sub_epi32(__A, __B), (__v4si)_mm_setzero_si128()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) -{ +_mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_sub_epi64(__A, __B), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B) -{ +_mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_sub_epi64(__A, __B), (__v2di)_mm_setzero_si128()); From ce5d3b6ee94a89a4835cf2b86b15d8c238f4ddc1 Mon Sep 17 00:00:00 2001 From: donneypr Date: Sun, 21 Sep 2025 11:56:32 -0400 Subject: [PATCH 13/15] clang-formatted and also converted __v16qi/__v32qi/__v64qi to __v16qs/__v32qs/__v64qs (signed) llvm#152490 --- clang/test/CodeGen/X86/avx2-builtins.c | 4 ++-- clang/test/CodeGen/X86/avx512bw-builtins.c | 12 ++++++------ clang/test/CodeGen/X86/avx512vlbw-builtins.c | 16 ++++++++-------- clang/test/CodeGen/X86/sse2-builtins.c | 4 ++-- 4 files changed, 18 insertions(+), 18 deletions(-) diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index 7000089db885b..1778683a24016 100644 --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -48,7 +48,7 @@ __m256i test_mm256_add_epi8(__m256i a, __m256i b) { return _mm256_add_epi8(a, b); } -TEST_CONSTEXPR(match_v32qi(_mm256_add_epi8((__m256i)(__v32qi){ -64, -65, 66, 67, 68, -69, -70, -71, -72, 73, 74, -75, 76, 77, -78, 79, -80, 81, 82, -83, 84, -85, 86, 87, -88, -89, -90, 91, -92, 93, 94, 95}, (__m256i)(__v32qi){ -1, -2, 3, -4, 5, -6, 7, 8, 9, -10, 11, -12, 13, 14, 15, 16, -17, 18, 19, 20, -21, 22, -23, -24, 25, 26, -27, -28, -29, 30, -31, 32}), -65, -67, 69, 63, 73, -75, -63, -63, -63, 63, 85, -87, 89, 91, -63, 95, -97, 99, 101, -63, 63, -63, 63, 63, -63, -63, -117, 63, -121, 123, 63, 127)); +TEST_CONSTEXPR(match_v32qi(_mm256_add_epi8((__m256i)(__v32qs){ -64, -65, 66, 67, 68, -69, -70, -71, -72, 73, 74, -75, 76, 77, -78, 79, -80, 81, 82, -83, 84, -85, 86, 87, -88, -89, -90, 91, -92, 93, 94, 95}, (__m256i)(__v32qs){ -1, -2, 3, -4, 5, -6, 7, 8, 9, -10, 11, -12, 13, 14, 15, 16, -17, 18, 19, 20, -21, 22, -23, -24, 25, 26, -27, -28, -29, 30, -31, 32}), -65, -67, 69, 63, 73, -75, -63, -63, -63, 63, 85, -87, 89, 91, -63, 95, -97, 99, 101, -63, 63, -63, 63, 63, -63, -63, -117, 63, -121, 123, 63, 127)); __m256i test_mm256_add_epi16(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_add_epi16 @@ -1368,7 +1368,7 @@ __m256i test_mm256_sub_epi8(__m256i a, __m256i b) { return _mm256_sub_epi8(a, b); } -TEST_CONSTEXPR(match_v32qi(_mm256_sub_epi8((__m256i)(__v32qi){ -64, -65, 66, 67, 68, -69, -70, -71, -72, 73, 74, -75, 76, 77, -78, 79, -80, 81, 82, -83, 84, -85, 86, 87, -88, -89, -90, 91, -92, 93, 94, 95}, (__m256i)(__v32qi){ -1, -2, 3, -4, 5, -6, 7, 8, 9, -10, 11, -12, 13, 14, 15, 16, -17, 18, 19, 20, -21, 22, -23, -24, 25, 26, -27, -28, -29, 30, -31, 32}), -63, -63, 63, 71, 63, -63, -77, -79, -81, 83, 63, -63, 63, 63, -93, 63, -63, 63, 63, -103, 105, -107, 109, 111, -113, -115, -63, 119, -63, 63, 125, 63)); +TEST_CONSTEXPR(match_v32qi(_mm256_sub_epi8((__m256i)(__v32qs){ -64, -65, 66, 67, 68, -69, -70, -71, -72, 73, 74, -75, 76, 77, -78, 79, -80, 81, 82, -83, 84, -85, 86, 87, -88, -89, -90, 91, -92, 93, 94, 95}, (__m256i)(__v32qs){ -1, -2, 3, -4, 5, -6, 7, 8, 9, -10, 11, -12, 13, 14, 15, 16, -17, 18, 19, 20, -21, 22, -23, -24, 25, 26, -27, -28, -29, 30, -31, 32}), -63, -63, 63, 71, 63, -63, -77, -79, -81, 83, 63, -63, 63, 63, -93, 63, -63, 63, 63, -103, 105, -107, 109, 111, -113, -115, -63, 119, -63, 63, 125, 63)); __m256i test_mm256_sub_epi16(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_sub_epi16 diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index bf5534c0e5155..45c1bec95772e 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -752,7 +752,7 @@ __m512i test_mm512_add_epi8 (__m512i __A, __m512i __B) { return _mm512_add_epi8(__A,__B); } -TEST_CONSTEXPR(match_v64qi(_mm512_add_epi8((__m512i)(__v64qi){ -128, 127, 126, -125, -124, -123, 122, 121, -120, -119, 118, -117, 116, 115, -114, 113, -112, 111, -110, -109, -108, -107, -106, -105, 104, -103, -102, 101, 100, -99, 98, -97, -96, 95, 94, -93, 92, 91, 90, -89, 88, 87, -86, -85, -84, -83, -82, 81, -80, 79, -78, 77, -76, 75, 74, -73, -72, 71, -70, -69, -68, 67, -66, 65}, (__m512i)(__v64qi){ 1, -2, 3, -4, -5, -6, -7, -8, 9, 10, 11, 12, -13, -14, 15, 16, -17, -18, -19, 20, -21, -22, 23, 24, 25, -26, 27, -28, -29, -30, -31, -32, -33, -34, -35, -36, -37, 38, -39, -40, 41, -42, -43, 44, -45, -46, 47, 48, -49, -50, 51, -52, -53, 54, 55, -56, -57, 58, 59, 60, 61, -62, 63, -64}), -127, 125, -127, 127, 127, 127, 115, 113, -111, -109, -127, -105, 103, 101, -99, -127, 127, 93, 127, -89, 127, 127, -83, -81, -127, 127, -75, 73, 71, 127, 67, 127, 127, 61, 59, 127, 55, -127, 51, 127, -127, 45, 127, -41, 127, 127, -35, -127, 127, 29, -27, 25, 127, -127, -127, 127, 127, -127, -11, -9, -7, 5, -3, 1)); +TEST_CONSTEXPR(match_v64qi(_mm512_add_epi8((__m512i)(__v64qs){ -128, 127, 126, -125, -124, -123, 122, 121, -120, -119, 118, -117, 116, 115, -114, 113, -112, 111, -110, -109, -108, -107, -106, -105, 104, -103, -102, 101, 100, -99, 98, -97, -96, 95, 94, -93, 92, 91, 90, -89, 88, 87, -86, -85, -84, -83, -82, 81, -80, 79, -78, 77, -76, 75, 74, -73, -72, 71, -70, -69, -68, 67, -66, 65}, (__m512i)(__v64qs){ 1, -2, 3, -4, -5, -6, -7, -8, 9, 10, 11, 12, -13, -14, 15, 16, -17, -18, -19, 20, -21, -22, 23, 24, 25, -26, 27, -28, -29, -30, -31, -32, -33, -34, -35, -36, -37, 38, -39, -40, 41, -42, -43, 44, -45, -46, 47, 48, -49, -50, 51, -52, -53, 54, 55, -56, -57, 58, 59, 60, 61, -62, 63, -64}), -127, 125, -127, 127, 127, 127, 115, 113, -111, -109, -127, -105, 103, 101, -99, -127, 127, 93, 127, -89, 127, 127, -83, -81, -127, 127, -75, 73, 71, 127, 67, 127, 127, 61, 59, 127, 55, -127, 51, 127, -127, 45, 127, -41, 127, 127, -35, -127, 127, 29, -27, 25, 127, -127, -127, 127, 127, -127, -11, -9, -7, 5, -3, 1)); __m512i test_mm512_mask_add_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_mask_add_epi8 @@ -761,7 +761,7 @@ __m512i test_mm512_mask_add_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m51 return _mm512_mask_add_epi8(__W, __U, __A, __B); } -TEST_CONSTEXPR(match_v64qi(_mm512_mask_add_epi8((__m512i)(__v64qi){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x2488D358BA7928B1, (__m512i)(__v64qi){ -128, 127, 126, -125, -124, -123, 122, 121, -120, -119, 118, -117, 116, 115, -114, 113, -112, 111, -110, -109, -108, -107, -106, -105, 104, -103, -102, 101, 100, -99, 98, -97, -96, 95, 94, -93, 92, 91, 90, -89, 88, 87, -86, -85, -84, -83, -82, 81, -80, 79, -78, 77, -76, 75, 74, -73, -72, 71, -70, -69, -68, 67, -66, 65}, (__m512i)(__v64qi){ 1, -2, 3, -4, -5, -6, -7, -8, 9, 10, 11, 12, -13, -14, 15, 16, -17, -18, -19, 20, -21, -22, 23, 24, 25, -26, 27, -28, -29, -30, -31, -32, -33, -34, -35, -36, -37, 38, -39, -40, 41, -42, -43, 44, -45, -46, 47, 48, -49, -50, 51, -52, -53, 54, 55, -56, -57, 58, 59, 60, 61, -62, 63, -64}), -127, 99, 99, 99, 127, 127, 99, 113, 99, 99, 99, -105, 99, 101, 99, 99, 127, 99, 99, -89, 127, 127, -83, 99, 99, 127, 99, 73, 71, 127, 99, 127, 99, 99, 99, 127, 55, 99, 51, 99, -127, 45, 99, 99, 127, 99, -35, -127, 99, 99, 99, 25, 99, 99, 99, 127, 99, 99, -11, 99, 99, 5, 99, 99)); +TEST_CONSTEXPR(match_v64qi(_mm512_mask_add_epi8((__m512i)(__v64qs){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x2488D358BA7928B1, (__m512i)(__v64qs){ -128, 127, 126, -125, -124, -123, 122, 121, -120, -119, 118, -117, 116, 115, -114, 113, -112, 111, -110, -109, -108, -107, -106, -105, 104, -103, -102, 101, 100, -99, 98, -97, -96, 95, 94, -93, 92, 91, 90, -89, 88, 87, -86, -85, -84, -83, -82, 81, -80, 79, -78, 77, -76, 75, 74, -73, -72, 71, -70, -69, -68, 67, -66, 65}, (__m512i)(__v64qs){ 1, -2, 3, -4, -5, -6, -7, -8, 9, 10, 11, 12, -13, -14, 15, 16, -17, -18, -19, 20, -21, -22, 23, 24, 25, -26, 27, -28, -29, -30, -31, -32, -33, -34, -35, -36, -37, 38, -39, -40, 41, -42, -43, 44, -45, -46, 47, 48, -49, -50, 51, -52, -53, 54, 55, -56, -57, 58, 59, 60, 61, -62, 63, -64}), -127, 99, 99, 99, 127, 127, 99, 113, 99, 99, 99, -105, 99, 101, 99, 99, 127, 99, 99, -89, 127, 127, -83, 99, 99, 127, 99, 73, 71, 127, 99, 127, 99, 99, 99, 127, 55, 99, 51, 99, -127, 45, 99, 99, 127, 99, -35, -127, 99, 99, 99, 25, 99, 99, 99, 127, 99, 99, -11, 99, 99, 5, 99, 99)); __m512i test_mm512_maskz_add_epi8 (__mmask64 __U, __m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_maskz_add_epi8 @@ -770,7 +770,7 @@ __m512i test_mm512_maskz_add_epi8 (__mmask64 __U, __m512i __A, __m512i __B) { return _mm512_maskz_add_epi8(__U, __A, __B); } -TEST_CONSTEXPR(match_v64qi(_mm512_maskz_add_epi8(0x2488D358BA7928B1, (__m512i)(__v64qi){ -128, 127, 126, -125, -124, -123, 122, 121, -120, -119, 118, -117, 116, 115, -114, 113, -112, 111, -110, -109, -108, -107, -106, -105, 104, -103, -102, 101, 100, -99, 98, -97, -96, 95, 94, -93, 92, 91, 90, -89, 88, 87, -86, -85, -84, -83, -82, 81, -80, 79, -78, 77, -76, 75, 74, -73, -72, 71, -70, -69, -68, 67, -66, 65}, (__m512i)(__v64qi){ 1, -2, 3, -4, -5, -6, -7, -8, 9, 10, 11, 12, -13, -14, 15, 16, -17, -18, -19, 20, -21, -22, 23, 24, 25, -26, 27, -28, -29, -30, -31, -32, -33, -34, -35, -36, -37, 38, -39, -40, 41, -42, -43, 44, -45, -46, 47, 48, -49, -50, 51, -52, -53, 54, 55, -56, -57, 58, 59, 60, 61, -62, 63, -64}), -127, 0, 0, 0, 127, 127, 0, 113, 0, 0, 0, -105, 0, 101, 0, 0, 127, 0, 0, -89, 127, 127, -83, 0, 0, 127, 0, 73, 71, 127, 0, 127, 0, 0, 0, 127, 55, 0, 51, 0, -127, 45, 0, 0, 127, 0, -35, -127, 0, 0, 0, 25, 0, 0, 0, 127, 0, 0, -11, 0, 0, 5, 0, 0)); +TEST_CONSTEXPR(match_v64qi(_mm512_maskz_add_epi8(0x2488D358BA7928B1, (__m512i)(__v64qs){ -128, 127, 126, -125, -124, -123, 122, 121, -120, -119, 118, -117, 116, 115, -114, 113, -112, 111, -110, -109, -108, -107, -106, -105, 104, -103, -102, 101, 100, -99, 98, -97, -96, 95, 94, -93, 92, 91, 90, -89, 88, 87, -86, -85, -84, -83, -82, 81, -80, 79, -78, 77, -76, 75, 74, -73, -72, 71, -70, -69, -68, 67, -66, 65}, (__m512i)(__v64qs){ 1, -2, 3, -4, -5, -6, -7, -8, 9, 10, 11, 12, -13, -14, 15, 16, -17, -18, -19, 20, -21, -22, 23, 24, 25, -26, 27, -28, -29, -30, -31, -32, -33, -34, -35, -36, -37, 38, -39, -40, 41, -42, -43, 44, -45, -46, 47, 48, -49, -50, 51, -52, -53, 54, 55, -56, -57, 58, 59, 60, 61, -62, 63, -64}), -127, 0, 0, 0, 127, 127, 0, 113, 0, 0, 0, -105, 0, 101, 0, 0, 127, 0, 0, -89, 127, 127, -83, 0, 0, 127, 0, 73, 71, 127, 0, 127, 0, 0, 0, 127, 55, 0, 51, 0, -127, 45, 0, 0, 127, 0, -35, -127, 0, 0, 0, 25, 0, 0, 0, 127, 0, 0, -11, 0, 0, 5, 0, 0)); __m512i test_mm512_sub_epi8 (__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_sub_epi8 @@ -778,7 +778,7 @@ __m512i test_mm512_sub_epi8 (__m512i __A, __m512i __B) { return _mm512_sub_epi8(__A, __B); } -TEST_CONSTEXPR(match_v64qi(_mm512_sub_epi8((__m512i)(__v64qi){ -128, 127, 126, -125, -124, -123, 122, 121, -120, -119, 118, -117, 116, 115, -114, 113, -112, 111, -110, -109, -108, -107, -106, -105, 104, -103, -102, 101, 100, -99, 98, -97, -96, 95, 94, -93, 92, 91, 90, -89, 88, 87, -86, -85, -84, -83, -82, 81, -80, 79, -78, 77, -76, 75, 74, -73, -72, 71, -70, -69, -68, 67, -66, 65}, (__m512i)(__v64qi){ 1, -2, 3, -4, -5, -6, -7, -8, 9, 10, 11, 12, -13, -14, 15, 16, -17, -18, -19, 20, -21, -22, 23, 24, 25, -26, 27, -28, -29, -30, -31, -32, -33, -34, -35, -36, -37, 38, -39, -40, 41, -42, -43, 44, -45, -46, 47, 48, -49, -50, 51, -52, -53, 54, 55, -56, -57, 58, 59, 60, 61, -62, 63, -64}), 127, -127, 123, -121, -119, -117, -127, -127, 127, 127, 107, 127, -127, -127, 127, 97, -95, -127, -91, 127, -87, -85, 127, 127, 79, -77, 127, -127, -127, -69, -127, -65, -63, -127, -127, -57, -127, 53, -127, -49, 47, -127, -43, 127, -39, -37, 127, 33, -31, -127, 127, -127, -23, 21, 19, -17, -15, 13, 127, 127, 127, -127, 127, -127)); +TEST_CONSTEXPR(match_v64qi(_mm512_sub_epi8((__m512i)(__v64qs){ -128, 127, 126, -125, -124, -123, 122, 121, -120, -119, 118, -117, 116, 115, -114, 113, -112, 111, -110, -109, -108, -107, -106, -105, 104, -103, -102, 101, 100, -99, 98, -97, -96, 95, 94, -93, 92, 91, 90, -89, 88, 87, -86, -85, -84, -83, -82, 81, -80, 79, -78, 77, -76, 75, 74, -73, -72, 71, -70, -69, -68, 67, -66, 65}, (__m512i)(__v64qs){ 1, -2, 3, -4, -5, -6, -7, -8, 9, 10, 11, 12, -13, -14, 15, 16, -17, -18, -19, 20, -21, -22, 23, 24, 25, -26, 27, -28, -29, -30, -31, -32, -33, -34, -35, -36, -37, 38, -39, -40, 41, -42, -43, 44, -45, -46, 47, 48, -49, -50, 51, -52, -53, 54, 55, -56, -57, 58, 59, 60, 61, -62, 63, -64}), 127, -127, 123, -121, -119, -117, -127, -127, 127, 127, 107, 127, -127, -127, 127, 97, -95, -127, -91, 127, -87, -85, 127, 127, 79, -77, 127, -127, -127, -69, -127, -65, -63, -127, -127, -57, -127, 53, -127, -49, 47, -127, -43, 127, -39, -37, 127, 33, -31, -127, 127, -127, -23, 21, 19, -17, -15, 13, 127, 127, 127, -127, 127, -127)); __m512i test_mm512_mask_sub_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_mask_sub_epi8 @@ -787,7 +787,7 @@ __m512i test_mm512_mask_sub_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m51 return _mm512_mask_sub_epi8(__W, __U, __A, __B); } -TEST_CONSTEXPR(match_v64qi(_mm512_mask_sub_epi8((__m512i)(__v64qi){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x2488D358BA7928B1, (__m512i)(__v64qi){ -128, 127, 126, -125, -124, -123, 122, 121, -120, -119, 118, -117, 116, 115, -114, 113, -112, 111, -110, -109, -108, -107, -106, -105, 104, -103, -102, 101, 100, -99, 98, -97, -96, 95, 94, -93, 92, 91, 90, -89, 88, 87, -86, -85, -84, -83, -82, 81, -80, 79, -78, 77, -76, 75, 74, -73, -72, 71, -70, -69, -68, 67, -66, 65}, (__m512i)(__v64qi){ 1, -2, 3, -4, -5, -6, -7, -8, 9, 10, 11, 12, -13, -14, 15, 16, -17, -18, -19, 20, -21, -22, 23, 24, 25, -26, 27, -28, -29, -30, -31, -32, -33, -34, -35, -36, -37, 38, -39, -40, 41, -42, -43, 44, -45, -46, 47, 48, -49, -50, 51, -52, -53, 54, 55, -56, -57, 58, 59, 60, 61, -62, 63, -64}), 127, 99, 99, 99, -119, -117, 99, -127, 99, 99, 99, 127, 99, -127, 99, 99, -95, 99, 99, 127, -87, -85, 127, 99, 99, -77, 99, -127, -127, -69, 99, -65, 99, 99, 99, -57, -127, 99, -127, 99, 47, -127, 99, 99, -39, 99, 127, 33, 99, 99, 99, -127, 99, 99, 99, -17, 99, 99, 127, 99, 99, -127, 99, 99)); +TEST_CONSTEXPR(match_v64qi(_mm512_mask_sub_epi8((__m512i)(__v64qs){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x2488D358BA7928B1, (__m512i)(__v64qs){ -128, 127, 126, -125, -124, -123, 122, 121, -120, -119, 118, -117, 116, 115, -114, 113, -112, 111, -110, -109, -108, -107, -106, -105, 104, -103, -102, 101, 100, -99, 98, -97, -96, 95, 94, -93, 92, 91, 90, -89, 88, 87, -86, -85, -84, -83, -82, 81, -80, 79, -78, 77, -76, 75, 74, -73, -72, 71, -70, -69, -68, 67, -66, 65}, (__m512i)(__v64qs){ 1, -2, 3, -4, -5, -6, -7, -8, 9, 10, 11, 12, -13, -14, 15, 16, -17, -18, -19, 20, -21, -22, 23, 24, 25, -26, 27, -28, -29, -30, -31, -32, -33, -34, -35, -36, -37, 38, -39, -40, 41, -42, -43, 44, -45, -46, 47, 48, -49, -50, 51, -52, -53, 54, 55, -56, -57, 58, 59, 60, 61, -62, 63, -64}), 127, 99, 99, 99, -119, -117, 99, -127, 99, 99, 99, 127, 99, -127, 99, 99, -95, 99, 99, 127, -87, -85, 127, 99, 99, -77, 99, -127, -127, -69, 99, -65, 99, 99, 99, -57, -127, 99, -127, 99, 47, -127, 99, 99, -39, 99, 127, 33, 99, 99, 99, -127, 99, 99, 99, -17, 99, 99, 127, 99, 99, -127, 99, 99)); __m512i test_mm512_maskz_sub_epi8 (__mmask64 __U, __m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_maskz_sub_epi8 @@ -796,7 +796,7 @@ __m512i test_mm512_maskz_sub_epi8 (__mmask64 __U, __m512i __A, __m512i __B) { return _mm512_maskz_sub_epi8(__U, __A, __B); } -TEST_CONSTEXPR(match_v64qi(_mm512_maskz_sub_epi8(0x2488D358BA7928B1, (__m512i)(__v64qi){ -128, 127, 126, -125, -124, -123, 122, 121, -120, -119, 118, -117, 116, 115, -114, 113, -112, 111, -110, -109, -108, -107, -106, -105, 104, -103, -102, 101, 100, -99, 98, -97, -96, 95, 94, -93, 92, 91, 90, -89, 88, 87, -86, -85, -84, -83, -82, 81, -80, 79, -78, 77, -76, 75, 74, -73, -72, 71, -70, -69, -68, 67, -66, 65}, (__m512i)(__v64qi){ 1, -2, 3, -4, -5, -6, -7, -8, 9, 10, 11, 12, -13, -14, 15, 16, -17, -18, -19, 20, -21, -22, 23, 24, 25, -26, 27, -28, -29, -30, -31, -32, -33, -34, -35, -36, -37, 38, -39, -40, 41, -42, -43, 44, -45, -46, 47, 48, -49, -50, 51, -52, -53, 54, 55, -56, -57, 58, 59, 60, 61, -62, 63, -64}), 127, 0, 0, 0, -119, -117, 0, -127, 0, 0, 0, 127, 0, -127, 0, 0, -95, 0, 0, 127, -87, -85, 127, 0, 0, -77, 0, -127, -127, -69, 0, -65, 0, 0, 0, -57, -127, 0, -127, 0, 47, -127, 0, 0, -39, 0, 127, 33, 0, 0, 0, -127, 0, 0, 0, -17, 0, 0, 127, 0, 0, -127, 0, 0)); +TEST_CONSTEXPR(match_v64qi(_mm512_maskz_sub_epi8(0x2488D358BA7928B1, (__m512i)(__v64qs){ -128, 127, 126, -125, -124, -123, 122, 121, -120, -119, 118, -117, 116, 115, -114, 113, -112, 111, -110, -109, -108, -107, -106, -105, 104, -103, -102, 101, 100, -99, 98, -97, -96, 95, 94, -93, 92, 91, 90, -89, 88, 87, -86, -85, -84, -83, -82, 81, -80, 79, -78, 77, -76, 75, 74, -73, -72, 71, -70, -69, -68, 67, -66, 65}, (__m512i)(__v64qs){ 1, -2, 3, -4, -5, -6, -7, -8, 9, 10, 11, 12, -13, -14, 15, 16, -17, -18, -19, 20, -21, -22, 23, 24, 25, -26, 27, -28, -29, -30, -31, -32, -33, -34, -35, -36, -37, 38, -39, -40, 41, -42, -43, 44, -45, -46, 47, 48, -49, -50, 51, -52, -53, 54, 55, -56, -57, 58, 59, 60, 61, -62, 63, -64}), 127, 0, 0, 0, -119, -117, 0, -127, 0, 0, 0, 127, 0, -127, 0, 0, -95, 0, 0, 127, -87, -85, 127, 0, 0, -77, 0, -127, -127, -69, 0, -65, 0, 0, 0, -57, -127, 0, -127, 0, 47, -127, 0, 0, -39, 0, 127, 33, 0, 0, 0, -127, 0, 0, 0, -17, 0, 0, 127, 0, 0, -127, 0, 0)); __m512i test_mm512_add_epi16 (__m512i __A, __m512i __B) { //CHECK-LABEL: test_mm512_add_epi16 diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c index 8f615fea6b017..ad7ff5e6ad0e1 100644 --- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c @@ -751,7 +751,7 @@ __m256i test_mm256_mask_add_epi8 (__m256i __W, __mmask32 __U, __m256i __A, __m25 return _mm256_mask_add_epi8(__W, __U , __A, __B); } -TEST_CONSTEXPR(match_v32qi(_mm256_mask_add_epi8((__m256i)(__v32qi){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x20676BFF, (__m256i)(__v32qi){ -64, -65, 66, 67, 68, -69, -70, -71, -72, 73, 74, -75, 76, 77, -78, 79, -80, 81, 82, -83, 84, -85, 86, 87, -88, -89, -90, 91, -92, 93, 94, 95}, (__m256i)(__v32qi){ -1, -2, 3, -4, 5, -6, 7, 8, 9, -10, 11, -12, 13, 14, 15, 16, -17, 18, 19, 20, -21, 22, -23, -24, 25, 26, -27, -28, -29, 30, -31, 32}), -65, -67, 69, 63, 73, -75, -63, -63, -63, 63, 99, -87, 99, 91, -63, 99, -97, 99, 101, 99, 99, -63, 63, 99, 99, 99, 99, 99, 99, 123, 99, 99)); +TEST_CONSTEXPR(match_v32qi(_mm256_mask_add_epi8((__m256i)(__v32qs){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x20676BFF, (__m256i)(__v32qs){ -64, -65, 66, 67, 68, -69, -70, -71, -72, 73, 74, -75, 76, 77, -78, 79, -80, 81, 82, -83, 84, -85, 86, 87, -88, -89, -90, 91, -92, 93, 94, 95}, (__m256i)(__v32qs){ -1, -2, 3, -4, 5, -6, 7, 8, 9, -10, 11, -12, 13, 14, 15, 16, -17, 18, 19, 20, -21, 22, -23, -24, 25, 26, -27, -28, -29, 30, -31, 32}), -65, -67, 69, 63, 73, -75, -63, -63, -63, 63, 99, -87, 99, 91, -63, 99, -97, 99, 101, 99, 99, -63, 63, 99, 99, 99, 99, 99, 99, 123, 99, 99)); __m256i test_mm256_maskz_add_epi8 (__mmask32 __U, __m256i __A, __m256i __B) { //CHECK-LABEL: test_mm256_maskz_add_epi8 @@ -760,7 +760,7 @@ __m256i test_mm256_maskz_add_epi8 (__mmask32 __U, __m256i __A, __m256i __B) { return _mm256_maskz_add_epi8(__U , __A, __B); } -TEST_CONSTEXPR(match_v32qi(_mm256_maskz_add_epi8(0x20676BFF, (__m256i)(__v32qi){ -64, -65, 66, 67, 68, -69, -70, -71, -72, 73, 74, -75, 76, 77, -78, 79, -80, 81, 82, -83, 84, -85, 86, 87, -88, -89, -90, 91, -92, 93, 94, 95}, (__m256i)(__v32qi){ -1, -2, 3, -4, 5, -6, 7, 8, 9, -10, 11, -12, 13, 14, 15, 16, -17, 18, 19, 20, -21, 22, -23, -24, 25, 26, -27, -28, -29, 30, -31, 32}), -65, -67, 69, 63, 73, -75, -63, -63, -63, 63, 0, -87, 0, 91, -63, 0, -97, 99, 101, 0, 0, -63, 63, 0, 0, 0, 0, 0, 0, 123, 0, 0)); +TEST_CONSTEXPR(match_v32qi(_mm256_maskz_add_epi8(0x20676BFF, (__m256i)(__v32qs){ -64, -65, 66, 67, 68, -69, -70, -71, -72, 73, 74, -75, 76, 77, -78, 79, -80, 81, 82, -83, 84, -85, 86, 87, -88, -89, -90, 91, -92, 93, 94, 95}, (__m256i)(__v32qs){ -1, -2, 3, -4, 5, -6, 7, 8, 9, -10, 11, -12, 13, 14, 15, 16, -17, 18, 19, 20, -21, 22, -23, -24, 25, 26, -27, -28, -29, 30, -31, 32}), -65, -67, 69, 63, 73, -75, -63, -63, -63, 63, 0, -87, 0, 91, -63, 0, -97, 99, 101, 0, 0, -63, 63, 0, 0, 0, 0, 0, 0, 123, 0, 0)); __m256i test_mm256_mask_add_epi16 (__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { //CHECK-LABEL: test_mm256_mask_add_epi16 @@ -787,7 +787,7 @@ __m256i test_mm256_mask_sub_epi8 (__m256i __W, __mmask32 __U, __m256i __A, __m25 return _mm256_mask_sub_epi8(__W, __U , __A, __B); } -TEST_CONSTEXPR(match_v32qi(_mm256_mask_sub_epi8((__m256i)(__v32qi){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x20676BFF, (__m256i)(__v32qi){ -64, -65, 66, 67, 68, -69, -70, -71, -72, 73, 74, -75, 76, 77, -78, 79, -80, 81, 82, -83, 84, -85, 86, 87, -88, -89, -90, 91, -92, 93, 94, 95}, (__m256i)(__v32qi){ -1, -2, 3, -4, 5, -6, 7, 8, 9, -10, 11, -12, 13, 14, 15, 16, -17, 18, 19, 20, -21, 22, -23, -24, 25, 26, -27, -28, -29, 30, -31, 32}), -63, -63, 63, 71, 63, -63, -77, -79, -81, 83, 99, -63, 99, 63, -93, 99, -63, 63, 63, 99, 99, -107, 109, 99, 99, 99, 99, 99, 99, 63, 99, 99)); +TEST_CONSTEXPR(match_v32qi(_mm256_mask_sub_epi8((__m256i)(__v32qs){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x20676BFF, (__m256i)(__v32qs){ -64, -65, 66, 67, 68, -69, -70, -71, -72, 73, 74, -75, 76, 77, -78, 79, -80, 81, 82, -83, 84, -85, 86, 87, -88, -89, -90, 91, -92, 93, 94, 95}, (__m256i)(__v32qs){ -1, -2, 3, -4, 5, -6, 7, 8, 9, -10, 11, -12, 13, 14, 15, 16, -17, 18, 19, 20, -21, 22, -23, -24, 25, 26, -27, -28, -29, 30, -31, 32}), -63, -63, 63, 71, 63, -63, -77, -79, -81, 83, 99, -63, 99, 63, -93, 99, -63, 63, 63, 99, 99, -107, 109, 99, 99, 99, 99, 99, 99, 63, 99, 99)); __m256i test_mm256_maskz_sub_epi8 (__mmask32 __U, __m256i __A, __m256i __B) { //CHECK-LABEL: test_mm256_maskz_sub_epi8 @@ -796,7 +796,7 @@ __m256i test_mm256_maskz_sub_epi8 (__mmask32 __U, __m256i __A, __m256i __B) { return _mm256_maskz_sub_epi8(__U , __A, __B); } -TEST_CONSTEXPR(match_v32qi(_mm256_maskz_sub_epi8(0x20676BFF, (__m256i)(__v32qi){ -64, -65, 66, 67, 68, -69, -70, -71, -72, 73, 74, -75, 76, 77, -78, 79, -80, 81, 82, -83, 84, -85, 86, 87, -88, -89, -90, 91, -92, 93, 94, 95}, (__m256i)(__v32qi){ -1, -2, 3, -4, 5, -6, 7, 8, 9, -10, 11, -12, 13, 14, 15, 16, -17, 18, 19, 20, -21, 22, -23, -24, 25, 26, -27, -28, -29, 30, -31, 32}), -63, -63, 63, 71, 63, -63, -77, -79, -81, 83, 0, -63, 0, 63, -93, 0, -63, 63, 63, 0, 0, -107, 109, 0, 0, 0, 0, 0, 0, 63, 0, 0)); +TEST_CONSTEXPR(match_v32qi(_mm256_maskz_sub_epi8(0x20676BFF, (__m256i)(__v32qs){ -64, -65, 66, 67, 68, -69, -70, -71, -72, 73, 74, -75, 76, 77, -78, 79, -80, 81, 82, -83, 84, -85, 86, 87, -88, -89, -90, 91, -92, 93, 94, 95}, (__m256i)(__v32qs){ -1, -2, 3, -4, 5, -6, 7, 8, 9, -10, 11, -12, 13, 14, 15, 16, -17, 18, 19, 20, -21, 22, -23, -24, 25, 26, -27, -28, -29, 30, -31, 32}), -63, -63, 63, 71, 63, -63, -77, -79, -81, 83, 0, -63, 0, 63, -93, 0, -63, 63, 63, 0, 0, -107, 109, 0, 0, 0, 0, 0, 0, 63, 0, 0)); __m256i test_mm256_mask_sub_epi16 (__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { //CHECK-LABEL: test_mm256_mask_sub_epi16 @@ -823,7 +823,7 @@ __m128i test_mm_mask_add_epi8 (__m128i __W, __mmask16 __U, __m128i __A, __m128i return _mm_mask_add_epi8(__W, __U , __A, __B); } -TEST_CONSTEXPR(match_v16qi(_mm_mask_add_epi8((__m128i)(__v16qi){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x5693, (__m128i)(__v16qi){ 32, 33, 34, -35, -36, -37, 38, 39, -40, 41, -42, 43, -44, -45, 46, -47}, (__m128i)(__v16qi){ 1, 2, -3, -4, 5, -6, -7, 8, 9, -10, 11, 12, 13, 14, -15, 16}), 33, 35, 99, 99, -31, 99, 99, 47, 99, 31, -31, 99, -31, 99, 31, 99)); +TEST_CONSTEXPR(match_v16qi(_mm_mask_add_epi8((__m128i)(__v16qs){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x5693, (__m128i)(__v16qs){ 32, 33, 34, -35, -36, -37, 38, 39, -40, 41, -42, 43, -44, -45, 46, -47}, (__m128i)(__v16qs){ 1, 2, -3, -4, 5, -6, -7, 8, 9, -10, 11, 12, 13, 14, -15, 16}), 33, 35, 99, 99, -31, 99, 99, 47, 99, 31, -31, 99, -31, 99, 31, 99)); __m128i test_mm_maskz_add_epi8 (__mmask16 __U, __m128i __A, __m128i __B) { //CHECK-LABEL: test_mm_maskz_add_epi8 @@ -832,7 +832,7 @@ __m128i test_mm_maskz_add_epi8 (__mmask16 __U, __m128i __A, __m128i __B) { return _mm_maskz_add_epi8(__U , __A, __B); } -TEST_CONSTEXPR(match_v16qi(_mm_maskz_add_epi8(0x5693, (__m128i)(__v16qi){ 32, 33, 34, -35, -36, -37, 38, 39, -40, 41, -42, 43, -44, -45, 46, -47}, (__m128i)(__v16qi){ 1, 2, -3, -4, 5, -6, -7, 8, 9, -10, 11, 12, 13, 14, -15, 16}), 33, 35, 0, 0, -31, 0, 0, 47, 0, 31, -31, 0, -31, 0, 31, 0)); +TEST_CONSTEXPR(match_v16qi(_mm_maskz_add_epi8(0x5693, (__m128i)(__v16qs){ 32, 33, 34, -35, -36, -37, 38, 39, -40, 41, -42, 43, -44, -45, 46, -47}, (__m128i)(__v16qs){ 1, 2, -3, -4, 5, -6, -7, 8, 9, -10, 11, 12, 13, 14, -15, 16}), 33, 35, 0, 0, -31, 0, 0, 47, 0, 31, -31, 0, -31, 0, 31, 0)); __m128i test_mm_mask_add_epi16 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { //CHECK-LABEL: test_mm_mask_add_epi16 @@ -859,7 +859,7 @@ __m128i test_mm_mask_sub_epi8 (__m128i __W, __mmask16 __U, __m128i __A, __m128i return _mm_mask_sub_epi8(__W, __U , __A, __B); } -TEST_CONSTEXPR(match_v16qi(_mm_mask_sub_epi8((__m128i)(__v16qi){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x5693, (__m128i)(__v16qi){ 32, 33, 34, -35, -36, -37, 38, 39, -40, 41, -42, 43, -44, -45, 46, -47}, (__m128i)(__v16qi){ 1, 2, -3, -4, 5, -6, -7, 8, 9, -10, 11, 12, 13, 14, -15, 16}), 31, 31, 99, 99, -41, 99, 99, 31, 99, 51, -53, 99, -57, 99, 61, 99)); +TEST_CONSTEXPR(match_v16qi(_mm_mask_sub_epi8((__m128i)(__v16qs){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x5693, (__m128i)(__v16qs){ 32, 33, 34, -35, -36, -37, 38, 39, -40, 41, -42, 43, -44, -45, 46, -47}, (__m128i)(__v16qs){ 1, 2, -3, -4, 5, -6, -7, 8, 9, -10, 11, 12, 13, 14, -15, 16}), 31, 31, 99, 99, -41, 99, 99, 31, 99, 51, -53, 99, -57, 99, 61, 99)); __m128i test_mm_maskz_sub_epi8 (__mmask16 __U, __m128i __A, __m128i __B) { //CHECK-LABEL: test_mm_maskz_sub_epi8 @@ -868,7 +868,7 @@ __m128i test_mm_maskz_sub_epi8 (__mmask16 __U, __m128i __A, __m128i __B) { return _mm_maskz_sub_epi8(__U , __A, __B); } -TEST_CONSTEXPR(match_v16qi(_mm_maskz_sub_epi8(0x5693, (__m128i)(__v16qi){ 32, 33, 34, -35, -36, -37, 38, 39, -40, 41, -42, 43, -44, -45, 46, -47}, (__m128i)(__v16qi){ 1, 2, -3, -4, 5, -6, -7, 8, 9, -10, 11, 12, 13, 14, -15, 16}), 31, 31, 0, 0, -41, 0, 0, 31, 0, 51, -53, 0, -57, 0, 61, 0)); +TEST_CONSTEXPR(match_v16qi(_mm_maskz_sub_epi8(0x5693, (__m128i)(__v16qs){ 32, 33, 34, -35, -36, -37, 38, 39, -40, 41, -42, 43, -44, -45, 46, -47}, (__m128i)(__v16qs){ 1, 2, -3, -4, 5, -6, -7, 8, 9, -10, 11, 12, 13, 14, -15, 16}), 31, 31, 0, 0, -41, 0, 0, 31, 0, 51, -53, 0, -57, 0, 61, 0)); __m128i test_mm_mask_sub_epi16 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { //CHECK-LABEL: test_mm_mask_sub_epi16 diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c index 628ebedfb6c56..ddcff9728c543 100644 --- a/clang/test/CodeGen/X86/sse2-builtins.c +++ b/clang/test/CodeGen/X86/sse2-builtins.c @@ -32,7 +32,7 @@ __m128i test_mm_add_epi8(__m128i A, __m128i B) { return _mm_add_epi8(A, B); } -TEST_CONSTEXPR(match_v16qi(_mm_add_epi8((__m128i)(__v16qi){ 32, 33, 34, -35, -36, -37, 38, 39, -40, 41, -42, 43, -44, -45, 46, -47}, (__m128i)(__v16qi){ 1, 2, -3, -4, 5, -6, -7, 8, 9, -10, 11, 12, 13, 14, -15, 16}), 33, 35, 31, -39, -31, -43, 31, 47, -31, 31, -31, 55, -31, -31, 31, -31)); +TEST_CONSTEXPR(match_v16qi(_mm_add_epi8((__m128i)(__v16qs){ 32, 33, 34, -35, -36, -37, 38, 39, -40, 41, -42, 43, -44, -45, 46, -47}, (__m128i)(__v16qs){ 1, 2, -3, -4, 5, -6, -7, 8, 9, -10, 11, 12, 13, 14, -15, 16}), 33, 35, 31, -39, -31, -43, 31, 47, -31, 31, -31, 55, -31, -31, 31, -31)); __m128i test_mm_add_epi16(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_add_epi16 @@ -1721,7 +1721,7 @@ __m128i test_mm_sub_epi8(__m128i A, __m128i B) { return _mm_sub_epi8(A, B); } -TEST_CONSTEXPR(match_v16qi(_mm_sub_epi8((__m128i)(__v16qi){ 32, 33, 34, -35, -36, -37, 38, 39, -40, 41, -42, 43, -44, -45, 46, -47}, (__m128i)(__v16qi){ 1, 2, -3, -4, 5, -6, -7, 8, 9, -10, 11, 12, 13, 14, -15, 16}), 31, 31, 37, -31, -41, -31, 45, 31, -49, 51, -53, 31, -57, -59, 61, -63)); +TEST_CONSTEXPR(match_v16qi(_mm_sub_epi8((__m128i)(__v16qs){ 32, 33, 34, -35, -36, -37, 38, 39, -40, 41, -42, 43, -44, -45, 46, -47}, (__m128i)(__v16qs){ 1, 2, -3, -4, 5, -6, -7, 8, 9, -10, 11, 12, 13, 14, -15, 16}), 31, 31, 37, -31, -41, -31, 45, 31, -49, 51, -53, 31, -57, -59, 61, -63)); __m128i test_mm_sub_epi16(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_sub_epi16 From 1e40f329848ac7c61db75128a350f937d8fc3ad2 Mon Sep 17 00:00:00 2001 From: donneypr Date: Sun, 21 Sep 2025 12:46:52 -0400 Subject: [PATCH 14/15] clang-format avx512vlbwintrin.h --- clang/lib/Headers/avx512vlbwintrin.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h index 5b4a0cb3e5174..1d663b0bd8113 100644 --- a/clang/lib/Headers/avx512vlbwintrin.h +++ b/clang/lib/Headers/avx512vlbwintrin.h @@ -313,7 +313,7 @@ _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE) static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_mask_add_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B){ +_mm256_mask_add_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_add_epi8(__A, __B), (__v32qi)__W); From c01e2a34c61e2ebd0381dd766f9e96261f011835 Mon Sep 17 00:00:00 2001 From: donneypr Date: Sun, 21 Sep 2025 12:54:36 -0400 Subject: [PATCH 15/15] clang-format again, missed avx512vlintrin.h --- clang/lib/Headers/avx512vlintrin.h | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index ca4733972b75a..e0fbb8b397c08 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -236,32 +236,28 @@ typedef char __v2qi __attribute__((__vector_size__(2))); _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE) static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) -{ +_mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_add_epi32(__A, __B), (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B) -{ +_mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_add_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) -{ +_mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_add_epi64(__A, __B), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B) -{ +_mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_add_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); @@ -296,32 +292,28 @@ _mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B) { } static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) -{ +_mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_add_epi32(__A, __B), (__v4si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B) -{ +_mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_add_epi32(__A, __B), (__v4si)_mm_setzero_si128()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) -{ +_mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_add_epi64(__A, __B), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B) -{ +_mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_add_epi64(__A, __B), (__v2di)_mm_setzero_si128());