Skip to content

Conversation

donneypr
Copy link
Contributor

@donneypr donneypr commented Sep 9, 2025

Fixes #152490

Copy link

github-actions bot commented Sep 9, 2025

Thank you for submitting a Pull Request (PR) to the LLVM Project!

This PR will be automatically labeled and the relevant teams will be notified.

If you wish to, you can add reviewers by using the "Reviewers" section on this page.

If this is not working for you, it is probably because you do not have write permissions for the repository. In which case you can instead tag reviewers by name in a comment by using @ followed by their GitHub username.

If you have received no comments on your PR for a week, you can request a review by "ping"ing the PR by adding a comment “Ping”. The common courtesy "ping" rate is once a week. Please remember that you are asking for valuable time from other developers.

If you have further questions, they may be answered by the LLVM GitHub User Guide.

You can also ask questions in a comment on this PR, on the LLVM Discord or on the forums.

@llvmbot llvmbot added the following labels on Sep 9, 2025: "clang" (Clang issues not falling into any other category), "backend:X86", and "clang:headers" (Headers provided by Clang, e.g. for intrinsics)
@llvmbot
Copy link
Member

llvmbot commented Sep 9, 2025

@llvm/pr-subscribers-clang

@llvm/pr-subscribers-backend-x86

Author: don (donneypr)

Changes

Fixes #152490


Full diff: https://github.com/llvm/llvm-project/pull/157582.diff

3 Files Affected:

  • (modified) clang/lib/Headers/avx2intrin.h (+8-8)
  • (modified) clang/lib/Headers/avx512fintrin.h (+7-7)
  • (modified) clang/lib/Headers/emmintrin.h (+4-4)
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index 2cacdc3c4596c..5c8c2996229c6 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -279,7 +279,7 @@ _mm256_packus_epi32(__m256i __V1, __m256i __V2)
 /// \param __b
 ///    A 256-bit integer vector containing one of the source operands.
 /// \returns A 256-bit integer vector containing the sums.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_add_epi8(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v32qu)__a + (__v32qu)__b);
@@ -298,7 +298,7 @@ _mm256_add_epi8(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [16 x i16] containing one of the source operands.
 /// \returns A 256-bit vector of [16 x i16] containing the sums.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_add_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v16hu)__a + (__v16hu)__b);
@@ -317,7 +317,7 @@ _mm256_add_epi16(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [8 x i32] containing one of the source operands.
 /// \returns A 256-bit vector of [8 x i32] containing the sums.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_add_epi32(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v8su)__a + (__v8su)__b);
@@ -336,7 +336,7 @@ _mm256_add_epi32(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [4 x i64] containing one of the source operands.
 /// \returns A 256-bit vector of [4 x i64] containing the sums.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_add_epi64(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v4du)__a + (__v4du)__b);
@@ -2462,7 +2462,7 @@ _mm256_srl_epi64(__m256i __a, __m128i __count)
 /// \param __b
 ///    A 256-bit integer vector containing the subtrahends.
 /// \returns A 256-bit integer vector containing the differences.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_sub_epi8(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v32qu)__a - (__v32qu)__b);
@@ -2489,7 +2489,7 @@ _mm256_sub_epi8(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [16 x i16] containing the subtrahends.
 /// \returns A 256-bit vector of [16 x i16] containing the differences.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_sub_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v16hu)__a - (__v16hu)__b);
@@ -2515,7 +2515,7 @@ _mm256_sub_epi16(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [8 x i32] containing the subtrahends.
 /// \returns A 256-bit vector of [8 x i32] containing the differences.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_sub_epi32(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v8su)__a - (__v8su)__b);
@@ -2541,7 +2541,7 @@ _mm256_sub_epi32(__m256i __a, __m256i __b)
 /// \param __b
 ///    A 256-bit vector of [4 x i64] containing the subtrahends.
 /// \returns A 256-bit vector of [4 x i64] containing the differences.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_sub_epi64(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v4du)__a - (__v4du)__b);
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 67499fd83a089..1d3aeb284b00c 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -859,7 +859,7 @@ _mm512_add_epi64(__m512i __A, __m512i __B) {
   return (__m512i) ((__v8du) __A + (__v8du) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -867,7 +867,7 @@ _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
                                              (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -875,13 +875,13 @@ _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
                                              (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_sub_epi64 (__m512i __A, __m512i __B)
 {
   return (__m512i) ((__v8du) __A - (__v8du) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -889,7 +889,7 @@ _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
                                              (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -897,7 +897,7 @@ _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
                                              (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_add_epi32 (__m512i __A, __m512i __B)
 {
   return (__m512i) ((__v16su) __A + (__v16su) __B);
@@ -919,7 +919,7 @@ _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
                                              (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
 _mm512_sub_epi32 (__m512i __A, __m512i __B)
 {
   return (__m512i) ((__v16su) __A - (__v16su) __B);
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index a366e0df407a9..c99c85f26c6d1 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -2060,7 +2060,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_pd(double *__dp,
 ///    A 128-bit vector of [16 x i8].
 /// \returns A 128-bit vector of [16 x i8] containing the sums of both
 ///    parameters.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8(__m128i __a,
-                                                          __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_add_epi8(__m128i __a, __m128i __b) {
   return (__m128i)((__v16qu)__a + (__v16qu)__b);
 }
@@ -2081,7 +2081,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8(__m128i __a,
 ///    A 128-bit vector of [8 x i16].
 /// \returns A 128-bit vector of [8 x i16] containing the sums of both
 ///    parameters.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16(__m128i __a,
-                                                           __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_add_epi16(__m128i __a, __m128i __b) {
   return (__m128i)((__v8hu)__a + (__v8hu)__b);
 }
@@ -2499,7 +2499,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sad_epu8(__m128i __a,
 ///    A 128-bit integer vector containing the subtrahends.
 /// \returns A 128-bit integer vector containing the differences of the values
 ///    in the operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8(__m128i __a,
-                                                          __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_sub_epi8(__m128i __a, __m128i __b) {
   return (__m128i)((__v16qu)__a - (__v16qu)__b);
 }
@@ -2516,7 +2516,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8(__m128i __a,
 ///    A 128-bit integer vector containing the subtrahends.
 /// \returns A 128-bit integer vector containing the differences of the values
 ///    in the operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16(__m128i __a,
-                                                           __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_sub_epi16(__m128i __a, __m128i __b) {
   return (__m128i)((__v8hu)__a - (__v8hu)__b);
 }

@donneypr donneypr changed the title from "Constexpr addsub for #152490" to "[Headers][X86] Allow SSE2/AVX2/AVX512F/AVX512BW/AVX512DQ integer arithmetic intrinsics to be used in constexpr" Sep 9, 2025
@RKSimon RKSimon self-requested a review September 9, 2025 08:57
Copy link

github-actions bot commented Sep 9, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

Copy link
Collaborator

@RKSimon RKSimon left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The next (tedious) step is to add test coverage - see #156369 for examples

@donneypr
Copy link
Contributor Author

donneypr commented Sep 9, 2025

@RKSimon Thank you! Will get back to you soon.

@donneypr donneypr requested a review from RKSimon September 9, 2025 17:10
@donneypr
Copy link
Contributor Author

donneypr commented Sep 9, 2025

@RKSimon, I'm a little confused as to why the build is failing and what else I would need to add/change. Would you be able to point me in the right direction? Thank you 😄.

@donneypr
Copy link
Contributor Author

Reverted my tests, working on them again from the start.

@donneypr
Copy link
Contributor Author

donneypr commented Sep 15, 2025

@RKSimon I've clang-formatted it and everything seems good. I think I'm ready to write some tests; please confirm and let me know. Thank you!

@donneypr donneypr requested a review from RKSimon September 15, 2025 21:24
@RKSimon
Copy link
Collaborator

RKSimon commented Sep 19, 2025

@donneypr I've been playing with a script to help autogenerate the tests, as that seems to be the bottleneck for people. See if these work OK (they still need putting in the correct builtins files):

TEST_CONSTEXPR(match_v16qi(_mm_add_epi8((__m128i)(__v16qi){ 32, 33, 34, -35, -36, -37, 38, 39, -40, 41, -42, 43, -44, -45, 46, -47}, (__m128i)(__v16qi){ 1, 2, -3, -4, 5, -6, -7, 8, 9, -10, 11, 12, 13, 14, -15, 16}),  33, 35, 31, -39, -31, -43, 31, 47, -31, 31, -31, 55, -31, -31, 31, -31));
TEST_CONSTEXPR(match_v16qi(_mm_mask_add_epi8((__m128i)(__v16qi){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x5693, (__m128i)(__v16qi){ 32, 33, 34, -35, -36, -37, 38, 39, -40, 41, -42, 43, -44, -45, 46, -47}, (__m128i)(__v16qi){ 1, 2, -3, -4, 5, -6, -7, 8, 9, -10, 11, 12, 13, 14, -15, 16}), 33, 35, 99, 99, -31, 99, 99, 47, 99, 31, -31, 99, -31, 99, 31, 99));
TEST_CONSTEXPR(match_v16qi(_mm_maskz_add_epi8(0x5693, (__m128i)(__v16qi){ 32, 33, 34, -35, -36, -37, 38, 39, -40, 41, -42, 43, -44, -45, 46, -47}, (__m128i)(__v16qi){ 1, 2, -3, -4, 5, -6, -7, 8, 9, -10, 11, 12, 13, 14, -15, 16}),  33, 35, 0, 0, -31, 0, 0, 47, 0, 31, -31, 0, -31, 0, 31, 0));
TEST_CONSTEXPR(match_v32qi(_mm256_add_epi8((__m256i)(__v32qi){ -64, -65, 66, 67, 68, -69, -70, -71, -72, 73, 74, -75, 76, 77, -78, 79, -80, 81, 82, -83, 84, -85, 86, 87, -88, -89, -90, 91, -92, 93, 94, 95}, (__m256i)(__v32qi){ -1, -2, 3, -4, 5, -6, 7, 8, 9, -10, 11, -12, 13, 14, 15, 16, -17, 18, 19, 20, -21, 22, -23, -24, 25, 26, -27, -28, -29, 30, -31, 32}),  -65, -67, 69, 63, 73, -75, -63, -63, -63, 63, 85, -87, 89, 91, -63, 95, -97, 99, 101, -63, 63, -63, 63, 63, -63, -63, -117, 63, -121, 123, 63, 127));
TEST_CONSTEXPR(match_v32qi(_mm256_mask_add_epi8((__m256i)(__v32qi){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x20676BFF, (__m256i)(__v32qi){ -64, -65, 66, 67, 68, -69, -70, -71, -72, 73, 74, -75, 76, 77, -78, 79, -80, 81, 82, -83, 84, -85, 86, 87, -88, -89, -90, 91, -92, 93, 94, 95}, (__m256i)(__v32qi){ -1, -2, 3, -4, 5, -6, 7, 8, 9, -10, 11, -12, 13, 14, 15, 16, -17, 18, 19, 20, -21, 22, -23, -24, 25, 26, -27, -28, -29, 30, -31, 32}), -65, -67, 69, 63, 73, -75, -63, -63, -63, 63, 99, -87, 99, 91, -63, 99, -97, 99, 101, 99, 99, -63, 63, 99, 99, 99, 99, 99, 99, 123, 99, 99));
TEST_CONSTEXPR(match_v32qi(_mm256_maskz_add_epi8(0x20676BFF, (__m256i)(__v32qi){ -64, -65, 66, 67, 68, -69, -70, -71, -72, 73, 74, -75, 76, 77, -78, 79, -80, 81, 82, -83, 84, -85, 86, 87, -88, -89, -90, 91, -92, 93, 94, 95}, (__m256i)(__v32qi){ -1, -2, 3, -4, 5, -6, 7, 8, 9, -10, 11, -12, 13, 14, 15, 16, -17, 18, 19, 20, -21, 22, -23, -24, 25, 26, -27, -28, -29, 30, -31, 32}),  -65, -67, 69, 63, 73, -75, -63, -63, -63, 63, 0, -87, 0, 91, -63, 0, -97, 99, 101, 0, 0, -63, 63, 0, 0, 0, 0, 0, 0, 123, 0, 0));
TEST_CONSTEXPR(match_v64qi(_mm512_add_epi8((__m512i)(__v64qi){ -128, 127, 126, -125, -124, -123, 122, 121, -120, -119, 118, -117, 116, 115, -114, 113, -112, 111, -110, -109, -108, -107, -106, -105, 104, -103, -102, 101, 100, -99, 98, -97, -96, 95, 94, -93, 92, 91, 90, -89, 88, 87, -86, -85, -84, -83, -82, 81, -80, 79, -78, 77, -76, 75, 74, -73, -72, 71, -70, -69, -68, 67, -66, 65}, (__m512i)(__v64qi){ 1, -2, 3, -4, -5, -6, -7, -8, 9, 10, 11, 12, -13, -14, 15, 16, -17, -18, -19, 20, -21, -22, 23, 24, 25, -26, 27, -28, -29, -30, -31, -32, -33, -34, -35, -36, -37, 38, -39, -40, 41, -42, -43, 44, -45, -46, 47, 48, -49, -50, 51, -52, -53, 54, 55, -56, -57, 58, 59, 60, 61, -62, 63, -64}),  -127, 125, -127, 127, 127, 127, 115, 113, -111, -109, -127, -105, 103, 101, -99, -127, 127, 93, 127, -89, 127, 127, -83, -81, -127, 127, -75, 73, 71, 127, 67, 127, 127, 61, 59, 127, 55, -127, 51, 127, -127, 45, 127, -41, 127, 127, -35, -127, 127, 29, -27, 25, 127, -127, -127, 127, 127, -127, -11, -9, -7, 5, -3, 1));
TEST_CONSTEXPR(match_v64qi(_mm512_mask_add_epi8((__m512i)(__v64qi){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x2488D358BA7928B1, (__m512i)(__v64qi){ -128, 127, 126, -125, -124, -123, 122, 121, -120, -119, 118, -117, 116, 115, -114, 113, -112, 111, -110, -109, -108, -107, -106, -105, 104, -103, -102, 101, 100, -99, 98, -97, -96, 95, 94, -93, 92, 91, 90, -89, 88, 87, -86, -85, -84, -83, -82, 81, -80, 79, -78, 77, -76, 75, 74, -73, -72, 71, -70, -69, -68, 67, -66, 65}, (__m512i)(__v64qi){ 1, -2, 3, -4, -5, -6, -7, -8, 9, 10, 11, 12, -13, -14, 15, 16, -17, -18, -19, 20, -21, -22, 23, 24, 25, -26, 27, -28, -29, -30, -31, -32, -33, -34, -35, -36, -37, 38, -39, -40, 41, -42, -43, 44, -45, -46, 47, 48, -49, -50, 51, -52, -53, 54, 55, -56, -57, 58, 59, 60, 61, -62, 63, -64}), -127, 99, 99, 99, 127, 127, 99, 113, 99, 99, 99, -105, 99, 101, 99, 99, 127, 99, 99, -89, 127, 127, -83, 99, 99, 127, 99, 73, 71, 127, 99, 127, 99, 99, 99, 127, 55, 99, 51, 99, -127, 45, 99, 99, 127, 99, -35, -127, 99, 99, 99, 25, 99, 99, 99, 127, 99, 99, -11, 99, 99, 5, 99, 99));
TEST_CONSTEXPR(match_v64qi(_mm512_maskz_add_epi8(0x2488D358BA7928B1, (__m512i)(__v64qi){ -128, 127, 126, -125, -124, -123, 122, 121, -120, -119, 118, -117, 116, 115, -114, 113, -112, 111, -110, -109, -108, -107, -106, -105, 104, -103, -102, 101, 100, -99, 98, -97, -96, 95, 94, -93, 92, 91, 90, -89, 88, 87, -86, -85, -84, -83, -82, 81, -80, 79, -78, 77, -76, 75, 74, -73, -72, 71, -70, -69, -68, 67, -66, 65}, (__m512i)(__v64qi){ 1, -2, 3, -4, -5, -6, -7, -8, 9, 10, 11, 12, -13, -14, 15, 16, -17, -18, -19, 20, -21, -22, 23, 24, 25, -26, 27, -28, -29, -30, -31, -32, -33, -34, -35, -36, -37, 38, -39, -40, 41, -42, -43, 44, -45, -46, 47, 48, -49, -50, 51, -52, -53, 54, 55, -56, -57, 58, 59, 60, 61, -62, 63, -64}),  -127, 0, 0, 0, 127, 127, 0, 113, 0, 0, 0, -105, 0, 101, 0, 0, 127, 0, 0, -89, 127, 127, -83, 0, 0, 127, 0, 73, 71, 127, 0, 127, 0, 0, 0, 127, 55, 0, 51, 0, -127, 45, 0, 0, 127, 0, -35, -127, 0, 0, 0, 25, 0, 0, 0, 127, 0, 0, -11, 0, 0, 5, 0, 0));
TEST_CONSTEXPR(match_v8hi(_mm_add_epi16((__m128i)(__v8hi){ -16, -17, -18, -19, -20, 21, 22, 23}, (__m128i)(__v8hi){ 1, 2, 3, -4, -5, -6, 7, -8}),  -15, -15, -15, -23, -25, 15, 29, 15));
TEST_CONSTEXPR(match_v8hi(_mm_mask_add_epi16((__m128i)(__v8hi){ 99, 99, 99, 99, 99, 99, 99, 99}, 0x8D, (__m128i)(__v8hi){ -16, -17, -18, -19, -20, 21, 22, 23}, (__m128i)(__v8hi){ 1, 2, 3, -4, -5, -6, 7, -8}), -15, 99, -15, -23, 99, 99, 99, 15));
TEST_CONSTEXPR(match_v8hi(_mm_maskz_add_epi16(0x8D, (__m128i)(__v8hi){ -16, -17, -18, -19, -20, 21, 22, 23}, (__m128i)(__v8hi){ 1, 2, 3, -4, -5, -6, 7, -8}),  -15, 0, -15, -23, 0, 0, 0, 15));
TEST_CONSTEXPR(match_v16hi(_mm256_add_epi16((__m256i)(__v16hi){ -32, -33, 34, -35, -36, 37, -38, -39, -40, -41, -42, 43, -44, -45, 46, 47}, (__m256i)(__v16hi){ -1, 2, 3, -4, 5, -6, 7, 8, 9, 10, -11, -12, -13, 14, -15, -16}),  -33, -31, 37, -39, -31, 31, -31, -31, -31, -31, -53, 31, -57, -31, 31, 31));
TEST_CONSTEXPR(match_v16hi(_mm256_mask_add_epi16((__m256i)(__v16hi){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x5A21, (__m256i)(__v16hi){ -32, -33, 34, -35, -36, 37, -38, -39, -40, -41, -42, 43, -44, -45, 46, 47}, (__m256i)(__v16hi){ -1, 2, 3, -4, 5, -6, 7, 8, 9, 10, -11, -12, -13, 14, -15, -16}), -33, 99, 99, 99, 99, 31, 99, 99, 99, -31, 99, 31, -57, 99, 31, 99));
TEST_CONSTEXPR(match_v16hi(_mm256_maskz_add_epi16(0x5A21, (__m256i)(__v16hi){ -32, -33, 34, -35, -36, 37, -38, -39, -40, -41, -42, 43, -44, -45, 46, 47}, (__m256i)(__v16hi){ -1, 2, 3, -4, 5, -6, 7, 8, 9, 10, -11, -12, -13, 14, -15, -16}),  -33, 0, 0, 0, 0, 31, 0, 0, 0, -31, 0, 31, -57, 0, 31, 0));
TEST_CONSTEXPR(match_v32hi(_mm512_add_epi16((__m512i)(__v32hi){ 64, 65, 66, 67, -68, 69, -70, 71, 72, -73, 74, -75, 76, 77, -78, -79, 80, 81, 82, -83, -84, -85, 86, 87, -88, 89, -90, 91, 92, -93, 94, 95}, (__m512i)(__v32hi){ 1, 2, -3, -4, 5, 6, -7, -8, -9, -10, -11, 12, 13, -14, 15, 16, -17, -18, 19, -20, -21, 22, -23, -24, 25, -26, -27, -28, -29, 30, -31, -32}),  65, 67, 63, 63, -63, 75, -77, 63, 63, -83, 63, -63, 89, 63, -63, -63, 63, 63, 101, -103, -105, -63, 63, 63, -63, 63, -117, 63, 63, -63, 63, 63));
TEST_CONSTEXPR(match_v32hi(_mm512_mask_add_epi16((__m512i)(__v32hi){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0xB885E123, (__m512i)(__v32hi){ 64, 65, 66, 67, -68, 69, -70, 71, 72, -73, 74, -75, 76, 77, -78, -79, 80, 81, 82, -83, -84, -85, 86, 87, -88, 89, -90, 91, 92, -93, 94, 95}, (__m512i)(__v32hi){ 1, 2, -3, -4, 5, 6, -7, -8, -9, -10, -11, 12, 13, -14, 15, 16, -17, -18, 19, -20, -21, 22, -23, -24, 25, -26, -27, -28, -29, 30, -31, -32}), 65, 67, 99, 99, 99, 75, 99, 99, 63, 99, 99, 99, 99, 63, -63, -63, 63, 99, 101, 99, 99, 99, 99, 63, 99, 99, 99, 63, 63, -63, 99, 63));
TEST_CONSTEXPR(match_v32hi(_mm512_maskz_add_epi16(0xB885E123, (__m512i)(__v32hi){ 64, 65, 66, 67, -68, 69, -70, 71, 72, -73, 74, -75, 76, 77, -78, -79, 80, 81, 82, -83, -84, -85, 86, 87, -88, 89, -90, 91, 92, -93, 94, 95}, (__m512i)(__v32hi){ 1, 2, -3, -4, 5, 6, -7, -8, -9, -10, -11, 12, 13, -14, 15, 16, -17, -18, 19, -20, -21, 22, -23, -24, 25, -26, -27, -28, -29, 30, -31, -32}),  65, 67, 0, 0, 0, 75, 0, 0, 63, 0, 0, 0, 0, 63, -63, -63, 63, 0, 101, 0, 0, 0, 0, 63, 0, 0, 0, 63, 63, -63, 0, 63));
TEST_CONSTEXPR(match_v4si(_mm_add_epi32((__m128i)(__v4si){ 8, 9, 10, 11}, (__m128i)(__v4si){ -1, -2, 3, 4}),  7, 7, 13, 15));
TEST_CONSTEXPR(match_v4si(_mm_mask_add_epi32((__m128i)(__v4si){ 99, 99, 99, 99}, 0xA, (__m128i)(__v4si){ 8, 9, 10, 11}, (__m128i)(__v4si){ -1, -2, 3, 4}), 99, 7, 99, 15));
TEST_CONSTEXPR(match_v4si(_mm_maskz_add_epi32(0xA, (__m128i)(__v4si){ 8, 9, 10, 11}, (__m128i)(__v4si){ -1, -2, 3, 4}),  0, 7, 0, 15));
TEST_CONSTEXPR(match_v8si(_mm256_add_epi32((__m256i)(__v8si){ 16, 17, 18, -19, 20, -21, 22, 23}, (__m256i)(__v8si){ -1, 2, 3, 4, 5, 6, 7, 8}),  15, 19, 21, -15, 25, -15, 29, 31));
TEST_CONSTEXPR(match_v8si(_mm256_mask_add_epi32((__m256i)(__v8si){ 99, 99, 99, 99, 99, 99, 99, 99}, 0xA1, (__m256i)(__v8si){ 16, 17, 18, -19, 20, -21, 22, 23}, (__m256i)(__v8si){ -1, 2, 3, 4, 5, 6, 7, 8}), 15, 99, 99, 99, 99, -15, 99, 31));
TEST_CONSTEXPR(match_v8si(_mm256_maskz_add_epi32(0xA1, (__m256i)(__v8si){ 16, 17, 18, -19, 20, -21, 22, 23}, (__m256i)(__v8si){ -1, 2, 3, 4, 5, 6, 7, 8}),  15, 0, 0, 0, 0, -15, 0, 31));
TEST_CONSTEXPR(match_v16si(_mm512_add_epi32((__m512i)(__v16si){ -32, -33, -34, 35, 36, 37, -38, -39, 40, 41, -42, 43, -44, -45, 46, -47}, (__m512i)(__v16si){ 1, 2, -3, -4, -5, -6, -7, 8, 9, 10, 11, -12, 13, 14, -15, -16}),  -31, -31, -37, 31, 31, 31, -45, -31, 49, 51, -31, 31, -31, -31, 31, -63));
TEST_CONSTEXPR(match_v16si(_mm512_mask_add_epi32((__m512i)(__v16si){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0xB3F3, (__m512i)(__v16si){ -32, -33, -34, 35, 36, 37, -38, -39, 40, 41, -42, 43, -44, -45, 46, -47}, (__m512i)(__v16si){ 1, 2, -3, -4, -5, -6, -7, 8, 9, 10, 11, -12, 13, 14, -15, -16}), -31, -31, 99, 99, 31, 31, -45, -31, 49, 51, 99, 99, -31, -31, 99, -63));
TEST_CONSTEXPR(match_v16si(_mm512_maskz_add_epi32(0xB3F3, (__m512i)(__v16si){ -32, -33, -34, 35, 36, 37, -38, -39, 40, 41, -42, 43, -44, -45, 46, -47}, (__m512i)(__v16si){ 1, 2, -3, -4, -5, -6, -7, 8, 9, 10, 11, -12, 13, 14, -15, -16}),  -31, -31, 0, 0, 31, 31, -45, -31, 49, 51, 0, 0, -31, -31, 0, -63));
TEST_CONSTEXPR(match_v2di(_mm_add_epi64((__m128i)(__v2di){ 4, -5}, (__m128i)(__v2di){ 1, -2}),  5, -7));
TEST_CONSTEXPR(match_v2di(_mm_mask_add_epi64((__m128i)(__v2di){ 99, 99}, 0x3, (__m128i)(__v2di){ 4, -5}, (__m128i)(__v2di){ 1, -2}), 5, -7));
TEST_CONSTEXPR(match_v2di(_mm_maskz_add_epi64(0x3, (__m128i)(__v2di){ 4, -5}, (__m128i)(__v2di){ 1, -2}),  5, -7));
TEST_CONSTEXPR(match_v4di(_mm256_add_epi64((__m256i)(__v4di){ 8, -9, 10, 11}, (__m256i)(__v4di){ -1, -2, 3, 4}),  7, -11, 13, 15));
TEST_CONSTEXPR(match_v4di(_mm256_mask_add_epi64((__m256i)(__v4di){ 99, 99, 99, 99}, 0x0, (__m256i)(__v4di){ 8, -9, 10, 11}, (__m256i)(__v4di){ -1, -2, 3, 4}), 99, 99, 99, 99));
TEST_CONSTEXPR(match_v4di(_mm256_maskz_add_epi64(0x0, (__m256i)(__v4di){ 8, -9, 10, 11}, (__m256i)(__v4di){ -1, -2, 3, 4}),  0, 0, 0, 0));
TEST_CONSTEXPR(match_v8di(_mm512_add_epi64((__m512i)(__v8di){ -16, 17, -18, 19, -20, 21, 22, 23}, (__m512i)(__v8di){ -1, -2, -3, 4, 5, -6, -7, 8}),  -17, 15, -21, 23, -15, 15, 15, 31));
TEST_CONSTEXPR(match_v8di(_mm512_mask_add_epi64((__m512i)(__v8di){ 99, 99, 99, 99, 99, 99, 99, 99}, 0x6A, (__m512i)(__v8di){ -16, 17, -18, 19, -20, 21, 22, 23}, (__m512i)(__v8di){ -1, -2, -3, 4, 5, -6, -7, 8}), 99, 15, 99, 23, 99, 15, 15, 99));
TEST_CONSTEXPR(match_v8di(_mm512_maskz_add_epi64(0x6A, (__m512i)(__v8di){ -16, 17, -18, 19, -20, 21, 22, 23}, (__m512i)(__v8di){ -1, -2, -3, 4, 5, -6, -7, 8}),  0, 15, 0, 23, 0, 15, 15, 0));

TEST_CONSTEXPR(match_v16qi(_mm_sub_epi8((__m128i)(__v16qi){ 32, 33, 34, -35, -36, -37, 38, 39, -40, 41, -42, 43, -44, -45, 46, -47}, (__m128i)(__v16qi){ 1, 2, -3, -4, 5, -6, -7, 8, 9, -10, 11, 12, 13, 14, -15, 16}),  31, 31, 37, -31, -41, -31, 45, 31, -49, 51, -53, 31, -57, -59, 61, -63));
TEST_CONSTEXPR(match_v16qi(_mm_mask_sub_epi8((__m128i)(__v16qi){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x5693, (__m128i)(__v16qi){ 32, 33, 34, -35, -36, -37, 38, 39, -40, 41, -42, 43, -44, -45, 46, -47}, (__m128i)(__v16qi){ 1, 2, -3, -4, 5, -6, -7, 8, 9, -10, 11, 12, 13, 14, -15, 16}), 31, 31, 99, 99, -41, 99, 99, 31, 99, 51, -53, 99, -57, 99, 61, 99));
TEST_CONSTEXPR(match_v16qi(_mm_maskz_sub_epi8(0x5693, (__m128i)(__v16qi){ 32, 33, 34, -35, -36, -37, 38, 39, -40, 41, -42, 43, -44, -45, 46, -47}, (__m128i)(__v16qi){ 1, 2, -3, -4, 5, -6, -7, 8, 9, -10, 11, 12, 13, 14, -15, 16}),  31, 31, 0, 0, -41, 0, 0, 31, 0, 51, -53, 0, -57, 0, 61, 0));
TEST_CONSTEXPR(match_v32qi(_mm256_sub_epi8((__m256i)(__v32qi){ -64, -65, 66, 67, 68, -69, -70, -71, -72, 73, 74, -75, 76, 77, -78, 79, -80, 81, 82, -83, 84, -85, 86, 87, -88, -89, -90, 91, -92, 93, 94, 95}, (__m256i)(__v32qi){ -1, -2, 3, -4, 5, -6, 7, 8, 9, -10, 11, -12, 13, 14, 15, 16, -17, 18, 19, 20, -21, 22, -23, -24, 25, 26, -27, -28, -29, 30, -31, 32}),  -63, -63, 63, 71, 63, -63, -77, -79, -81, 83, 63, -63, 63, 63, -93, 63, -63, 63, 63, -103, 105, -107, 109, 111, -113, -115, -63, 119, -63, 63, 125, 63));
TEST_CONSTEXPR(match_v32qi(_mm256_mask_sub_epi8((__m256i)(__v32qi){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x20676BFF, (__m256i)(__v32qi){ -64, -65, 66, 67, 68, -69, -70, -71, -72, 73, 74, -75, 76, 77, -78, 79, -80, 81, 82, -83, 84, -85, 86, 87, -88, -89, -90, 91, -92, 93, 94, 95}, (__m256i)(__v32qi){ -1, -2, 3, -4, 5, -6, 7, 8, 9, -10, 11, -12, 13, 14, 15, 16, -17, 18, 19, 20, -21, 22, -23, -24, 25, 26, -27, -28, -29, 30, -31, 32}), -63, -63, 63, 71, 63, -63, -77, -79, -81, 83, 99, -63, 99, 63, -93, 99, -63, 63, 63, 99, 99, -107, 109, 99, 99, 99, 99, 99, 99, 63, 99, 99));
TEST_CONSTEXPR(match_v32qi(_mm256_maskz_sub_epi8(0x20676BFF, (__m256i)(__v32qi){ -64, -65, 66, 67, 68, -69, -70, -71, -72, 73, 74, -75, 76, 77, -78, 79, -80, 81, 82, -83, 84, -85, 86, 87, -88, -89, -90, 91, -92, 93, 94, 95}, (__m256i)(__v32qi){ -1, -2, 3, -4, 5, -6, 7, 8, 9, -10, 11, -12, 13, 14, 15, 16, -17, 18, 19, 20, -21, 22, -23, -24, 25, 26, -27, -28, -29, 30, -31, 32}),  -63, -63, 63, 71, 63, -63, -77, -79, -81, 83, 0, -63, 0, 63, -93, 0, -63, 63, 63, 0, 0, -107, 109, 0, 0, 0, 0, 0, 0, 63, 0, 0));
TEST_CONSTEXPR(match_v64qi(_mm512_sub_epi8((__m512i)(__v64qi){ -128, 127, 126, -125, -124, -123, 122, 121, -120, -119, 118, -117, 116, 115, -114, 113, -112, 111, -110, -109, -108, -107, -106, -105, 104, -103, -102, 101, 100, -99, 98, -97, -96, 95, 94, -93, 92, 91, 90, -89, 88, 87, -86, -85, -84, -83, -82, 81, -80, 79, -78, 77, -76, 75, 74, -73, -72, 71, -70, -69, -68, 67, -66, 65}, (__m512i)(__v64qi){ 1, -2, 3, -4, -5, -6, -7, -8, 9, 10, 11, 12, -13, -14, 15, 16, -17, -18, -19, 20, -21, -22, 23, 24, 25, -26, 27, -28, -29, -30, -31, -32, -33, -34, -35, -36, -37, 38, -39, -40, 41, -42, -43, 44, -45, -46, 47, 48, -49, -50, 51, -52, -53, 54, 55, -56, -57, 58, 59, 60, 61, -62, 63, -64}),  127, -127, 123, -121, -119, -117, -127, -127, 127, 127, 107, 127, -127, -127, 127, 97, -95, -127, -91, 127, -87, -85, 127, 127, 79, -77, 127, -127, -127, -69, -127, -65, -63, -127, -127, -57, -127, 53, -127, -49, 47, -127, -43, 127, -39, -37, 127, 33, -31, -127, 127, -127, -23, 21, 19, -17, -15, 13, 127, 127, 127, -127, 127, -127));
TEST_CONSTEXPR(match_v64qi(_mm512_mask_sub_epi8((__m512i)(__v64qi){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x2488D358BA7928B1, (__m512i)(__v64qi){ -128, 127, 126, -125, -124, -123, 122, 121, -120, -119, 118, -117, 116, 115, -114, 113, -112, 111, -110, -109, -108, -107, -106, -105, 104, -103, -102, 101, 100, -99, 98, -97, -96, 95, 94, -93, 92, 91, 90, -89, 88, 87, -86, -85, -84, -83, -82, 81, -80, 79, -78, 77, -76, 75, 74, -73, -72, 71, -70, -69, -68, 67, -66, 65}, (__m512i)(__v64qi){ 1, -2, 3, -4, -5, -6, -7, -8, 9, 10, 11, 12, -13, -14, 15, 16, -17, -18, -19, 20, -21, -22, 23, 24, 25, -26, 27, -28, -29, -30, -31, -32, -33, -34, -35, -36, -37, 38, -39, -40, 41, -42, -43, 44, -45, -46, 47, 48, -49, -50, 51, -52, -53, 54, 55, -56, -57, 58, 59, 60, 61, -62, 63, -64}), 127, 99, 99, 99, -119, -117, 99, -127, 99, 99, 99, 127, 99, -127, 99, 99, -95, 99, 99, 127, -87, -85, 127, 99, 99, -77, 99, -127, -127, -69, 99, -65, 99, 99, 99, -57, -127, 99, -127, 99, 47, -127, 99, 99, -39, 99, 127, 33, 99, 99, 99, -127, 99, 99, 99, -17, 99, 99, 127, 99, 99, -127, 99, 99));
TEST_CONSTEXPR(match_v64qi(_mm512_maskz_sub_epi8(0x2488D358BA7928B1, (__m512i)(__v64qi){ -128, 127, 126, -125, -124, -123, 122, 121, -120, -119, 118, -117, 116, 115, -114, 113, -112, 111, -110, -109, -108, -107, -106, -105, 104, -103, -102, 101, 100, -99, 98, -97, -96, 95, 94, -93, 92, 91, 90, -89, 88, 87, -86, -85, -84, -83, -82, 81, -80, 79, -78, 77, -76, 75, 74, -73, -72, 71, -70, -69, -68, 67, -66, 65}, (__m512i)(__v64qi){ 1, -2, 3, -4, -5, -6, -7, -8, 9, 10, 11, 12, -13, -14, 15, 16, -17, -18, -19, 20, -21, -22, 23, 24, 25, -26, 27, -28, -29, -30, -31, -32, -33, -34, -35, -36, -37, 38, -39, -40, 41, -42, -43, 44, -45, -46, 47, 48, -49, -50, 51, -52, -53, 54, 55, -56, -57, 58, 59, 60, 61, -62, 63, -64}),  127, 0, 0, 0, -119, -117, 0, -127, 0, 0, 0, 127, 0, -127, 0, 0, -95, 0, 0, 127, -87, -85, 127, 0, 0, -77, 0, -127, -127, -69, 0, -65, 0, 0, 0, -57, -127, 0, -127, 0, 47, -127, 0, 0, -39, 0, 127, 33, 0, 0, 0, -127, 0, 0, 0, -17, 0, 0, 127, 0, 0, -127, 0, 0));
TEST_CONSTEXPR(match_v8hi(_mm_sub_epi16((__m128i)(__v8hi){ -16, -17, -18, -19, -20, 21, 22, 23}, (__m128i)(__v8hi){ 1, 2, 3, -4, -5, -6, 7, -8}),  -17, -19, -21, -15, -15, 27, 15, 31));
TEST_CONSTEXPR(match_v8hi(_mm_mask_sub_epi16((__m128i)(__v8hi){ 99, 99, 99, 99, 99, 99, 99, 99}, 0x8D, (__m128i)(__v8hi){ -16, -17, -18, -19, -20, 21, 22, 23}, (__m128i)(__v8hi){ 1, 2, 3, -4, -5, -6, 7, -8}), -17, 99, -21, -15, 99, 99, 99, 31));
TEST_CONSTEXPR(match_v8hi(_mm_maskz_sub_epi16(0x8D, (__m128i)(__v8hi){ -16, -17, -18, -19, -20, 21, 22, 23}, (__m128i)(__v8hi){ 1, 2, 3, -4, -5, -6, 7, -8}),  -17, 0, -21, -15, 0, 0, 0, 31));
TEST_CONSTEXPR(match_v16hi(_mm256_sub_epi16((__m256i)(__v16hi){ -32, -33, 34, -35, -36, 37, -38, -39, -40, -41, -42, 43, -44, -45, 46, 47}, (__m256i)(__v16hi){ -1, 2, 3, -4, 5, -6, 7, 8, 9, 10, -11, -12, -13, 14, -15, -16}),  -31, -35, 31, -31, -41, 43, -45, -47, -49, -51, -31, 55, -31, -59, 61, 63));
TEST_CONSTEXPR(match_v16hi(_mm256_mask_sub_epi16((__m256i)(__v16hi){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x5A21, (__m256i)(__v16hi){ -32, -33, 34, -35, -36, 37, -38, -39, -40, -41, -42, 43, -44, -45, 46, 47}, (__m256i)(__v16hi){ -1, 2, 3, -4, 5, -6, 7, 8, 9, 10, -11, -12, -13, 14, -15, -16}), -31, 99, 99, 99, 99, 43, 99, 99, 99, -51, 99, 55, -31, 99, 61, 99));
TEST_CONSTEXPR(match_v16hi(_mm256_maskz_sub_epi16(0x5A21, (__m256i)(__v16hi){ -32, -33, 34, -35, -36, 37, -38, -39, -40, -41, -42, 43, -44, -45, 46, 47}, (__m256i)(__v16hi){ -1, 2, 3, -4, 5, -6, 7, 8, 9, 10, -11, -12, -13, 14, -15, -16}),  -31, 0, 0, 0, 0, 43, 0, 0, 0, -51, 0, 55, -31, 0, 61, 0));
TEST_CONSTEXPR(match_v32hi(_mm512_sub_epi16((__m512i)(__v32hi){ 64, 65, 66, 67, -68, 69, -70, 71, 72, -73, 74, -75, 76, 77, -78, -79, 80, 81, 82, -83, -84, -85, 86, 87, -88, 89, -90, 91, 92, -93, 94, 95}, (__m512i)(__v32hi){ 1, 2, -3, -4, 5, 6, -7, -8, -9, -10, -11, 12, 13, -14, 15, 16, -17, -18, 19, -20, -21, 22, -23, -24, 25, -26, -27, -28, -29, 30, -31, -32}),  63, 63, 69, 71, -73, 63, -63, 79, 81, -63, 85, -87, 63, 91, -93, -95, 97, 99, 63, -63, -63, -107, 109, 111, -113, 115, -63, 119, 121, -123, 125, 127));
TEST_CONSTEXPR(match_v32hi(_mm512_mask_sub_epi16((__m512i)(__v32hi){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0xB885E123, (__m512i)(__v32hi){ 64, 65, 66, 67, -68, 69, -70, 71, 72, -73, 74, -75, 76, 77, -78, -79, 80, 81, 82, -83, -84, -85, 86, 87, -88, 89, -90, 91, 92, -93, 94, 95}, (__m512i)(__v32hi){ 1, 2, -3, -4, 5, 6, -7, -8, -9, -10, -11, 12, 13, -14, 15, 16, -17, -18, 19, -20, -21, 22, -23, -24, 25, -26, -27, -28, -29, 30, -31, -32}), 63, 63, 99, 99, 99, 63, 99, 99, 81, 99, 99, 99, 99, 91, -93, -95, 97, 99, 63, 99, 99, 99, 99, 111, 99, 99, 99, 119, 121, -123, 99, 127));
TEST_CONSTEXPR(match_v32hi(_mm512_maskz_sub_epi16(0xB885E123, (__m512i)(__v32hi){ 64, 65, 66, 67, -68, 69, -70, 71, 72, -73, 74, -75, 76, 77, -78, -79, 80, 81, 82, -83, -84, -85, 86, 87, -88, 89, -90, 91, 92, -93, 94, 95}, (__m512i)(__v32hi){ 1, 2, -3, -4, 5, 6, -7, -8, -9, -10, -11, 12, 13, -14, 15, 16, -17, -18, 19, -20, -21, 22, -23, -24, 25, -26, -27, -28, -29, 30, -31, -32}),  63, 63, 0, 0, 0, 63, 0, 0, 81, 0, 0, 0, 0, 91, -93, -95, 97, 0, 63, 0, 0, 0, 0, 111, 0, 0, 0, 119, 121, -123, 0, 127));
TEST_CONSTEXPR(match_v4si(_mm_sub_epi32((__m128i)(__v4si){ 8, 9, 10, 11}, (__m128i)(__v4si){ -1, -2, 3, 4}),  9, 11, 7, 7));
TEST_CONSTEXPR(match_v4si(_mm_mask_sub_epi32((__m128i)(__v4si){ 99, 99, 99, 99}, 0xA, (__m128i)(__v4si){ 8, 9, 10, 11}, (__m128i)(__v4si){ -1, -2, 3, 4}), 99, 11, 99, 7));
TEST_CONSTEXPR(match_v4si(_mm_maskz_sub_epi32(0xA, (__m128i)(__v4si){ 8, 9, 10, 11}, (__m128i)(__v4si){ -1, -2, 3, 4}),  0, 11, 0, 7));
TEST_CONSTEXPR(match_v8si(_mm256_sub_epi32((__m256i)(__v8si){ 16, 17, 18, -19, 20, -21, 22, 23}, (__m256i)(__v8si){ -1, 2, 3, 4, 5, 6, 7, 8}),  17, 15, 15, -23, 15, -27, 15, 15));
TEST_CONSTEXPR(match_v8si(_mm256_mask_sub_epi32((__m256i)(__v8si){ 99, 99, 99, 99, 99, 99, 99, 99}, 0xA1, (__m256i)(__v8si){ 16, 17, 18, -19, 20, -21, 22, 23}, (__m256i)(__v8si){ -1, 2, 3, 4, 5, 6, 7, 8}), 17, 99, 99, 99, 99, -27, 99, 15));
TEST_CONSTEXPR(match_v8si(_mm256_maskz_sub_epi32(0xA1, (__m256i)(__v8si){ 16, 17, 18, -19, 20, -21, 22, 23}, (__m256i)(__v8si){ -1, 2, 3, 4, 5, 6, 7, 8}),  17, 0, 0, 0, 0, -27, 0, 15));
TEST_CONSTEXPR(match_v16si(_mm512_sub_epi32((__m512i)(__v16si){ -32, -33, -34, 35, 36, 37, -38, -39, 40, 41, -42, 43, -44, -45, 46, -47}, (__m512i)(__v16si){ 1, 2, -3, -4, -5, -6, -7, 8, 9, 10, 11, -12, 13, 14, -15, -16}),  -33, -35, -31, 39, 41, 43, -31, -47, 31, 31, -53, 55, -57, -59, 61, -31));
TEST_CONSTEXPR(match_v16si(_mm512_mask_sub_epi32((__m512i)(__v16si){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0xB3F3, (__m512i)(__v16si){ -32, -33, -34, 35, 36, 37, -38, -39, 40, 41, -42, 43, -44, -45, 46, -47}, (__m512i)(__v16si){ 1, 2, -3, -4, -5, -6, -7, 8, 9, 10, 11, -12, 13, 14, -15, -16}), -33, -35, 99, 99, 41, 43, -31, -47, 31, 31, 99, 99, -57, -59, 99, -31));
TEST_CONSTEXPR(match_v16si(_mm512_maskz_sub_epi32(0xB3F3, (__m512i)(__v16si){ -32, -33, -34, 35, 36, 37, -38, -39, 40, 41, -42, 43, -44, -45, 46, -47}, (__m512i)(__v16si){ 1, 2, -3, -4, -5, -6, -7, 8, 9, 10, 11, -12, 13, 14, -15, -16}),  -33, -35, 0, 0, 41, 43, -31, -47, 31, 31, 0, 0, -57, -59, 0, -31));
TEST_CONSTEXPR(match_v2di(_mm_sub_epi64((__m128i)(__v2di){ 4, -5}, (__m128i)(__v2di){ 1, -2}),  3, -3));
TEST_CONSTEXPR(match_v2di(_mm_mask_sub_epi64((__m128i)(__v2di){ 99, 99}, 0x3, (__m128i)(__v2di){ 4, -5}, (__m128i)(__v2di){ 1, -2}), 3, -3));
TEST_CONSTEXPR(match_v2di(_mm_maskz_sub_epi64(0x3, (__m128i)(__v2di){ 4, -5}, (__m128i)(__v2di){ 1, -2}),  3, -3));
TEST_CONSTEXPR(match_v4di(_mm256_sub_epi64((__m256i)(__v4di){ 8, -9, 10, 11}, (__m256i)(__v4di){ -1, -2, 3, 4}),  9, -7, 7, 7));
TEST_CONSTEXPR(match_v4di(_mm256_mask_sub_epi64((__m256i)(__v4di){ 99, 99, 99, 99}, 0x0, (__m256i)(__v4di){ 8, -9, 10, 11}, (__m256i)(__v4di){ -1, -2, 3, 4}), 99, 99, 99, 99));
TEST_CONSTEXPR(match_v4di(_mm256_maskz_sub_epi64(0x0, (__m256i)(__v4di){ 8, -9, 10, 11}, (__m256i)(__v4di){ -1, -2, 3, 4}),  0, 0, 0, 0));
TEST_CONSTEXPR(match_v8di(_mm512_sub_epi64((__m512i)(__v8di){ -16, 17, -18, 19, -20, 21, 22, 23}, (__m512i)(__v8di){ -1, -2, -3, 4, 5, -6, -7, 8}),  -15, 19, -15, 15, -25, 27, 29, 15));
TEST_CONSTEXPR(match_v8di(_mm512_mask_sub_epi64((__m512i)(__v8di){ 99, 99, 99, 99, 99, 99, 99, 99}, 0x6A, (__m512i)(__v8di){ -16, 17, -18, 19, -20, 21, 22, 23}, (__m512i)(__v8di){ -1, -2, -3, 4, 5, -6, -7, 8}), 99, 19, 99, 15, 99, 27, 29, 99));
TEST_CONSTEXPR(match_v8di(_mm512_maskz_sub_epi64(0x6A, (__m512i)(__v8di){ -16, 17, -18, 19, -20, 21, 22, 23}, (__m512i)(__v8di){ -1, -2, -3, 4, 5, -6, -7, 8}),  0, 19, 0, 15, 0, 27, 29, 0));

Copy link
Collaborator

@RKSimon RKSimon left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please can you handle the MMX intrinsics as well?

_mm_add_pi8 _mm_add_pi16 _mm_add_pi32
_mm_sub_pi8 _mm_sub_pi16 _mm_sub_pi32

@donneypr
Copy link
Contributor Author

@RKSimon from what I can see in clang/lib/Headers/mmintrin.h, it looks like all the intrinsics you mentioned are already being used in CONSTEXPR. Shall I make test cases for them?

Thanks for providing the test cases for the others; I will put them in the right builtins files and get back to you.

@RKSimon
Copy link
Collaborator

RKSimon commented Sep 19, 2025

@RKSimon from what I can see in clang/lib/Headers/mmintrin.h, it looks like all the intrinsics you mentioned are already being used in CONSTEXPR. Shall I make test cases for them?

Ah! I forgot I'd done these ages ago - they should already have test coverage if I did it properly. Thanks for checking.

@donneypr
Copy link
Contributor Author

@donneypr I've been playing with a script to help autogen the tests, as that seems to be the bottleneck for people - see if these work OK (they still need putting in the correct builtins files):

TEST_CONSTEXPR(match_v16qi(_mm_add_epi8((__m128i)(__v16qi){ 32, 33, 34, -35, -36, -37, 38, 39, -40, 41, -42, 43, -44, -45, 46, -47}, (__m128i)(__v16qi){ 1, 2, -3, -4, 5, -6, -7, 8, 9, -10, 11, 12, 13, 14, -15, 16}),  33, 35, 31, -39, -31, -43, 31, 47, -31, 31, -31, 55, -31, -31, 31, -31));
TEST_CONSTEXPR(match_v16qi(_mm_mask_add_epi8((__m128i)(__v16qi){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x5693, (__m128i)(__v16qi){ 32, 33, 34, -35, -36, -37, 38, 39, -40, 41, -42, 43, -44, -45, 46, -47}, (__m128i)(__v16qi){ 1, 2, -3, -4, 5, -6, -7, 8, 9, -10, 11, 12, 13, 14, -15, 16}), 33, 35, 99, 99, -31, 99, 99, 47, 99, 31, -31, 99, -31, 99, 31, 99));
TEST_CONSTEXPR(match_v16qi(_mm_maskz_add_epi8(0x5693, (__m128i)(__v16qi){ 32, 33, 34, -35, -36, -37, 38, 39, -40, 41, -42, 43, -44, -45, 46, -47}, (__m128i)(__v16qi){ 1, 2, -3, -4, 5, -6, -7, 8, 9, -10, 11, 12, 13, 14, -15, 16}),  33, 35, 0, 0, -31, 0, 0, 47, 0, 31, -31, 0, -31, 0, 31, 0));
TEST_CONSTEXPR(match_v32qi(_mm256_add_epi8((__m256i)(__v32qi){ -64, -65, 66, 67, 68, -69, -70, -71, -72, 73, 74, -75, 76, 77, -78, 79, -80, 81, 82, -83, 84, -85, 86, 87, -88, -89, -90, 91, -92, 93, 94, 95}, (__m256i)(__v32qi){ -1, -2, 3, -4, 5, -6, 7, 8, 9, -10, 11, -12, 13, 14, 15, 16, -17, 18, 19, 20, -21, 22, -23, -24, 25, 26, -27, -28, -29, 30, -31, 32}),  -65, -67, 69, 63, 73, -75, -63, -63, -63, 63, 85, -87, 89, 91, -63, 95, -97, 99, 101, -63, 63, -63, 63, 63, -63, -63, -117, 63, -121, 123, 63, 127));
TEST_CONSTEXPR(match_v32qi(_mm256_mask_add_epi8((__m256i)(__v32qi){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x20676BFF, (__m256i)(__v32qi){ -64, -65, 66, 67, 68, -69, -70, -71, -72, 73, 74, -75, 76, 77, -78, 79, -80, 81, 82, -83, 84, -85, 86, 87, -88, -89, -90, 91, -92, 93, 94, 95}, (__m256i)(__v32qi){ -1, -2, 3, -4, 5, -6, 7, 8, 9, -10, 11, -12, 13, 14, 15, 16, -17, 18, 19, 20, -21, 22, -23, -24, 25, 26, -27, -28, -29, 30, -31, 32}), -65, -67, 69, 63, 73, -75, -63, -63, -63, 63, 99, -87, 99, 91, -63, 99, -97, 99, 101, 99, 99, -63, 63, 99, 99, 99, 99, 99, 99, 123, 99, 99));
TEST_CONSTEXPR(match_v32qi(_mm256_maskz_add_epi8(0x20676BFF, (__m256i)(__v32qi){ -64, -65, 66, 67, 68, -69, -70, -71, -72, 73, 74, -75, 76, 77, -78, 79, -80, 81, 82, -83, 84, -85, 86, 87, -88, -89, -90, 91, -92, 93, 94, 95}, (__m256i)(__v32qi){ -1, -2, 3, -4, 5, -6, 7, 8, 9, -10, 11, -12, 13, 14, 15, 16, -17, 18, 19, 20, -21, 22, -23, -24, 25, 26, -27, -28, -29, 30, -31, 32}),  -65, -67, 69, 63, 73, -75, -63, -63, -63, 63, 0, -87, 0, 91, -63, 0, -97, 99, 101, 0, 0, -63, 63, 0, 0, 0, 0, 0, 0, 123, 0, 0));
TEST_CONSTEXPR(match_v64qi(_mm512_add_epi8((__m512i)(__v64qi){ -128, 127, 126, -125, -124, -123, 122, 121, -120, -119, 118, -117, 116, 115, -114, 113, -112, 111, -110, -109, -108, -107, -106, -105, 104, -103, -102, 101, 100, -99, 98, -97, -96, 95, 94, -93, 92, 91, 90, -89, 88, 87, -86, -85, -84, -83, -82, 81, -80, 79, -78, 77, -76, 75, 74, -73, -72, 71, -70, -69, -68, 67, -66, 65}, (__m512i)(__v64qi){ 1, -2, 3, -4, -5, -6, -7, -8, 9, 10, 11, 12, -13, -14, 15, 16, -17, -18, -19, 20, -21, -22, 23, 24, 25, -26, 27, -28, -29, -30, -31, -32, -33, -34, -35, -36, -37, 38, -39, -40, 41, -42, -43, 44, -45, -46, 47, 48, -49, -50, 51, -52, -53, 54, 55, -56, -57, 58, 59, 60, 61, -62, 63, -64}),  -127, 125, -127, 127, 127, 127, 115, 113, -111, -109, -127, -105, 103, 101, -99, -127, 127, 93, 127, -89, 127, 127, -83, -81, -127, 127, -75, 73, 71, 127, 67, 127, 127, 61, 59, 127, 55, -127, 51, 127, -127, 45, 127, -41, 127, 127, -35, -127, 127, 29, -27, 25, 127, -127, -127, 127, 127, -127, -11, -9, -7, 5, -3, 1));
TEST_CONSTEXPR(match_v64qi(_mm512_mask_add_epi8((__m512i)(__v64qi){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x2488D358BA7928B1, (__m512i)(__v64qi){ -128, 127, 126, -125, -124, -123, 122, 121, -120, -119, 118, -117, 116, 115, -114, 113, -112, 111, -110, -109, -108, -107, -106, -105, 104, -103, -102, 101, 100, -99, 98, -97, -96, 95, 94, -93, 92, 91, 90, -89, 88, 87, -86, -85, -84, -83, -82, 81, -80, 79, -78, 77, -76, 75, 74, -73, -72, 71, -70, -69, -68, 67, -66, 65}, (__m512i)(__v64qi){ 1, -2, 3, -4, -5, -6, -7, -8, 9, 10, 11, 12, -13, -14, 15, 16, -17, -18, -19, 20, -21, -22, 23, 24, 25, -26, 27, -28, -29, -30, -31, -32, -33, -34, -35, -36, -37, 38, -39, -40, 41, -42, -43, 44, -45, -46, 47, 48, -49, -50, 51, -52, -53, 54, 55, -56, -57, 58, 59, 60, 61, -62, 63, -64}), -127, 99, 99, 99, 127, 127, 99, 113, 99, 99, 99, -105, 99, 101, 99, 99, 127, 99, 99, -89, 127, 127, -83, 99, 99, 127, 99, 73, 71, 127, 99, 127, 99, 99, 99, 127, 55, 99, 51, 99, -127, 45, 99, 99, 127, 99, -35, -127, 99, 99, 99, 25, 99, 99, 99, 127, 99, 99, -11, 99, 99, 5, 99, 99));
TEST_CONSTEXPR(match_v64qi(_mm512_maskz_add_epi8(0x2488D358BA7928B1, (__m512i)(__v64qi){ -128, 127, 126, -125, -124, -123, 122, 121, -120, -119, 118, -117, 116, 115, -114, 113, -112, 111, -110, -109, -108, -107, -106, -105, 104, -103, -102, 101, 100, -99, 98, -97, -96, 95, 94, -93, 92, 91, 90, -89, 88, 87, -86, -85, -84, -83, -82, 81, -80, 79, -78, 77, -76, 75, 74, -73, -72, 71, -70, -69, -68, 67, -66, 65}, (__m512i)(__v64qi){ 1, -2, 3, -4, -5, -6, -7, -8, 9, 10, 11, 12, -13, -14, 15, 16, -17, -18, -19, 20, -21, -22, 23, 24, 25, -26, 27, -28, -29, -30, -31, -32, -33, -34, -35, -36, -37, 38, -39, -40, 41, -42, -43, 44, -45, -46, 47, 48, -49, -50, 51, -52, -53, 54, 55, -56, -57, 58, 59, 60, 61, -62, 63, -64}),  -127, 0, 0, 0, 127, 127, 0, 113, 0, 0, 0, -105, 0, 101, 0, 0, 127, 0, 0, -89, 127, 127, -83, 0, 0, 127, 0, 73, 71, 127, 0, 127, 0, 0, 0, 127, 55, 0, 51, 0, -127, 45, 0, 0, 127, 0, -35, -127, 0, 0, 0, 25, 0, 0, 0, 127, 0, 0, -11, 0, 0, 5, 0, 0));
TEST_CONSTEXPR(match_v8hi(_mm_add_epi16((__m128i)(__v8hi){ -16, -17, -18, -19, -20, 21, 22, 23}, (__m128i)(__v8hi){ 1, 2, 3, -4, -5, -6, 7, -8}),  -15, -15, -15, -23, -25, 15, 29, 15));
TEST_CONSTEXPR(match_v8hi(_mm_mask_add_epi16((__m128i)(__v8hi){ 99, 99, 99, 99, 99, 99, 99, 99}, 0x8D, (__m128i)(__v8hi){ -16, -17, -18, -19, -20, 21, 22, 23}, (__m128i)(__v8hi){ 1, 2, 3, -4, -5, -6, 7, -8}), -15, 99, -15, -23, 99, 99, 99, 15));
TEST_CONSTEXPR(match_v8hi(_mm_maskz_add_epi16(0x8D, (__m128i)(__v8hi){ -16, -17, -18, -19, -20, 21, 22, 23}, (__m128i)(__v8hi){ 1, 2, 3, -4, -5, -6, 7, -8}),  -15, 0, -15, -23, 0, 0, 0, 15));
TEST_CONSTEXPR(match_v16hi(_mm256_add_epi16((__m256i)(__v16hi){ -32, -33, 34, -35, -36, 37, -38, -39, -40, -41, -42, 43, -44, -45, 46, 47}, (__m256i)(__v16hi){ -1, 2, 3, -4, 5, -6, 7, 8, 9, 10, -11, -12, -13, 14, -15, -16}),  -33, -31, 37, -39, -31, 31, -31, -31, -31, -31, -53, 31, -57, -31, 31, 31));
TEST_CONSTEXPR(match_v16hi(_mm256_mask_add_epi16((__m256i)(__v16hi){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x5A21, (__m256i)(__v16hi){ -32, -33, 34, -35, -36, 37, -38, -39, -40, -41, -42, 43, -44, -45, 46, 47}, (__m256i)(__v16hi){ -1, 2, 3, -4, 5, -6, 7, 8, 9, 10, -11, -12, -13, 14, -15, -16}), -33, 99, 99, 99, 99, 31, 99, 99, 99, -31, 99, 31, -57, 99, 31, 99));
TEST_CONSTEXPR(match_v16hi(_mm256_maskz_add_epi16(0x5A21, (__m256i)(__v16hi){ -32, -33, 34, -35, -36, 37, -38, -39, -40, -41, -42, 43, -44, -45, 46, 47}, (__m256i)(__v16hi){ -1, 2, 3, -4, 5, -6, 7, 8, 9, 10, -11, -12, -13, 14, -15, -16}),  -33, 0, 0, 0, 0, 31, 0, 0, 0, -31, 0, 31, -57, 0, 31, 0));
TEST_CONSTEXPR(match_v32hi(_mm512_add_epi16((__m512i)(__v32hi){ 64, 65, 66, 67, -68, 69, -70, 71, 72, -73, 74, -75, 76, 77, -78, -79, 80, 81, 82, -83, -84, -85, 86, 87, -88, 89, -90, 91, 92, -93, 94, 95}, (__m512i)(__v32hi){ 1, 2, -3, -4, 5, 6, -7, -8, -9, -10, -11, 12, 13, -14, 15, 16, -17, -18, 19, -20, -21, 22, -23, -24, 25, -26, -27, -28, -29, 30, -31, -32}),  65, 67, 63, 63, -63, 75, -77, 63, 63, -83, 63, -63, 89, 63, -63, -63, 63, 63, 101, -103, -105, -63, 63, 63, -63, 63, -117, 63, 63, -63, 63, 63));
TEST_CONSTEXPR(match_v32hi(_mm512_mask_add_epi16((__m512i)(__v32hi){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0xB885E123, (__m512i)(__v32hi){ 64, 65, 66, 67, -68, 69, -70, 71, 72, -73, 74, -75, 76, 77, -78, -79, 80, 81, 82, -83, -84, -85, 86, 87, -88, 89, -90, 91, 92, -93, 94, 95}, (__m512i)(__v32hi){ 1, 2, -3, -4, 5, 6, -7, -8, -9, -10, -11, 12, 13, -14, 15, 16, -17, -18, 19, -20, -21, 22, -23, -24, 25, -26, -27, -28, -29, 30, -31, -32}), 65, 67, 99, 99, 99, 75, 99, 99, 63, 99, 99, 99, 99, 63, -63, -63, 63, 99, 101, 99, 99, 99, 99, 63, 99, 99, 99, 63, 63, -63, 99, 63));
TEST_CONSTEXPR(match_v32hi(_mm512_maskz_add_epi16(0xB885E123, (__m512i)(__v32hi){ 64, 65, 66, 67, -68, 69, -70, 71, 72, -73, 74, -75, 76, 77, -78, -79, 80, 81, 82, -83, -84, -85, 86, 87, -88, 89, -90, 91, 92, -93, 94, 95}, (__m512i)(__v32hi){ 1, 2, -3, -4, 5, 6, -7, -8, -9, -10, -11, 12, 13, -14, 15, 16, -17, -18, 19, -20, -21, 22, -23, -24, 25, -26, -27, -28, -29, 30, -31, -32}),  65, 67, 0, 0, 0, 75, 0, 0, 63, 0, 0, 0, 0, 63, -63, -63, 63, 0, 101, 0, 0, 0, 0, 63, 0, 0, 0, 63, 63, -63, 0, 63));
TEST_CONSTEXPR(match_v4si(_mm_add_epi32((__m128i)(__v4si){ 8, 9, 10, 11}, (__m128i)(__v4si){ -1, -2, 3, 4}),  7, 7, 13, 15));
TEST_CONSTEXPR(match_v4si(_mm_mask_add_epi32((__m128i)(__v4si){ 99, 99, 99, 99}, 0xA, (__m128i)(__v4si){ 8, 9, 10, 11}, (__m128i)(__v4si){ -1, -2, 3, 4}), 99, 7, 99, 15));
TEST_CONSTEXPR(match_v4si(_mm_maskz_add_epi32(0xA, (__m128i)(__v4si){ 8, 9, 10, 11}, (__m128i)(__v4si){ -1, -2, 3, 4}),  0, 7, 0, 15));
TEST_CONSTEXPR(match_v8si(_mm256_add_epi32((__m256i)(__v8si){ 16, 17, 18, -19, 20, -21, 22, 23}, (__m256i)(__v8si){ -1, 2, 3, 4, 5, 6, 7, 8}),  15, 19, 21, -15, 25, -15, 29, 31));
TEST_CONSTEXPR(match_v8si(_mm256_mask_add_epi32((__m256i)(__v8si){ 99, 99, 99, 99, 99, 99, 99, 99}, 0xA1, (__m256i)(__v8si){ 16, 17, 18, -19, 20, -21, 22, 23}, (__m256i)(__v8si){ -1, 2, 3, 4, 5, 6, 7, 8}), 15, 99, 99, 99, 99, -15, 99, 31));
TEST_CONSTEXPR(match_v8si(_mm256_maskz_add_epi32(0xA1, (__m256i)(__v8si){ 16, 17, 18, -19, 20, -21, 22, 23}, (__m256i)(__v8si){ -1, 2, 3, 4, 5, 6, 7, 8}),  15, 0, 0, 0, 0, -15, 0, 31));
TEST_CONSTEXPR(match_v16si(_mm512_add_epi32((__m512i)(__v16si){ -32, -33, -34, 35, 36, 37, -38, -39, 40, 41, -42, 43, -44, -45, 46, -47}, (__m512i)(__v16si){ 1, 2, -3, -4, -5, -6, -7, 8, 9, 10, 11, -12, 13, 14, -15, -16}),  -31, -31, -37, 31, 31, 31, -45, -31, 49, 51, -31, 31, -31, -31, 31, -63));
TEST_CONSTEXPR(match_v16si(_mm512_mask_add_epi32((__m512i)(__v16si){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0xB3F3, (__m512i)(__v16si){ -32, -33, -34, 35, 36, 37, -38, -39, 40, 41, -42, 43, -44, -45, 46, -47}, (__m512i)(__v16si){ 1, 2, -3, -4, -5, -6, -7, 8, 9, 10, 11, -12, 13, 14, -15, -16}), -31, -31, 99, 99, 31, 31, -45, -31, 49, 51, 99, 99, -31, -31, 99, -63));
TEST_CONSTEXPR(match_v16si(_mm512_maskz_add_epi32(0xB3F3, (__m512i)(__v16si){ -32, -33, -34, 35, 36, 37, -38, -39, 40, 41, -42, 43, -44, -45, 46, -47}, (__m512i)(__v16si){ 1, 2, -3, -4, -5, -6, -7, 8, 9, 10, 11, -12, 13, 14, -15, -16}),  -31, -31, 0, 0, 31, 31, -45, -31, 49, 51, 0, 0, -31, -31, 0, -63));
TEST_CONSTEXPR(match_v2di(_mm_add_epi64((__m128i)(__v2di){ 4, -5}, (__m128i)(__v2di){ 1, -2}),  5, -7));
TEST_CONSTEXPR(match_v2di(_mm_mask_add_epi64((__m128i)(__v2di){ 99, 99}, 0x3, (__m128i)(__v2di){ 4, -5}, (__m128i)(__v2di){ 1, -2}), 5, -7));
TEST_CONSTEXPR(match_v2di(_mm_maskz_add_epi64(0x3, (__m128i)(__v2di){ 4, -5}, (__m128i)(__v2di){ 1, -2}),  5, -7));
TEST_CONSTEXPR(match_v4di(_mm256_add_epi64((__m256i)(__v4di){ 8, -9, 10, 11}, (__m256i)(__v4di){ -1, -2, 3, 4}),  7, -11, 13, 15));
TEST_CONSTEXPR(match_v4di(_mm256_mask_add_epi64((__m256i)(__v4di){ 99, 99, 99, 99}, 0x0, (__m256i)(__v4di){ 8, -9, 10, 11}, (__m256i)(__v4di){ -1, -2, 3, 4}), 99, 99, 99, 99));
TEST_CONSTEXPR(match_v4di(_mm256_maskz_add_epi64(0x0, (__m256i)(__v4di){ 8, -9, 10, 11}, (__m256i)(__v4di){ -1, -2, 3, 4}),  0, 0, 0, 0));
TEST_CONSTEXPR(match_v8di(_mm512_add_epi64((__m512i)(__v8di){ -16, 17, -18, 19, -20, 21, 22, 23}, (__m512i)(__v8di){ -1, -2, -3, 4, 5, -6, -7, 8}),  -17, 15, -21, 23, -15, 15, 15, 31));
TEST_CONSTEXPR(match_v8di(_mm512_mask_add_epi64((__m512i)(__v8di){ 99, 99, 99, 99, 99, 99, 99, 99}, 0x6A, (__m512i)(__v8di){ -16, 17, -18, 19, -20, 21, 22, 23}, (__m512i)(__v8di){ -1, -2, -3, 4, 5, -6, -7, 8}), 99, 15, 99, 23, 99, 15, 15, 99));
TEST_CONSTEXPR(match_v8di(_mm512_maskz_add_epi64(0x6A, (__m512i)(__v8di){ -16, 17, -18, 19, -20, 21, 22, 23}, (__m512i)(__v8di){ -1, -2, -3, 4, 5, -6, -7, 8}),  0, 15, 0, 23, 0, 15, 15, 0));

TEST_CONSTEXPR(match_v16qi(_mm_sub_epi8((__m128i)(__v16qi){ 32, 33, 34, -35, -36, -37, 38, 39, -40, 41, -42, 43, -44, -45, 46, -47}, (__m128i)(__v16qi){ 1, 2, -3, -4, 5, -6, -7, 8, 9, -10, 11, 12, 13, 14, -15, 16}),  31, 31, 37, -31, -41, -31, 45, 31, -49, 51, -53, 31, -57, -59, 61, -63));
TEST_CONSTEXPR(match_v16qi(_mm_mask_sub_epi8((__m128i)(__v16qi){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x5693, (__m128i)(__v16qi){ 32, 33, 34, -35, -36, -37, 38, 39, -40, 41, -42, 43, -44, -45, 46, -47}, (__m128i)(__v16qi){ 1, 2, -3, -4, 5, -6, -7, 8, 9, -10, 11, 12, 13, 14, -15, 16}), 31, 31, 99, 99, -41, 99, 99, 31, 99, 51, -53, 99, -57, 99, 61, 99));
TEST_CONSTEXPR(match_v16qi(_mm_maskz_sub_epi8(0x5693, (__m128i)(__v16qi){ 32, 33, 34, -35, -36, -37, 38, 39, -40, 41, -42, 43, -44, -45, 46, -47}, (__m128i)(__v16qi){ 1, 2, -3, -4, 5, -6, -7, 8, 9, -10, 11, 12, 13, 14, -15, 16}),  31, 31, 0, 0, -41, 0, 0, 31, 0, 51, -53, 0, -57, 0, 61, 0));
TEST_CONSTEXPR(match_v32qi(_mm256_sub_epi8((__m256i)(__v32qi){ -64, -65, 66, 67, 68, -69, -70, -71, -72, 73, 74, -75, 76, 77, -78, 79, -80, 81, 82, -83, 84, -85, 86, 87, -88, -89, -90, 91, -92, 93, 94, 95}, (__m256i)(__v32qi){ -1, -2, 3, -4, 5, -6, 7, 8, 9, -10, 11, -12, 13, 14, 15, 16, -17, 18, 19, 20, -21, 22, -23, -24, 25, 26, -27, -28, -29, 30, -31, 32}),  -63, -63, 63, 71, 63, -63, -77, -79, -81, 83, 63, -63, 63, 63, -93, 63, -63, 63, 63, -103, 105, -107, 109, 111, -113, -115, -63, 119, -63, 63, 125, 63));
TEST_CONSTEXPR(match_v32qi(_mm256_mask_sub_epi8((__m256i)(__v32qi){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x20676BFF, (__m256i)(__v32qi){ -64, -65, 66, 67, 68, -69, -70, -71, -72, 73, 74, -75, 76, 77, -78, 79, -80, 81, 82, -83, 84, -85, 86, 87, -88, -89, -90, 91, -92, 93, 94, 95}, (__m256i)(__v32qi){ -1, -2, 3, -4, 5, -6, 7, 8, 9, -10, 11, -12, 13, 14, 15, 16, -17, 18, 19, 20, -21, 22, -23, -24, 25, 26, -27, -28, -29, 30, -31, 32}), -63, -63, 63, 71, 63, -63, -77, -79, -81, 83, 99, -63, 99, 63, -93, 99, -63, 63, 63, 99, 99, -107, 109, 99, 99, 99, 99, 99, 99, 63, 99, 99));
TEST_CONSTEXPR(match_v32qi(_mm256_maskz_sub_epi8(0x20676BFF, (__m256i)(__v32qi){ -64, -65, 66, 67, 68, -69, -70, -71, -72, 73, 74, -75, 76, 77, -78, 79, -80, 81, 82, -83, 84, -85, 86, 87, -88, -89, -90, 91, -92, 93, 94, 95}, (__m256i)(__v32qi){ -1, -2, 3, -4, 5, -6, 7, 8, 9, -10, 11, -12, 13, 14, 15, 16, -17, 18, 19, 20, -21, 22, -23, -24, 25, 26, -27, -28, -29, 30, -31, 32}),  -63, -63, 63, 71, 63, -63, -77, -79, -81, 83, 0, -63, 0, 63, -93, 0, -63, 63, 63, 0, 0, -107, 109, 0, 0, 0, 0, 0, 0, 63, 0, 0));
TEST_CONSTEXPR(match_v64qi(_mm512_sub_epi8((__m512i)(__v64qi){ -128, 127, 126, -125, -124, -123, 122, 121, -120, -119, 118, -117, 116, 115, -114, 113, -112, 111, -110, -109, -108, -107, -106, -105, 104, -103, -102, 101, 100, -99, 98, -97, -96, 95, 94, -93, 92, 91, 90, -89, 88, 87, -86, -85, -84, -83, -82, 81, -80, 79, -78, 77, -76, 75, 74, -73, -72, 71, -70, -69, -68, 67, -66, 65}, (__m512i)(__v64qi){ 1, -2, 3, -4, -5, -6, -7, -8, 9, 10, 11, 12, -13, -14, 15, 16, -17, -18, -19, 20, -21, -22, 23, 24, 25, -26, 27, -28, -29, -30, -31, -32, -33, -34, -35, -36, -37, 38, -39, -40, 41, -42, -43, 44, -45, -46, 47, 48, -49, -50, 51, -52, -53, 54, 55, -56, -57, 58, 59, 60, 61, -62, 63, -64}),  127, -127, 123, -121, -119, -117, -127, -127, 127, 127, 107, 127, -127, -127, 127, 97, -95, -127, -91, 127, -87, -85, 127, 127, 79, -77, 127, -127, -127, -69, -127, -65, -63, -127, -127, -57, -127, 53, -127, -49, 47, -127, -43, 127, -39, -37, 127, 33, -31, -127, 127, -127, -23, 21, 19, -17, -15, 13, 127, 127, 127, -127, 127, -127));
TEST_CONSTEXPR(match_v64qi(_mm512_mask_sub_epi8((__m512i)(__v64qi){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x2488D358BA7928B1, (__m512i)(__v64qi){ -128, 127, 126, -125, -124, -123, 122, 121, -120, -119, 118, -117, 116, 115, -114, 113, -112, 111, -110, -109, -108, -107, -106, -105, 104, -103, -102, 101, 100, -99, 98, -97, -96, 95, 94, -93, 92, 91, 90, -89, 88, 87, -86, -85, -84, -83, -82, 81, -80, 79, -78, 77, -76, 75, 74, -73, -72, 71, -70, -69, -68, 67, -66, 65}, (__m512i)(__v64qi){ 1, -2, 3, -4, -5, -6, -7, -8, 9, 10, 11, 12, -13, -14, 15, 16, -17, -18, -19, 20, -21, -22, 23, 24, 25, -26, 27, -28, -29, -30, -31, -32, -33, -34, -35, -36, -37, 38, -39, -40, 41, -42, -43, 44, -45, -46, 47, 48, -49, -50, 51, -52, -53, 54, 55, -56, -57, 58, 59, 60, 61, -62, 63, -64}), 127, 99, 99, 99, -119, -117, 99, -127, 99, 99, 99, 127, 99, -127, 99, 99, -95, 99, 99, 127, -87, -85, 127, 99, 99, -77, 99, -127, -127, -69, 99, -65, 99, 99, 99, -57, -127, 99, -127, 99, 47, -127, 99, 99, -39, 99, 127, 33, 99, 99, 99, -127, 99, 99, 99, -17, 99, 99, 127, 99, 99, -127, 99, 99));
TEST_CONSTEXPR(match_v64qi(_mm512_maskz_sub_epi8(0x2488D358BA7928B1, (__m512i)(__v64qi){ -128, 127, 126, -125, -124, -123, 122, 121, -120, -119, 118, -117, 116, 115, -114, 113, -112, 111, -110, -109, -108, -107, -106, -105, 104, -103, -102, 101, 100, -99, 98, -97, -96, 95, 94, -93, 92, 91, 90, -89, 88, 87, -86, -85, -84, -83, -82, 81, -80, 79, -78, 77, -76, 75, 74, -73, -72, 71, -70, -69, -68, 67, -66, 65}, (__m512i)(__v64qi){ 1, -2, 3, -4, -5, -6, -7, -8, 9, 10, 11, 12, -13, -14, 15, 16, -17, -18, -19, 20, -21, -22, 23, 24, 25, -26, 27, -28, -29, -30, -31, -32, -33, -34, -35, -36, -37, 38, -39, -40, 41, -42, -43, 44, -45, -46, 47, 48, -49, -50, 51, -52, -53, 54, 55, -56, -57, 58, 59, 60, 61, -62, 63, -64}),  127, 0, 0, 0, -119, -117, 0, -127, 0, 0, 0, 127, 0, -127, 0, 0, -95, 0, 0, 127, -87, -85, 127, 0, 0, -77, 0, -127, -127, -69, 0, -65, 0, 0, 0, -57, -127, 0, -127, 0, 47, -127, 0, 0, -39, 0, 127, 33, 0, 0, 0, -127, 0, 0, 0, -17, 0, 0, 127, 0, 0, -127, 0, 0));
TEST_CONSTEXPR(match_v8hi(_mm_sub_epi16((__m128i)(__v8hi){ -16, -17, -18, -19, -20, 21, 22, 23}, (__m128i)(__v8hi){ 1, 2, 3, -4, -5, -6, 7, -8}),  -17, -19, -21, -15, -15, 27, 15, 31));
TEST_CONSTEXPR(match_v8hi(_mm_mask_sub_epi16((__m128i)(__v8hi){ 99, 99, 99, 99, 99, 99, 99, 99}, 0x8D, (__m128i)(__v8hi){ -16, -17, -18, -19, -20, 21, 22, 23}, (__m128i)(__v8hi){ 1, 2, 3, -4, -5, -6, 7, -8}), -17, 99, -21, -15, 99, 99, 99, 31));
TEST_CONSTEXPR(match_v8hi(_mm_maskz_sub_epi16(0x8D, (__m128i)(__v8hi){ -16, -17, -18, -19, -20, 21, 22, 23}, (__m128i)(__v8hi){ 1, 2, 3, -4, -5, -6, 7, -8}),  -17, 0, -21, -15, 0, 0, 0, 31));
TEST_CONSTEXPR(match_v16hi(_mm256_sub_epi16((__m256i)(__v16hi){ -32, -33, 34, -35, -36, 37, -38, -39, -40, -41, -42, 43, -44, -45, 46, 47}, (__m256i)(__v16hi){ -1, 2, 3, -4, 5, -6, 7, 8, 9, 10, -11, -12, -13, 14, -15, -16}),  -31, -35, 31, -31, -41, 43, -45, -47, -49, -51, -31, 55, -31, -59, 61, 63));
TEST_CONSTEXPR(match_v16hi(_mm256_mask_sub_epi16((__m256i)(__v16hi){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0x5A21, (__m256i)(__v16hi){ -32, -33, 34, -35, -36, 37, -38, -39, -40, -41, -42, 43, -44, -45, 46, 47}, (__m256i)(__v16hi){ -1, 2, 3, -4, 5, -6, 7, 8, 9, 10, -11, -12, -13, 14, -15, -16}), -31, 99, 99, 99, 99, 43, 99, 99, 99, -51, 99, 55, -31, 99, 61, 99));
TEST_CONSTEXPR(match_v16hi(_mm256_maskz_sub_epi16(0x5A21, (__m256i)(__v16hi){ -32, -33, 34, -35, -36, 37, -38, -39, -40, -41, -42, 43, -44, -45, 46, 47}, (__m256i)(__v16hi){ -1, 2, 3, -4, 5, -6, 7, 8, 9, 10, -11, -12, -13, 14, -15, -16}),  -31, 0, 0, 0, 0, 43, 0, 0, 0, -51, 0, 55, -31, 0, 61, 0));
TEST_CONSTEXPR(match_v32hi(_mm512_sub_epi16((__m512i)(__v32hi){ 64, 65, 66, 67, -68, 69, -70, 71, 72, -73, 74, -75, 76, 77, -78, -79, 80, 81, 82, -83, -84, -85, 86, 87, -88, 89, -90, 91, 92, -93, 94, 95}, (__m512i)(__v32hi){ 1, 2, -3, -4, 5, 6, -7, -8, -9, -10, -11, 12, 13, -14, 15, 16, -17, -18, 19, -20, -21, 22, -23, -24, 25, -26, -27, -28, -29, 30, -31, -32}),  63, 63, 69, 71, -73, 63, -63, 79, 81, -63, 85, -87, 63, 91, -93, -95, 97, 99, 63, -63, -63, -107, 109, 111, -113, 115, -63, 119, 121, -123, 125, 127));
TEST_CONSTEXPR(match_v32hi(_mm512_mask_sub_epi16((__m512i)(__v32hi){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0xB885E123, (__m512i)(__v32hi){ 64, 65, 66, 67, -68, 69, -70, 71, 72, -73, 74, -75, 76, 77, -78, -79, 80, 81, 82, -83, -84, -85, 86, 87, -88, 89, -90, 91, 92, -93, 94, 95}, (__m512i)(__v32hi){ 1, 2, -3, -4, 5, 6, -7, -8, -9, -10, -11, 12, 13, -14, 15, 16, -17, -18, 19, -20, -21, 22, -23, -24, 25, -26, -27, -28, -29, 30, -31, -32}), 63, 63, 99, 99, 99, 63, 99, 99, 81, 99, 99, 99, 99, 91, -93, -95, 97, 99, 63, 99, 99, 99, 99, 111, 99, 99, 99, 119, 121, -123, 99, 127));
TEST_CONSTEXPR(match_v32hi(_mm512_maskz_sub_epi16(0xB885E123, (__m512i)(__v32hi){ 64, 65, 66, 67, -68, 69, -70, 71, 72, -73, 74, -75, 76, 77, -78, -79, 80, 81, 82, -83, -84, -85, 86, 87, -88, 89, -90, 91, 92, -93, 94, 95}, (__m512i)(__v32hi){ 1, 2, -3, -4, 5, 6, -7, -8, -9, -10, -11, 12, 13, -14, 15, 16, -17, -18, 19, -20, -21, 22, -23, -24, 25, -26, -27, -28, -29, 30, -31, -32}),  63, 63, 0, 0, 0, 63, 0, 0, 81, 0, 0, 0, 0, 91, -93, -95, 97, 0, 63, 0, 0, 0, 0, 111, 0, 0, 0, 119, 121, -123, 0, 127));
TEST_CONSTEXPR(match_v4si(_mm_sub_epi32((__m128i)(__v4si){ 8, 9, 10, 11}, (__m128i)(__v4si){ -1, -2, 3, 4}),  9, 11, 7, 7));
TEST_CONSTEXPR(match_v4si(_mm_mask_sub_epi32((__m128i)(__v4si){ 99, 99, 99, 99}, 0xA, (__m128i)(__v4si){ 8, 9, 10, 11}, (__m128i)(__v4si){ -1, -2, 3, 4}), 99, 11, 99, 7));
TEST_CONSTEXPR(match_v4si(_mm_maskz_sub_epi32(0xA, (__m128i)(__v4si){ 8, 9, 10, 11}, (__m128i)(__v4si){ -1, -2, 3, 4}),  0, 11, 0, 7));
TEST_CONSTEXPR(match_v8si(_mm256_sub_epi32((__m256i)(__v8si){ 16, 17, 18, -19, 20, -21, 22, 23}, (__m256i)(__v8si){ -1, 2, 3, 4, 5, 6, 7, 8}),  17, 15, 15, -23, 15, -27, 15, 15));
TEST_CONSTEXPR(match_v8si(_mm256_mask_sub_epi32((__m256i)(__v8si){ 99, 99, 99, 99, 99, 99, 99, 99}, 0xA1, (__m256i)(__v8si){ 16, 17, 18, -19, 20, -21, 22, 23}, (__m256i)(__v8si){ -1, 2, 3, 4, 5, 6, 7, 8}), 17, 99, 99, 99, 99, -27, 99, 15));
TEST_CONSTEXPR(match_v8si(_mm256_maskz_sub_epi32(0xA1, (__m256i)(__v8si){ 16, 17, 18, -19, 20, -21, 22, 23}, (__m256i)(__v8si){ -1, 2, 3, 4, 5, 6, 7, 8}),  17, 0, 0, 0, 0, -27, 0, 15));
TEST_CONSTEXPR(match_v16si(_mm512_sub_epi32((__m512i)(__v16si){ -32, -33, -34, 35, 36, 37, -38, -39, 40, 41, -42, 43, -44, -45, 46, -47}, (__m512i)(__v16si){ 1, 2, -3, -4, -5, -6, -7, 8, 9, 10, 11, -12, 13, 14, -15, -16}),  -33, -35, -31, 39, 41, 43, -31, -47, 31, 31, -53, 55, -57, -59, 61, -31));
TEST_CONSTEXPR(match_v16si(_mm512_mask_sub_epi32((__m512i)(__v16si){ 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}, 0xB3F3, (__m512i)(__v16si){ -32, -33, -34, 35, 36, 37, -38, -39, 40, 41, -42, 43, -44, -45, 46, -47}, (__m512i)(__v16si){ 1, 2, -3, -4, -5, -6, -7, 8, 9, 10, 11, -12, 13, 14, -15, -16}), -33, -35, 99, 99, 41, 43, -31, -47, 31, 31, 99, 99, -57, -59, 99, -31));
TEST_CONSTEXPR(match_v16si(_mm512_maskz_sub_epi32(0xB3F3, (__m512i)(__v16si){ -32, -33, -34, 35, 36, 37, -38, -39, 40, 41, -42, 43, -44, -45, 46, -47}, (__m512i)(__v16si){ 1, 2, -3, -4, -5, -6, -7, 8, 9, 10, 11, -12, 13, 14, -15, -16}),  -33, -35, 0, 0, 41, 43, -31, -47, 31, 31, 0, 0, -57, -59, 0, -31));
TEST_CONSTEXPR(match_v2di(_mm_sub_epi64((__m128i)(__v2di){ 4, -5}, (__m128i)(__v2di){ 1, -2}),  3, -3));
TEST_CONSTEXPR(match_v2di(_mm_mask_sub_epi64((__m128i)(__v2di){ 99, 99}, 0x3, (__m128i)(__v2di){ 4, -5}, (__m128i)(__v2di){ 1, -2}), 3, -3));
TEST_CONSTEXPR(match_v2di(_mm_maskz_sub_epi64(0x3, (__m128i)(__v2di){ 4, -5}, (__m128i)(__v2di){ 1, -2}),  3, -3));
TEST_CONSTEXPR(match_v4di(_mm256_sub_epi64((__m256i)(__v4di){ 8, -9, 10, 11}, (__m256i)(__v4di){ -1, -2, 3, 4}),  9, -7, 7, 7));
TEST_CONSTEXPR(match_v4di(_mm256_mask_sub_epi64((__m256i)(__v4di){ 99, 99, 99, 99}, 0x0, (__m256i)(__v4di){ 8, -9, 10, 11}, (__m256i)(__v4di){ -1, -2, 3, 4}), 99, 99, 99, 99));
TEST_CONSTEXPR(match_v4di(_mm256_maskz_sub_epi64(0x0, (__m256i)(__v4di){ 8, -9, 10, 11}, (__m256i)(__v4di){ -1, -2, 3, 4}),  0, 0, 0, 0));
TEST_CONSTEXPR(match_v8di(_mm512_sub_epi64((__m512i)(__v8di){ -16, 17, -18, 19, -20, 21, 22, 23}, (__m512i)(__v8di){ -1, -2, -3, 4, 5, -6, -7, 8}),  -15, 19, -15, 15, -25, 27, 29, 15));
TEST_CONSTEXPR(match_v8di(_mm512_mask_sub_epi64((__m512i)(__v8di){ 99, 99, 99, 99, 99, 99, 99, 99}, 0x6A, (__m512i)(__v8di){ -16, 17, -18, 19, -20, 21, 22, 23}, (__m512i)(__v8di){ -1, -2, -3, 4, 5, -6, -7, 8}), 99, 19, 99, 15, 99, 27, 29, 99));
TEST_CONSTEXPR(match_v8di(_mm512_maskz_sub_epi64(0x6A, (__m512i)(__v8di){ -16, 17, -18, 19, -20, 21, 22, 23}, (__m512i)(__v8di){ -1, -2, -3, 4, 5, -6, -7, 8}),  0, 19, 0, 15, 0, 27, 29, 0));

All tests have been added to their appropriate builtins files.

@donneypr
Copy link
Contributor Author

While going over my work, I realized that some of the mask/maskz intrinsics were not evaluated in CONSTEXPR even though we had test cases for them, so I went back and fixed that.

@donneypr donneypr requested a review from RKSimon September 20, 2025 15:19
Copy link
Collaborator

@RKSimon RKSimon left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The __v16qi/__v32qi/__v64qi types inside the TEST_CONSTEXPR need converting to __v16qs/__v32qs/__v64qs - sorry, my script didn't handle this :/

@donneypr
Copy link
Contributor Author

Thanks for letting me know! Going to switch them around right now.

@donneypr donneypr requested a review from RKSimon September 21, 2025 16:01
@donneypr
Copy link
Contributor Author

My apologies, I forgot to clang-format avx512vlintrin.h. We should be good now (hopefully 😄).

Copy link
Collaborator

@RKSimon RKSimon left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM - cheers!

@RKSimon RKSimon enabled auto-merge (squash) September 22, 2025 08:27
@RKSimon RKSimon merged commit 44e71c9 into llvm:main Sep 22, 2025
9 checks passed
Copy link

@donneypr Congratulations on having your first Pull Request (PR) merged into the LLVM Project!

Your changes will be combined with recent changes from other authors, then tested by our build bots. If there is a problem with a build, you may receive a report in an email or a comment on this PR.

Please check whether problems have been caused by your change specifically, as the builds can include changes from many authors. It is not uncommon for your change to be included in a build that fails due to someone else's changes, or infrastructure issues.

How to do this, and the rest of the post-merge process, is covered in detail here.

If your change does cause a problem, it may be reverted, or you can revert it yourself. This is a normal part of LLVM development. You can fix your changes and open a new PR to merge them again.

If you don't get any reports, no action is required from you. Your changes are working as expected, well done!

@llvm-ci
Copy link
Collaborator

llvm-ci commented Sep 22, 2025

LLVM Buildbot has detected a new failure on builder openmp-s390x-linux running on systemz-1 while building clang at step 6 "test-openmp".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/88/builds/16342

Here is the relevant piece of the build log for the reference
Step 6 (test-openmp) failure: test (failure)
******************** TEST 'libomp :: tasking/issue-94260-2.c' FAILED ********************
Exit Code: -11

Command Output (stdout):
--
# RUN: at line 1
/home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/./bin/clang -fopenmp   -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test -L /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/src  -fno-omit-frame-pointer -mbackchain -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test/ompt /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test/tasking/issue-94260-2.c -o /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp -lm -latomic && /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp
# executed command: /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/./bin/clang -fopenmp -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test -L /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -fno-omit-frame-pointer -mbackchain -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test/ompt /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test/tasking/issue-94260-2.c -o /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp -lm -latomic
# executed command: /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp
# note: command had no output on stdout or stderr
# error: command failed with exit status: -11

--

********************


@donneypr
Copy link
Contributor Author

@RKSimon This made my week! Thank you for your support and guidance.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

backend:X86 clang:headers Headers provided by Clang, e.g. for intrinsics clang Clang issues not falling into any other category

Projects

None yet

Development

Successfully merging this pull request may close these issues.

[Headers][X86] Allow SSE2/AVX2/AVX512F/AVX512BW/AVX512DQ integer arithmetic intrinsics to be used in constexpr

4 participants