[Headers][X86] Allow MMX/SSE2/AVX2/AVX512BW integer saturated arithmetic intrinsics to be used in constexpr #153088
Conversation
✅ With the latest revision this PR passed the C/C++ code formatter.
Force-pushed from c41317b to 9e4d8c9.
@llvm/pr-subscribers-clang @llvm/pr-subscribers-backend-x86

Author: Yatao Wang (ningxinr)

Changes: Issue #152506

Patch is 20.83 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/153088.diff

8 Files Affected:
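For context (not quoted from the patch): the mechanism behind the diff below is a macro swap. Each intrinsic's attribute macro is replaced by a _CONSTEXPR variant that adds the constexpr keyword when compiling as C++. A rough sketch of that pattern follows; the exact version guard is an assumption, not taken from this patch.

/* Approximate shape of the *_CONSTEXPR attribute macros in clang's x86
   headers (illustrative; the exact guard in the shipped headers may differ). */
#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
#else
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
#endif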
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index f00a8a516ecfe..b2e0b701c95ec 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -367,9 +367,8 @@ _mm256_add_epi64(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit integer vector containing one of the source operands.
/// \returns A 256-bit integer vector containing the sums.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_adds_epi8(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_adds_epi8(__m256i __a, __m256i __b) {
return (__m256i)__builtin_elementwise_add_sat((__v32qs)__a, (__v32qs)__b);
}
@@ -385,9 +384,8 @@ _mm256_adds_epi8(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [16 x i16] containing one of the source operands.
/// \returns A 256-bit vector of [16 x i16] containing the sums.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_adds_epi16(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_adds_epi16(__m256i __a, __m256i __b) {
return (__m256i)__builtin_elementwise_add_sat((__v16hi)__a, (__v16hi)__b);
}
@@ -2607,9 +2605,8 @@ _mm256_sub_epi64(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit integer vector containing the subtrahends.
/// \returns A 256-bit integer vector containing the differences.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_subs_epi8(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_subs_epi8(__m256i __a, __m256i __b) {
return (__m256i)__builtin_elementwise_sub_sat((__v32qs)__a, (__v32qs)__b);
}
@@ -2633,9 +2630,8 @@ _mm256_subs_epi8(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [16 x i16] containing the subtrahends.
/// \returns A 256-bit vector of [16 x i16] containing the differences.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_subs_epi16(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_subs_epi16(__m256i __a, __m256i __b) {
return (__m256i)__builtin_elementwise_sub_sat((__v16hi)__a, (__v16hi)__b);
}
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index d7377e5d5196c..57a261f30eadb 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -613,9 +613,8 @@ _mm512_maskz_packus_epi16(__mmask64 __M, __m512i __A, __m512i __B)
(__v64qi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_adds_epi8 (__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_adds_epi8(__m512i __A, __m512i __B) {
return (__m512i)__builtin_elementwise_add_sat((__v64qs)__A, (__v64qs)__B);
}
@@ -635,9 +634,8 @@ _mm512_maskz_adds_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
(__v64qi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_adds_epi16 (__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_adds_epi16(__m512i __A, __m512i __B) {
return (__m512i)__builtin_elementwise_add_sat((__v32hi)__A, (__v32hi)__B);
}
@@ -946,9 +944,8 @@ _mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B)
(__v64qi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_subs_epi8 (__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_subs_epi8(__m512i __A, __m512i __B) {
return (__m512i)__builtin_elementwise_sub_sat((__v64qs)__A, (__v64qs)__B);
}
@@ -968,9 +965,8 @@ _mm512_maskz_subs_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
(__v64qi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_subs_epi16 (__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_subs_epi16(__m512i __A, __m512i __B) {
return (__m512i)__builtin_elementwise_sub_sat((__v32hi)__A, (__v32hi)__B);
}
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index 60d2000dfb809..8936f3110050f 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -2170,8 +2170,8 @@ _mm_add_epi64(__m128i __a, __m128i __b) {
/// A 128-bit signed [16 x i8] vector.
/// \returns A 128-bit signed [16 x i8] vector containing the saturated sums of
/// both parameters.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi8(__m128i __a,
- __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_adds_epi8(__m128i __a, __m128i __b) {
return (__m128i)__builtin_elementwise_add_sat((__v16qs)__a, (__v16qs)__b);
}
@@ -2192,8 +2192,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi8(__m128i __a,
/// A 128-bit signed [8 x i16] vector.
/// \returns A 128-bit signed [8 x i16] vector containing the saturated sums of
/// both parameters.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi16(__m128i __a,
- __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_adds_epi16(__m128i __a, __m128i __b) {
return (__m128i)__builtin_elementwise_add_sat((__v8hi)__a, (__v8hi)__b);
}
@@ -2597,8 +2597,8 @@ _mm_sub_epi64(__m128i __a, __m128i __b) {
/// A 128-bit integer vector containing the subtrahends.
/// \returns A 128-bit integer vector containing the differences of the values
/// in the operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi8(__m128i __a,
- __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_subs_epi8(__m128i __a, __m128i __b) {
return (__m128i)__builtin_elementwise_sub_sat((__v16qs)__a, (__v16qs)__b);
}
@@ -2619,8 +2619,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi8(__m128i __a,
/// A 128-bit integer vector containing the subtrahends.
/// \returns A 128-bit integer vector containing the differences of the values
/// in the operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi16(__m128i __a,
- __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_subs_epi16(__m128i __a, __m128i __b) {
return (__m128i)__builtin_elementwise_sub_sat((__v8hi)__a, (__v8hi)__b);
}
diff --git a/clang/lib/Headers/mmintrin.h b/clang/lib/Headers/mmintrin.h
index 3961b790cea74..afb93ccba781c 100644
--- a/clang/lib/Headers/mmintrin.h
+++ b/clang/lib/Headers/mmintrin.h
@@ -448,10 +448,9 @@ _mm_add_pi32(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [8 x i8].
/// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums
/// of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_adds_pi8(__m64 __m1, __m64 __m2)
-{
- return (__m64)__builtin_elementwise_add_sat((__v8qs)__m1, (__v8qs)__m2);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_adds_pi8(__m64 __m1, __m64 __m2) {
+ return (__m64)__builtin_elementwise_add_sat((__v8qs)__m1, (__v8qs)__m2);
}
/// Adds, with saturation, each 16-bit signed integer element of the first
@@ -472,10 +471,9 @@ _mm_adds_pi8(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [4 x i16].
/// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums
/// of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_adds_pi16(__m64 __m1, __m64 __m2)
-{
- return (__m64)__builtin_elementwise_add_sat((__v4hi)__m1, (__v4hi)__m2);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_adds_pi16(__m64 __m1, __m64 __m2) {
+ return (__m64)__builtin_elementwise_add_sat((__v4hi)__m1, (__v4hi)__m2);
}
/// Adds, with saturation, each 8-bit unsigned integer element of the first
@@ -605,10 +603,9 @@ _mm_sub_pi32(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
/// differences of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_subs_pi8(__m64 __m1, __m64 __m2)
-{
- return (__m64)__builtin_elementwise_sub_sat((__v8qs)__m1, (__v8qs)__m2);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_subs_pi8(__m64 __m1, __m64 __m2) {
+ return (__m64)__builtin_elementwise_sub_sat((__v8qs)__m1, (__v8qs)__m2);
}
/// Subtracts, with saturation, each 16-bit signed integer element of the
@@ -629,10 +626,9 @@ _mm_subs_pi8(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
/// differences of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_subs_pi16(__m64 __m1, __m64 __m2)
-{
- return (__m64)__builtin_elementwise_sub_sat((__v4hi)__m1, (__v4hi)__m2);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_subs_pi16(__m64 __m1, __m64 __m2) {
+ return (__m64)__builtin_elementwise_sub_sat((__v4hi)__m1, (__v4hi)__m2);
}
/// Subtracts each 8-bit unsigned integer element of the second 64-bit
diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c
index 8790485f00a8c..ce279b65a399d 100644
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -60,12 +60,14 @@ __m256i test_mm256_adds_epi8(__m256i a, __m256i b) {
// CHECK: call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}})
return _mm256_adds_epi8(a, b);
}
+TEST_CONSTEXPR(match_v32qi(_mm256_adds_epi8(_mm256_setr_epi8(0, +1, +2, +3, +4, +5, +6, +7, +8, +9, +10, +11, +12, +13, +14, +15, +100, +50, -100, +20, +80, -50, +120, -20, -100, -50, +100, -20, -80, +50, -120, +20), _mm256_setr_epi8(0, +1, +2, +3, +4, +5, +6, +7, +8, +9, +10, +11, +12, +13, +14, +15, +50, +80, -50, +110, +60, -30, +20, -10, +50, +80, -50, +110, +60, -30, +20, -10)), 0, +2, +4, +6, +8, +10, +12, +14, +16, +18, +20, +22, +24, +26, +28, +30, +127, +127, -128, +127, +127, -80, +127, -30, -50, +30, +50, +90, -20, +20, -100, +10));
__m256i test_mm256_adds_epi16(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_adds_epi16
// CHECK: call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_adds_epi16(a, b);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_adds_epi16(_mm256_setr_epi16(0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, +32000, -32000, +32000, -32000), _mm256_setr_epi16(0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, +800, -800, -800, +800)),0, -2, -4, -6, -8, -10, -12, -14, -16, -18, -20, -22, +32767, -32768, +31200, -31200));
__m256i test_mm256_adds_epu8(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_adds_epu8
@@ -1293,12 +1295,14 @@ __m256i test_mm256_subs_epi8(__m256i a, __m256i b) {
// CHECK: call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}})
return _mm256_subs_epi8(a, b);
}
+TEST_CONSTEXPR(match_v32qi(_mm256_subs_epi8(_mm256_setr_epi8(0, +1, +2, +3, +4, +5, +6, +7, +8, +9, +10, +11, +12, +13, +14, +15, +100, +50, -100, +20, +80, -50, +120, -20, -100, -50, +100, -20, -80, +50, -120, +20), _mm256_setr_epi8(0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -50, -80, +50, -110, -60, +30, -20, +10, -50, -80, +50, -110, -60, +30, -20, +10)), 0, +2, +4, +6, +8, +10, +12, +14, +16, +18, +20, +22, +24, +26, +28, +30, +127, +127, -128, +127, +127, -80, +127, -30, -50, +30, +50, +90, -20, +20, -100, +10));
__m256i test_mm256_subs_epi16(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_subs_epi16
// CHECK: call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_subs_epi16(a, b);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_subs_epi16(_mm256_setr_epi16(0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, +32000, -32000, +32000, -32000), _mm256_setr_epi16(0, +1, +2, +3, +4, +5, +6, +7, +8, +9, +10, +11, -800, +800, +800, -800)),0, -2, -4, -6, -8, -10, -12, -14, -16, -18, -20, -22, +32767, -32768, +31200, -31200));
__m256i test_mm256_subs_epu8(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_subs_epu8
diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c
index 2e3e13d2769c4..c2426dc0e6b9d 100644
--- a/clang/test/CodeGen/X86/avx512bw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512bw-builtins.c
@@ -957,6 +957,8 @@ __m512i test_mm512_adds_epi8(__m512i __A, __m512i __B) {
// CHECK: @llvm.sadd.sat.v64i8
return _mm512_adds_epi8(__A,__B);
}
+TEST_CONSTEXPR(match_v64qi(_mm512_adds_epi8((__m512i)(__v64qi){0, +1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32, +33, -34, +35, -36, +37, -38, +39, -40, +41, -42, +43, -44, +45, -46, +47, +100, +50, -100, +20, +80, -50, +120, -20, -100, -50, +100, -20, -80, +50, -120, +20}, (__m512i)(__v64qi){0, +1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32, +33, -34, +35, -36, +37, -38, +39, -40, +41, -42, +43, -44, +45, -46, +47, +50, +80, -50, +110, +60, -30, +20, -10, +50, +80, -50, +110, +60, -30, +20, -10}), 0, +2, -4, +6, -8, +10, -12, +14, -16, +18, -20, +22, -24, +26, -28, +30, -32, +34, -36, +38, -40, +42, -44, +46, -48, +50, -52, +54, -56, +58, -60, +62, -64, +66, -68, +70, -72, +74, -76, +78, -80, +82, -84, +86, -88, +90, -92, +94, +127, +127, -128, +127, +127, -80, +127, -30, -50, +30, +50, +90, -20, +20, -100, +10));
+
__m512i test_mm512_mask_adds_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_mask_adds_epi8
// CHECK: @llvm.sadd.sat.v64i8
@@ -974,6 +976,8 @@ __m512i test_mm512_adds_epi16(__m512i __A, __m512i __B) {
// CHECK: @llvm.sadd.sat.v32i16
return _mm512_adds_epi16(__A,__B);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_adds_epi16((__m512i)(__v32hi){0, +1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, +32000, -32000, +32000, -32000}, (__m512i)(__v32hi){0, +1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, +800, -800, -800, +800}), 0, +2, -4, +6, -8, +10, -12, +14, -16, +18, -20, +22, -24, +26, -28, +30, -32, +34, -36, +38, -40, +42, -44, +46, -48, +50, -52, +54, +32767, -32768, +31200, -31200));
+
__m512i test_mm512_mask_adds_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_mask_adds_epi16
// CHECK: @llvm.sadd.sat.v32i16
@@ -1218,6 +1222,8 @@ __m512i test_mm512_subs_epi8(__m512i __A, __m512i __B) {
// CHECK: @llvm.ssub.sat.v64i8
return _mm512_subs_epi8(__A,__B);
}
+TEST_CONSTEXPR(match_v64qi(_mm512_subs_epi8((__m512i)(__v64qi){0, +1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32, +33, -34, +35, -36, +37, -38, +39, -40, +41, -42, +43, -44, +45, -46, +47, +100, +50, -100, +20, +80, -50, +120, -20, -100, -50, +100, -20, -80, +50, -120, +20}, (__m512i)(__v64qi){0, -1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32, -33, +34, -35, +36, -37, +38, -39, +40, -41, +42, -43, +44, -45, +46, -47, -50, -80, +50, -110, -60, +30, -20, +10, -50, -80, +50, -110, -60, +30, -20, +10}), 0, +2, -4, +6, -8, +10, -12, +14, -16, +18, -20, +22, -24, +26, -28, +30, -32, +34, -36, +38, -40, +42, -44, +46, -48, +50, -52, +54, -56, +58, -60, +62, -64, +66, -68, +70, -72, +74, -76, +78, -80, +82, -84, +86, -88, +90, -92, +94, +127, +127, -128, +127, +127, -80, +127, -30, -50, +30, +50, +90, -20, +20, -100, +10));
+
__m512i test_mm512_mask_subs_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_mask_subs_epi8
// CHECK: @llvm.ssub.sat.v64i8
@@ -1235,6 +1241,7 @@ __m512i test_mm512_subs_epi16(__m512i __A, __m512i __B) {
// CHECK: @llvm.ssub.sat.v32i16
return _mm512_subs_epi16(__A,__B);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_subs_epi16((__m512i)(__v32hi){0, +1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, +32000, -32000, +32000, -32000}, (__m512i)(__v32hi){0, -1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, -800, +800, +800, -800}), 0, +2, -4, +6, -8, +10, -12, +14, -16, +18, -20, +22, -24, +26, -28, +30, -32, +34, -36, +38, -40, +42, -44, +46, -48, +50, -52, +54, +32767, -32768, +31200, -31200));
__m512i test_mm512_mask_subs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_mask_subs_epi16
// CHECK: @llvm.ssub.sat.v32i16
diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c
index d47368c207f4b..29d31f85f2bc5 100644
--- a/clang/test/CodeGen/X86/mmx-builtins.c
+++ b/clang/test/CodeGen/X86/mmx-builtins.c
@@ -62,12 +62,14 @@ __m64 test_mm_adds_pi8(__m64 a, __m64 b) {
// CHECK: call <8 x i8> @llvm.sadd.sat.v8i8(
return _mm_adds_pi8(a, b);
}
+TEST_CONSTEXPR(match_v8qi(_mm_adds_pi8(_mm_setr_pi8(+100, +50, -100, +20, +80, -50, +120, -20), _mm_setr_pi8(+50, +80, -50, +110, +60, -30, +20, -10)), +127, +127, -128, +127, +127, -80, +127, -30));
__m64 test_mm_adds_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_adds_pi16
// CHECK: call <4 x i16> @llvm.sadd.sat.v4i16(
return _mm_adds_pi16(a, b);
}
+TEST_CONSTEXPR(match_v4hi(_mm_adds_pi16((__m64)(__v4hi){+32000, -32000, +32000, -32000}, (__m64)(__v4hi){+800, -800, -800, +800}), +32767, -32768, +31200, -31200));
__m64 test_mm_adds_pu8(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_adds_pu8
@@ -718,12 +720,14 @@ __m64 test_mm_subs_pi8(__m64 a, __m64 b) {
// CHECK: call <8 x i8> @llvm.ssub.sat.v8i8(
return _mm_subs_pi8(a, b);
}
+TEST_CONSTEXPR(match_v8qi(_mm_subs_pi8(_mm_setr_pi8(+100, +50, -100, +20, +80, -50, +120, -20), _mm_setr_pi8(-50, -80, +50, -110, -60, +30, -20, +10)), +127, +127, -128, +127, +127, -80, +127, -30));
__m64 test_mm_subs_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_subs_pi16
// CHECK: call <4 x i16> @llvm.ssub.sat.v4i16(
return _mm_subs_pi16(a, b);
}
+TEST_CONSTEXPR(match_v4hi(_mm_subs_pi16((__m64)(__v4hi){+32000, -32000, +32000, -32000}, (__m64)(__v4hi){-800, +800, +800, -800}), +32767, -32768, +31200, -31200));
__m64 test_mm_subs_pu8(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_subs_pu8
diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c
index 23013dd75d641..04b357b88817c 100644
--- a/clang/test/CodeGen/X86/sse2-builtins.c
+++ b/clang/test/CodeGen/X86/sse2-builtins.c
@@ -63,12 +63,14 @@ __m128i test_mm_adds_epi8(__m128i A, __m128i B) {
// CHECK: call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
return _mm_adds_epi8(A, B);
}
+TEST_CONSTEXPR(match_v16qi(_mm_adds_epi8(_mm_setr_epi8(+100, +50, -100, +20, +80, -50, +120, -20, -100, -50, +100, -20, -80, +50, -120, +20), _mm_setr_epi8(+50, +80, -50, +110, +60, -30, +20, -10, +50, +80, -50, +110, +60, -30, +20, -10)), +127, +127, -128, +127, +127, -80, +127, -30, -50, +30, +50, +90, -20, +20, -100, +10));
__m128i test_mm_adds_epi16(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_adds_epi16
// CHECK: call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_adds_epi16(A, B);
}
+TEST_CONSTEXPR(match_v8hi(_mm_adds_epi16(_mm_setr_epi16(+32000, -32000, +32000, -32000, +80, -50, +120, -20), _mm_setr_epi16(+800, -800, -800, +800, +60, -30, +20, -10)), +32767, -32768, +31200, -31200, +140, -80, +140, -30));
__m128i test_mm_adds_epu8(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_adds_epu8
@@ -1691,12 +1693,14 @@ __m128i test_mm_subs_epi8(__m128i A, __m128i B) {
// CHECK: call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
return _mm_subs_epi8(A, B);
}
+TEST_CONSTE...
[truncated]
Hi Simon, I don't think I have permission to add reviewers. Would you please take a look when you get a chance? Thank you thank you!
You can use (__m256i)(__v32qi){...} instead. The same below.
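A sketch of the suggested form (illustrative lane values, not from the patch; TEST_CONSTEXPR and match_v32qi are the helpers already used in these test files):

// Build the 256-bit arguments from casted compound literals instead of
// nesting another intrinsic (e.g. _mm256_setr_epi8) inside the constant
// expression. Lane i holds i, so each result lane is 2*i (no saturation).
TEST_CONSTEXPR(match_v32qi(
    _mm256_adds_epi8(
        (__m256i)(__v32qi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                           16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31},
        (__m256i)(__v32qi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                           16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}),
    0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30,
    32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62));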
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If the -fno-signed-char tests are a problem, you can use __v32qs instead of __v32qi. As a guideline, we're trying to avoid nested constexpr intrinsics in the tests.
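A minimal sketch of both points together, using the shorter MMX form and the values already present in the mmx-builtins.c test below; the only change is spelling the inputs as casted compound literals with the explicitly signed element type __v8qs rather than via _mm_setr_pi8:

// __v8qs has 'signed char' elements, so the negative literals keep their
// values even under -fno-signed-char, and no intrinsic call is nested
// inside the constant expression.
TEST_CONSTEXPR(match_v8qi(
    _mm_adds_pi8((__m64)(__v8qs){+100, +50, -100,  +20, +80, -50, +120, -20},
                 (__m64)(__v8qs){ +50, +80,  -50, +110, +60, -30,  +20, -10}),
    +127, +127, -128, +127, +127, -80, +127, -30));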
@ningxinr I've just realized that I didn't request the unsigned saturation equivalents (pi -> pu) - would you be interested in adding those to this patch or should I raise an additional issue?
Nah, don't bother creating another issue, let me just add them to this patch. I will try to finish it by the end of the day. Thanks~
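For reference, a hypothetical sketch of what one of those unsigned-saturation checks could look like (not taken from the patch; an unsigned matcher along the lines of match_v16qu is assumed to exist alongside the signed ones):

// _mm_adds_epu8 clamps each unsigned 8-bit lane to [0, 255], so
// 200 + 100 and 255 + 1 both saturate to 255.
TEST_CONSTEXPR(match_v16qu(
    _mm_adds_epu8(
        (__m128i)(__v16qu){200, 100, 0, 255, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
        (__m128i)(__v16qu){100,  56, 0,   1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}),
    255, 156, 0, 255, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24));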
LGTM.
LGTM - thank you (and sorry again for missing the usat cases!)
All good! Thanks for approving and helping me merge the change! 😄
This PR allows the MMX/SSE2/AVX2/AVX512BW signed integer saturated arithmetic intrinsics to be used in constexpr: _mm_adds_pi8, _mm_adds_pi16, _mm_subs_pi8, _mm_subs_pi16 (MMX); _mm_adds_epi8, _mm_adds_epi16, _mm_subs_epi8, _mm_subs_epi16 (SSE2); _mm256_adds_epi8, _mm256_adds_epi16, _mm256_subs_epi8, _mm256_subs_epi16 (AVX2); and _mm512_adds_epi8, _mm512_adds_epi16, _mm512_subs_epi8, _mm512_subs_epi16 (AVX512BW). The unsigned saturation equivalents were added during review.
Fixes #152506
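As a rough usage sketch of what the change enables (C++ only, outside the LLVM test suite; the function name is illustrative):

// With a clang that includes this change, the saturated add below can be
// folded in a constant expression. Requires SSE2 (default on x86-64).
#include <emmintrin.h>

constexpr short saturated_lane0() {
  __m128i a = (__m128i)(__v8hi){+32000, -32000, 0, 0, 0, 0, 0, 0};
  __m128i b = (__m128i)(__v8hi){  +800,   -800, 0, 0, 0, 0, 0, 0};
  return ((__v8hi)_mm_adds_epi16(a, b))[0];  // 32000 + 800 clamps to +32767
}
static_assert(saturated_lane0() == 32767, "saturated at compile time");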