-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[Headers][X86] Allow SSE2/AVX2/AVX512F/AVX512BW/AVX512DQ integer arithmetic intrinsics to be used in constexpr #157582
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Thank you for submitting a Pull Request (PR) to the LLVM Project! This PR will be automatically labeled and the relevant teams will be notified. If you wish to, you can add reviewers by using the "Reviewers" section on this page. If this is not working for you, it is probably because you do not have write permissions for the repository. In which case you can instead tag reviewers by name in a comment by using If you have received no comments on your PR for a week, you can request a review by "ping"ing the PR by adding a comment “Ping”. The common courtesy "ping" rate is once a week. Please remember that you are asking for valuable time from other developers. If you have further questions, they may be answered by the LLVM GitHub User Guide. You can also ask questions in a comment on this PR, on the LLVM Discord or on the forums. |
@llvm/pr-subscribers-clang @llvm/pr-subscribers-backend-x86 Author: don (donneypr) ChangesFixes #152490 Full diff: https://github.com/llvm/llvm-project/pull/157582.diff 3 Files Affected:
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index 2cacdc3c4596c..5c8c2996229c6 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -279,7 +279,7 @@ _mm256_packus_epi32(__m256i __V1, __m256i __V2)
/// \param __b
/// A 256-bit integer vector containing one of the source operands.
/// \returns A 256-bit integer vector containing the sums.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_add_epi8(__m256i __a, __m256i __b)
{
return (__m256i)((__v32qu)__a + (__v32qu)__b);
@@ -298,7 +298,7 @@ _mm256_add_epi8(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [16 x i16] containing one of the source operands.
/// \returns A 256-bit vector of [16 x i16] containing the sums.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_add_epi16(__m256i __a, __m256i __b)
{
return (__m256i)((__v16hu)__a + (__v16hu)__b);
@@ -317,7 +317,7 @@ _mm256_add_epi16(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [8 x i32] containing one of the source operands.
/// \returns A 256-bit vector of [8 x i32] containing the sums.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_add_epi32(__m256i __a, __m256i __b)
{
return (__m256i)((__v8su)__a + (__v8su)__b);
@@ -336,7 +336,7 @@ _mm256_add_epi32(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [4 x i64] containing one of the source operands.
/// \returns A 256-bit vector of [4 x i64] containing the sums.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_add_epi64(__m256i __a, __m256i __b)
{
return (__m256i)((__v4du)__a + (__v4du)__b);
@@ -2462,7 +2462,7 @@ _mm256_srl_epi64(__m256i __a, __m128i __count)
/// \param __b
/// A 256-bit integer vector containing the subtrahends.
/// \returns A 256-bit integer vector containing the differences.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_sub_epi8(__m256i __a, __m256i __b)
{
return (__m256i)((__v32qu)__a - (__v32qu)__b);
@@ -2489,7 +2489,7 @@ _mm256_sub_epi8(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [16 x i16] containing the subtrahends.
/// \returns A 256-bit vector of [16 x i16] containing the differences.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_sub_epi16(__m256i __a, __m256i __b)
{
return (__m256i)((__v16hu)__a - (__v16hu)__b);
@@ -2515,7 +2515,7 @@ _mm256_sub_epi16(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [8 x i32] containing the subtrahends.
/// \returns A 256-bit vector of [8 x i32] containing the differences.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_sub_epi32(__m256i __a, __m256i __b)
{
return (__m256i)((__v8su)__a - (__v8su)__b);
@@ -2541,7 +2541,7 @@ _mm256_sub_epi32(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [4 x i64] containing the subtrahends.
/// \returns A 256-bit vector of [4 x i64] containing the differences.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_sub_epi64(__m256i __a, __m256i __b)
{
return (__m256i)((__v4du)__a - (__v4du)__b);
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 67499fd83a089..1d3aeb284b00c 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -859,7 +859,7 @@ _mm512_add_epi64(__m512i __A, __m512i __B) {
return (__m512i) ((__v8du) __A + (__v8du) __B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -867,7 +867,7 @@ _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
(__v8di)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -875,13 +875,13 @@ _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
(__v8di)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_sub_epi64 (__m512i __A, __m512i __B)
{
return (__m512i) ((__v8du) __A - (__v8du) __B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -889,7 +889,7 @@ _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
(__v8di)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -897,7 +897,7 @@ _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
(__v8di)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_add_epi32 (__m512i __A, __m512i __B)
{
return (__m512i) ((__v16su) __A + (__v16su) __B);
@@ -919,7 +919,7 @@ _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
(__v16si)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_sub_epi32 (__m512i __A, __m512i __B)
{
return (__m512i) ((__v16su) __A - (__v16su) __B);
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index a366e0df407a9..c99c85f26c6d1 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -2060,7 +2060,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_pd(double *__dp,
/// A 128-bit vector of [16 x i8].
/// \returns A 128-bit vector of [16 x i8] containing the sums of both
/// parameters.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8(__m128i __a,
+static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8_CONSTEXPR(__m128i __a,
__m128i __b) {
return (__m128i)((__v16qu)__a + (__v16qu)__b);
}
@@ -2081,7 +2081,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8(__m128i __a,
/// A 128-bit vector of [8 x i16].
/// \returns A 128-bit vector of [8 x i16] containing the sums of both
/// parameters.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16(__m128i __a,
+static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16_CONSTEXPR(__m128i __a,
__m128i __b) {
return (__m128i)((__v8hu)__a + (__v8hu)__b);
}
@@ -2499,7 +2499,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sad_epu8(__m128i __a,
/// A 128-bit integer vector containing the subtrahends.
/// \returns A 128-bit integer vector containing the differences of the values
/// in the operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8(__m128i __a,
+static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8_CONSTEXPR(__m128i __a,
__m128i __b) {
return (__m128i)((__v16qu)__a - (__v16qu)__b);
}
@@ -2516,7 +2516,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8(__m128i __a,
/// A 128-bit integer vector containing the subtrahends.
/// \returns A 128-bit integer vector containing the differences of the values
/// in the operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16(__m128i __a,
+static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16_CONSTEXPR(__m128i __a,
__m128i __b) {
return (__m128i)((__v8hu)__a - (__v8hu)__b);
}
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The next (tedious) step is to add test coverage - see #156369 for examples
@RKSimon Thank you! Will get back to you soon. |
1d5c969
to
450b4a0
Compare
@RKSimon , I'm a little confused as to why the build is failing and what else I would need to add/change, would you be able to point me to the right direction? Thank you 😄 . |
450b4a0
to
8b22bd7
Compare
Reverted my tests, working on them again from the start. |
ecc02e5
to
287254a
Compare
@RKSimon I've clang-formatted it and everything seems good, I think I'm ready to write some tests, please confirm and let me know. Thank you! |
@donneypr I've been playing with a script to help autogen the tests as that seems to be bottleneck for people - see if these work OK (they still need putting in the correct builtins files):
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please can you handle the MMX intrinsics as well?
_mm_add_pi8 _mm_add_pi16 _mm_add_pi32
_mm_sub_pi8 _mm_sub_pi16 _mm_sub_pi32
@RKSimon from what I can see in clang/lib/Headers/mmintrin.h, it looks like all the intrinsics you mentioned are already being used in CONSTEXPR, shall I make test cases for it? Thanks for providing the test cases for the others, I will get them in the right builtins file and get back to you |
Ah! I forgot I'd done these ages ago - they should already have test coverage if I did it properly. Thanks for checking. |
All tests have been added to their appropriate builtins files |
While I was going over my work I realized that some of the mask/maskz intrinsics were not evaluated in CONSTEXPR but we had test cases for them, so I went back and fixed it |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The __v16qi/__v32qi/___v64qi types inside the TEST_CONSTEXPR need converting to __v16qs/__v32qs/___v64qs - sorry my script didn't handle this :/
Thanks for letting me know! Going to switch them around right now. |
…/__v32qs/__v64qs (signed) llvm#152490
Sorry my apologies, I forgot to clang-format avx512vlintrin.h. We should be good now(hopefully 😄) . |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM - cheers!
@donneypr Congratulations on having your first Pull Request (PR) merged into the LLVM Project! Your changes will be combined with recent changes from other authors, then tested by our build bots. If there is a problem with a build, you may receive a report in an email or a comment on this PR. Please check whether problems have been caused by your change specifically, as the builds can include changes from many authors. It is not uncommon for your change to be included in a build that fails due to someone else's changes, or infrastructure issues. How to do this, and the rest of the post-merge process, is covered in detail here. If your change does cause a problem, it may be reverted, or you can revert it yourself. This is a normal part of LLVM development. You can fix your changes and open a new PR to merge them again. If you don't get any reports, no action is required from you. Your changes are working as expected, well done! |
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/88/builds/16342 Here is the relevant piece of the build log for the reference
|
@RKSimon This made my week! Thank you for your support and guidance. |
Fixes #152490