-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[Headers][X86] Use __builtin_elementwise_ctlz instead of avx512cd intrinsics.
#155089
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
b7b38d3
293a392
4def3b1
b6b14a7
1f69c71
01d0ec0
df00dab
a988157
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -5,6 +5,7 @@ | |||||||||
|
|
||||||||||
|
|
||||||||||
| #include <immintrin.h> | ||||||||||
| #include "builtin_test_helpers.h" | ||||||||||
|
|
||||||||||
| __m512i test_mm512_conflict_epi64(__m512i __A) { | ||||||||||
| // CHECK-LABEL: test_mm512_conflict_epi64 | ||||||||||
|
|
@@ -42,35 +43,41 @@ __m512i test_mm512_maskz_conflict_epi32(__mmask16 __U, __m512i __A) { | |||||||||
| } | ||||||||||
| __m512i test_mm512_lzcnt_epi32(__m512i __A) { | ||||||||||
| // CHECK-LABEL: test_mm512_lzcnt_epi32 | ||||||||||
| // CHECK: call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %{{.*}}, i1 false) | ||||||||||
| // CHECK: call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %{{.*}}, i1 true) | ||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's incorrect here. The
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Based on the code I see here: llvm-project/clang/lib/CodeGen/CGBuiltin.cpp Lines 3360 to 3363 in 0ad35d7
__builtin_elementwise_ctlz always emits @llvm.ctlz.*(*, i1 true).
However, providing a second argument to
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please can you add checks for the additional icmp and select
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Tried to do that in a new commit. PTAL when you can. |
||||||||||
| return _mm512_lzcnt_epi32(__A); | ||||||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you might be able to improve on this with something like: which might help in the latest checks especially where you have multiple selects
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thx for the suggestion. Tried to implement it in a new commit. |
||||||||||
| } | ||||||||||
|
|
||||||||||
| TEST_CONSTEXPR(match_v16si(_mm512_lzcnt_epi32((__m512i)(__v16si){1, 2, 4, 8, 16, 32, 64, 128, 3, 5, 6, 7, 9, 10, 11, 12}), 31, 30, 29, 28, 27, 26, 25, 24, 30, 29, 29, 29, 28, 28, 28, 28)); | ||||||||||
|
|
||||||||||
| __m512i test_mm512_mask_lzcnt_epi32(__m512i __W, __mmask16 __U, __m512i __A) { | ||||||||||
| // CHECK-LABEL: test_mm512_mask_lzcnt_epi32 | ||||||||||
| // CHECK: call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %{{.*}}, i1 false) | ||||||||||
| // CHECK: call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %{{.*}}, i1 true) | ||||||||||
| // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} | ||||||||||
| return _mm512_mask_lzcnt_epi32(__W,__U,__A); | ||||||||||
| } | ||||||||||
| __m512i test_mm512_maskz_lzcnt_epi32(__mmask16 __U, __m512i __A) { | ||||||||||
| // CHECK-LABEL: test_mm512_maskz_lzcnt_epi32 | ||||||||||
| // CHECK: call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %{{.*}}, i1 false) | ||||||||||
| // CHECK: call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %{{.*}}, i1 true) | ||||||||||
| // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} | ||||||||||
| return _mm512_maskz_lzcnt_epi32(__U,__A); | ||||||||||
| } | ||||||||||
| __m512i test_mm512_lzcnt_epi64(__m512i __A) { | ||||||||||
| // CHECK-LABEL: test_mm512_lzcnt_epi64 | ||||||||||
| // CHECK: call {{.*}}<8 x i64> @llvm.ctlz.v8i64(<8 x i64> %{{.*}}, i1 false) | ||||||||||
| // CHECK: call {{.*}}<8 x i64> @llvm.ctlz.v8i64(<8 x i64> %{{.*}}, i1 true) | ||||||||||
| return _mm512_lzcnt_epi64(__A); | ||||||||||
| } | ||||||||||
|
|
||||||||||
| TEST_CONSTEXPR(match_v8di(_mm512_lzcnt_epi64((__m512i)(__v8di){1, 2, 4, 8, 16, 32, 64, 128}), 63, 62, 61, 60, 59, 58, 57, 56)); | ||||||||||
|
|
||||||||||
| __m512i test_mm512_mask_lzcnt_epi64(__m512i __W, __mmask8 __U, __m512i __A) { | ||||||||||
| // CHECK-LABEL: test_mm512_mask_lzcnt_epi64 | ||||||||||
| // CHECK: call {{.*}}<8 x i64> @llvm.ctlz.v8i64(<8 x i64> %{{.*}}, i1 false) | ||||||||||
| // CHECK: call {{.*}}<8 x i64> @llvm.ctlz.v8i64(<8 x i64> %{{.*}}, i1 true) | ||||||||||
| // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} | ||||||||||
| return _mm512_mask_lzcnt_epi64(__W,__U,__A); | ||||||||||
| } | ||||||||||
| __m512i test_mm512_maskz_lzcnt_epi64(__mmask8 __U, __m512i __A) { | ||||||||||
| // CHECK-LABEL: test_mm512_maskz_lzcnt_epi64 | ||||||||||
| // CHECK: call {{.*}}<8 x i64> @llvm.ctlz.v8i64(<8 x i64> %{{.*}}, i1 false) | ||||||||||
| // CHECK: call {{.*}}<8 x i64> @llvm.ctlz.v8i64(<8 x i64> %{{.*}}, i1 true) | ||||||||||
| // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} | ||||||||||
| return _mm512_maskz_lzcnt_epi64(__U,__A); | ||||||||||
| } | ||||||||||
|
|
||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
https://clang.llvm.org/docs/LanguageExtensions.html
You need to provide the second argument as well - in this case it'd be:
(__m512i)__builtin_elementwise_ctlz((__v16si)__A, (__v16si)_mm512_set1_epi32(32));
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
An alternative is that we keep the __builtin_ia32_vplzcnt builtins and add them to VectorExprEvaluator::VisitCallExpr instead. It's annoying not to use the generics, but the two-operand variant of __builtin_elementwise_ctlz will end up generating a ctlz+icmp+select sequence that won't be great in -O0 builds; it comes down to whether we really care about that or not.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I suspect that people using x86 intrinsics are primarily interested in the optimized builds.