-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[Headers][X86] Use __builtin_elementwise_ctlz instead of avx512cd intrinsics.
#155089
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
b7b38d3
293a392
4def3b1
b6b14a7
1f69c71
01d0ec0
df00dab
a988157
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,6 +5,7 @@ | |
|
|
||
|
|
||
| #include <immintrin.h> | ||
| #include "builtin_test_helpers.h" | ||
|
|
||
| __m512i test_mm512_conflict_epi64(__m512i __A) { | ||
| // CHECK-LABEL: test_mm512_conflict_epi64 | ||
|
|
@@ -42,39 +43,58 @@ __m512i test_mm512_maskz_conflict_epi32(__mmask16 __U, __m512i __A) { | |
| } | ||
| __m512i test_mm512_lzcnt_epi32(__m512i __A) { | ||
| // CHECK-LABEL: test_mm512_lzcnt_epi32 | ||
| // CHECK: call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %{{.*}}, i1 false) | ||
| // CHECK: call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %{{.*}}, i1 true) | ||
| return _mm512_lzcnt_epi32(__A); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You might be able to improve on this with something like: [suggested code snippet not captured in this extract], which might help in the latest checks, especially where you have multiple selects.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks for the suggestion. I tried to implement it in a new commit. |
||
| } | ||
|
|
||
| TEST_CONSTEXPR(match_v16si(_mm512_lzcnt_epi32((__m512i)(__v16si){1, 2, 4, 8, 16, 32, 64, 128, 3, 5, 6, 7, 9, 10, 11, 12}), 31, 30, 29, 28, 27, 26, 25, 24, 30, 29, 29, 29, 28, 28, 28, 28)); | ||
| TEST_CONSTEXPR(match_v16si(_mm512_lzcnt_epi32((__m512i)(__v16si){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}), 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32)); | ||
|
|
||
| __m512i test_mm512_mask_lzcnt_epi32(__m512i __W, __mmask16 __U, __m512i __A) { | ||
| // CHECK-LABEL: test_mm512_mask_lzcnt_epi32 | ||
| // CHECK: call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %{{.*}}, i1 false) | ||
| // CHECK: call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %{{.*}}, i1 true) | ||
| // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} | ||
| return _mm512_mask_lzcnt_epi32(__W,__U,__A); | ||
| } | ||
|
|
||
| TEST_CONSTEXPR(match_v16si(_mm512_mask_lzcnt_epi32(_mm512_set1_epi32(32), /*1010 1100 1010 1101=*/0xacad, (__m512i)(__v16si){1, 2, 4, 8, 16, 32, 64, 128, 3, 5, 6, 7, 9, 10, 11, 12}), 31, 32, 29, 28, 32, 26, 32, 24, 32, 32, 29, 29, 32, 28, 32, 28)); | ||
|
|
||
| __m512i test_mm512_maskz_lzcnt_epi32(__mmask16 __U, __m512i __A) { | ||
| // CHECK-LABEL: test_mm512_maskz_lzcnt_epi32 | ||
| // CHECK: call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %{{.*}}, i1 false) | ||
| // CHECK: call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %{{.*}}, i1 true) | ||
| // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} | ||
| return _mm512_maskz_lzcnt_epi32(__U,__A); | ||
| } | ||
|
|
||
| TEST_CONSTEXPR(match_v16si(_mm512_maskz_lzcnt_epi32(/*1010 1100 1010 1101=*/0xacad, (__m512i)(__v16si){1, 2, 4, 8, 16, 32, 64, 128, 3, 5, 6, 7, 9, 10, 11, 12}), 31, 0, 29, 28, 0, 26, 0, 24, 0, 0, 29, 29, 0, 28, 0, 28)); | ||
|
|
||
| __m512i test_mm512_lzcnt_epi64(__m512i __A) { | ||
| // CHECK-LABEL: test_mm512_lzcnt_epi64 | ||
| // CHECK: call {{.*}}<8 x i64> @llvm.ctlz.v8i64(<8 x i64> %{{.*}}, i1 false) | ||
| // CHECK: call {{.*}}<8 x i64> @llvm.ctlz.v8i64(<8 x i64> %{{.*}}, i1 true) | ||
| return _mm512_lzcnt_epi64(__A); | ||
| } | ||
|
|
||
| TEST_CONSTEXPR(match_v8di(_mm512_lzcnt_epi64((__m512i)(__v8di){1, 2, 4, 8, 16, 32, 64, 128}), 63, 62, 61, 60, 59, 58, 57, 56)); | ||
| TEST_CONSTEXPR(match_v8di(_mm512_lzcnt_epi64((__m512i)(__v8di){0, 0, 0, 0, 0, 0, 0, 0}), 64, 64, 64, 64, 64, 64, 64, 64)); | ||
|
|
||
| __m512i test_mm512_mask_lzcnt_epi64(__m512i __W, __mmask8 __U, __m512i __A) { | ||
| // CHECK-LABEL: test_mm512_mask_lzcnt_epi64 | ||
| // CHECK: call {{.*}}<8 x i64> @llvm.ctlz.v8i64(<8 x i64> %{{.*}}, i1 false) | ||
| // CHECK: call {{.*}}<8 x i64> @llvm.ctlz.v8i64(<8 x i64> %{{.*}}, i1 true) | ||
| // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} | ||
| return _mm512_mask_lzcnt_epi64(__W,__U,__A); | ||
| } | ||
|
|
||
| TEST_CONSTEXPR(match_v8di(_mm512_mask_lzcnt_epi64(_mm512_set1_epi64((long long) 64), /*0101 0111=*/0x57, (__m512i)(__v8di){1, 2, 4, 8, 16, 32, 64, 128}), 63, 62, 61, 64, 59, 64, 57, 64)); | ||
|
|
||
| __m512i test_mm512_maskz_lzcnt_epi64(__mmask8 __U, __m512i __A) { | ||
| // CHECK-LABEL: test_mm512_maskz_lzcnt_epi64 | ||
| // CHECK: call {{.*}}<8 x i64> @llvm.ctlz.v8i64(<8 x i64> %{{.*}}, i1 false) | ||
| // CHECK: call {{.*}}<8 x i64> @llvm.ctlz.v8i64(<8 x i64> %{{.*}}, i1 true) | ||
| // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} | ||
| return _mm512_maskz_lzcnt_epi64(__U,__A); | ||
| } | ||
|
|
||
| TEST_CONSTEXPR(match_v8di(_mm512_maskz_lzcnt_epi64(/*0101 0111=*/0x57, (__m512i)(__v8di){1, 2, 4, 8, 16, 32, 64, 128}), 63, 62, 61, 0, 59, 0, 57, 0)); | ||
|
|
||
| __m512i test_mm512_broadcastmb_epi64(__m512i a, __m512i b) { | ||
| // CHECK-LABEL: test_mm512_broadcastmb_epi64 | ||
| // CHECK: icmp eq <8 x i64> %{{.*}}, %{{.*}} | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's incorrect here. The `is_zero_poison` argument needs to stay `false`. The same applies below.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Based on the code I see here:
llvm-project/clang/lib/CodeGen/CGBuiltin.cpp
Lines 3360 to 3363 in 0ad35d7
`__builtin_elementwise_ctlz` always emits `@llvm.ctlz.*(*, i1 true)`. However, providing a second argument to `__builtin_elementwise_ctlz` is similar to having `is_zero_poison` set to `false`. That's my understanding based on https://clang.llvm.org/docs/LanguageExtensions.html#vector-builtins.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please can you add checks for the additional `icmp` and `select`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Tried to do that in a new commit. PTAL when you can.