Skip to content

Commit 73cf62b

Browse files
authored
[Headers][X86] Use __builtin_elementwise_ctlz instead of avx512cd intrinsics. (llvm#155089)
The following avx512 intrinsics were removed: - `__builtin_ia32_vplzcntd_128` - `__builtin_ia32_vplzcntd_256` - `__builtin_ia32_vplzcntd_512` - `__builtin_ia32_vplzcntq_128` - `__builtin_ia32_vplzcntq_256` - `__builtin_ia32_vplzcntq_512` Users of the removed intrinsics (e.g. `_mm512_lzcnt_epi64`, `_mm_lzcnt_epi32`) are now relying on `__builtin_elementwise_ctlz` and are marked as `constexpr`. Fixes: llvm#154279
1 parent 37cd595 commit 73cf62b

File tree

6 files changed

+168
-105
lines changed

6 files changed

+168
-105
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1383,8 +1383,6 @@ let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVector
13831383
let Features = "avx512cd,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
13841384
def vpconflictdi_512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>)">;
13851385
def vpconflictsi_512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>)">;
1386-
def vplzcntd_512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>)">;
1387-
def vplzcntq_512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>)">;
13881386
}
13891387

13901388
let Features = "avx512vl,avx512bitalg", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
@@ -2438,22 +2436,6 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256
24382436
def rcp14ps256_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, unsigned char)">;
24392437
}
24402438

2441-
let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
2442-
def vplzcntd_128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>)">;
2443-
}
2444-
2445-
let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
2446-
def vplzcntd_256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>)">;
2447-
}
2448-
2449-
let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
2450-
def vplzcntq_128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>)">;
2451-
}
2452-
2453-
let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
2454-
def vplzcntq_256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>)">;
2455-
}
2456-
24572439
let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
24582440
def vcvtsd2si32 : X86Builtin<"int(_Vector<2, double>, _Constant int)">;
24592441
def vcvtsd2usi32 : X86Builtin<"unsigned int(_Vector<2, double>, _Constant int)">;

clang/lib/CodeGen/TargetBuiltins/X86.cpp

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2183,15 +2183,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
21832183
return Builder.CreateBitCast(Res, Ops[0]->getType());
21842184
}
21852185

2186-
case X86::BI__builtin_ia32_vplzcntd_128:
2187-
case X86::BI__builtin_ia32_vplzcntd_256:
2188-
case X86::BI__builtin_ia32_vplzcntd_512:
2189-
case X86::BI__builtin_ia32_vplzcntq_128:
2190-
case X86::BI__builtin_ia32_vplzcntq_256:
2191-
case X86::BI__builtin_ia32_vplzcntq_512: {
2192-
Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
2193-
return Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)});
2194-
}
21952186
case X86::BI__builtin_ia32_sqrtss:
21962187
case X86::BI__builtin_ia32_sqrtsd: {
21972188
Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);

clang/lib/Headers/avx512cdintrin.h

Lines changed: 23 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,12 @@
1919
__attribute__((__always_inline__, __nodebug__, \
2020
__target__("avx512cd,evex512"), __min_vector_width__(512)))
2121

22+
#if defined(__cplusplus) && (__cplusplus >= 201103L)
23+
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
24+
#else
25+
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
26+
#endif
27+
2228
static __inline__ __m512i __DEFAULT_FN_ATTRS
2329
_mm512_conflict_epi64 (__m512i __A)
2430
{
@@ -63,45 +69,41 @@ _mm512_maskz_conflict_epi32 (__mmask16 __U, __m512i __A)
6369
(__v16si)_mm512_setzero_si512());
6470
}
6571

66-
static __inline__ __m512i __DEFAULT_FN_ATTRS
67-
_mm512_lzcnt_epi32 (__m512i __A)
68-
{
69-
return (__m512i) __builtin_ia32_vplzcntd_512 ((__v16si) __A);
72+
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
73+
_mm512_lzcnt_epi32(__m512i __A) {
74+
return (__m512i)__builtin_elementwise_ctlz((__v16si)__A,
75+
(__v16si)_mm512_set1_epi32(32));
7076
}
7177

72-
static __inline__ __m512i __DEFAULT_FN_ATTRS
73-
_mm512_mask_lzcnt_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
74-
{
78+
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
79+
_mm512_mask_lzcnt_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
7580
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
7681
(__v16si)_mm512_lzcnt_epi32(__A),
7782
(__v16si)__W);
7883
}
7984

80-
static __inline__ __m512i __DEFAULT_FN_ATTRS
81-
_mm512_maskz_lzcnt_epi32 (__mmask16 __U, __m512i __A)
82-
{
85+
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
86+
_mm512_maskz_lzcnt_epi32(__mmask16 __U, __m512i __A) {
8387
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
8488
(__v16si)_mm512_lzcnt_epi32(__A),
8589
(__v16si)_mm512_setzero_si512());
8690
}
8791

88-
static __inline__ __m512i __DEFAULT_FN_ATTRS
89-
_mm512_lzcnt_epi64 (__m512i __A)
90-
{
91-
return (__m512i) __builtin_ia32_vplzcntq_512 ((__v8di) __A);
92+
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
93+
_mm512_lzcnt_epi64(__m512i __A) {
94+
return (__m512i)__builtin_elementwise_ctlz(
95+
(__v8di)__A, (__v8di)_mm512_set1_epi64((long long)64));
9296
}
9397

94-
static __inline__ __m512i __DEFAULT_FN_ATTRS
95-
_mm512_mask_lzcnt_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
96-
{
98+
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
99+
_mm512_mask_lzcnt_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
97100
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
98101
(__v8di)_mm512_lzcnt_epi64(__A),
99102
(__v8di)__W);
100103
}
101104

102-
static __inline__ __m512i __DEFAULT_FN_ATTRS
103-
_mm512_maskz_lzcnt_epi64 (__mmask8 __U, __m512i __A)
104-
{
105+
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
106+
_mm512_maskz_lzcnt_epi64(__mmask8 __U, __m512i __A) {
105107
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
106108
(__v8di)_mm512_lzcnt_epi64(__A),
107109
(__v8di)_mm512_setzero_si512());
@@ -121,5 +123,6 @@ _mm512_broadcastmw_epi32 (__mmask16 __A)
121123
}
122124

123125
#undef __DEFAULT_FN_ATTRS
126+
#undef __DEFAULT_FN_ATTRS_CONSTEXPR
124127

125128
#endif

clang/lib/Headers/avx512vlcdintrin.h

Lines changed: 42 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,14 @@
2323
__target__("avx512vl,avx512cd,no-evex512"), \
2424
__min_vector_width__(256)))
2525

26+
#if defined(__cplusplus) && (__cplusplus >= 201103L)
27+
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
28+
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
29+
#else
30+
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
31+
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
32+
#endif
33+
2634
static __inline__ __m128i __DEFAULT_FN_ATTRS128
2735
_mm_broadcastmb_epi64 (__mmask8 __A)
2836
{
@@ -136,95 +144,89 @@ _mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A)
136144
(__v8si)_mm256_setzero_si256());
137145
}
138146

139-
static __inline__ __m128i __DEFAULT_FN_ATTRS128
140-
_mm_lzcnt_epi32 (__m128i __A)
141-
{
142-
return (__m128i) __builtin_ia32_vplzcntd_128 ((__v4si) __A);
147+
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
148+
_mm_lzcnt_epi32(__m128i __A) {
149+
return (__m128i)__builtin_elementwise_ctlz((__v4si)__A,
150+
(__v4si)_mm_set1_epi32(32));
143151
}
144152

145-
static __inline__ __m128i __DEFAULT_FN_ATTRS128
146-
_mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
147-
{
153+
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
154+
_mm_mask_lzcnt_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
148155
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
149156
(__v4si)_mm_lzcnt_epi32(__A),
150157
(__v4si)__W);
151158
}
152159

153-
static __inline__ __m128i __DEFAULT_FN_ATTRS128
154-
_mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A)
155-
{
160+
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
161+
_mm_maskz_lzcnt_epi32(__mmask8 __U, __m128i __A) {
156162
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
157163
(__v4si)_mm_lzcnt_epi32(__A),
158164
(__v4si)_mm_setzero_si128());
159165
}
160166

161-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
162-
_mm256_lzcnt_epi32 (__m256i __A)
163-
{
164-
return (__m256i) __builtin_ia32_vplzcntd_256 ((__v8si) __A);
167+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
168+
_mm256_lzcnt_epi32(__m256i __A) {
169+
return (__m256i)__builtin_elementwise_ctlz((__v8si)__A,
170+
(__v8si)_mm256_set1_epi32(32));
165171
}
166172

167-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
168-
_mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
169-
{
173+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
174+
_mm256_mask_lzcnt_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
170175
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
171176
(__v8si)_mm256_lzcnt_epi32(__A),
172177
(__v8si)__W);
173178
}
174179

175-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
176-
_mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A)
177-
{
180+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
181+
_mm256_maskz_lzcnt_epi32(__mmask8 __U, __m256i __A) {
178182
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
179183
(__v8si)_mm256_lzcnt_epi32(__A),
180184
(__v8si)_mm256_setzero_si256());
181185
}
182186

183-
static __inline__ __m128i __DEFAULT_FN_ATTRS128
184-
_mm_lzcnt_epi64 (__m128i __A)
185-
{
186-
return (__m128i) __builtin_ia32_vplzcntq_128 ((__v2di) __A);
187+
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
188+
_mm_lzcnt_epi64(__m128i __A) {
189+
return (__m128i)__builtin_elementwise_ctlz(
190+
(__v2di)__A, (__v2di)_mm_set1_epi64x((long long)64));
187191
}
188192

189-
static __inline__ __m128i __DEFAULT_FN_ATTRS128
190-
_mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
191-
{
193+
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
194+
_mm_mask_lzcnt_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
192195
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
193196
(__v2di)_mm_lzcnt_epi64(__A),
194197
(__v2di)__W);
195198
}
196199

197-
static __inline__ __m128i __DEFAULT_FN_ATTRS128
198-
_mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A)
199-
{
200+
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
201+
_mm_maskz_lzcnt_epi64(__mmask8 __U, __m128i __A) {
200202
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
201203
(__v2di)_mm_lzcnt_epi64(__A),
202204
(__v2di)_mm_setzero_si128());
203205
}
204206

205-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
206-
_mm256_lzcnt_epi64 (__m256i __A)
207-
{
208-
return (__m256i) __builtin_ia32_vplzcntq_256 ((__v4di) __A);
207+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
208+
_mm256_lzcnt_epi64(__m256i __A) {
209+
return (__m256i)__builtin_elementwise_ctlz(
210+
(__v4di)__A, (__v4di)_mm256_set1_epi64x((long long)64));
209211
}
210212

211-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
212-
_mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
213-
{
213+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
214+
_mm256_mask_lzcnt_epi64(__m256i __W, __mmask8 __U, __m256i __A) {
214215
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
215216
(__v4di)_mm256_lzcnt_epi64(__A),
216217
(__v4di)__W);
217218
}
218219

219-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
220-
_mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A)
221-
{
220+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
221+
_mm256_maskz_lzcnt_epi64(__mmask8 __U, __m256i __A) {
222222
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
223223
(__v4di)_mm256_lzcnt_epi64(__A),
224224
(__v4di)_mm256_setzero_si256());
225225
}
226226

227227
#undef __DEFAULT_FN_ATTRS128
228228
#undef __DEFAULT_FN_ATTRS256
229+
#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
230+
#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
229231

230232
#endif /* __AVX512VLCDINTRIN_H */

clang/test/CodeGen/X86/avx512cd-builtins.c

Lines changed: 38 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66

77
#include <immintrin.h>
8+
#include "builtin_test_helpers.h"
89

910
__m512i test_mm512_conflict_epi64(__m512i __A) {
1011
// CHECK-LABEL: test_mm512_conflict_epi64
@@ -42,39 +43,70 @@ __m512i test_mm512_maskz_conflict_epi32(__mmask16 __U, __m512i __A) {
4243
}
4344
__m512i test_mm512_lzcnt_epi32(__m512i __A) {
4445
// CHECK-LABEL: test_mm512_lzcnt_epi32
45-
// CHECK: call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %{{.*}}, i1 false)
46+
// CHECK: call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %{{.*}}, i1 true)
47+
// CHECK: [[ISZERO:%.+]] = icmp eq <16 x i32> %{{.*}}, zeroinitializer
48+
// CHECK: select <16 x i1> [[ISZERO]], <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
4649
return _mm512_lzcnt_epi32(__A);
4750
}
51+
52+
TEST_CONSTEXPR(match_v16si(_mm512_lzcnt_epi32((__m512i)(__v16si){1, 2, 4, 8, 16, 32, 64, 128, 3, 5, 6, 7, 9, 10, 11, 12}), 31, 30, 29, 28, 27, 26, 25, 24, 30, 29, 29, 29, 28, 28, 28, 28));
53+
TEST_CONSTEXPR(match_v16si(_mm512_lzcnt_epi32((__m512i)(__v16si){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}), 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32));
54+
4855
__m512i test_mm512_mask_lzcnt_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
4956
// CHECK-LABEL: test_mm512_mask_lzcnt_epi32
50-
// CHECK: call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %{{.*}}, i1 false)
57+
// CHECK: call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %{{.*}}, i1 true)
58+
// CHECK: [[ISZERO:%.+]] = icmp eq <16 x i32> %{{.*}}, zeroinitializer
59+
// CHECK: select <16 x i1> [[ISZERO]], <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
5160
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
5261
return _mm512_mask_lzcnt_epi32(__W,__U,__A);
5362
}
63+
64+
TEST_CONSTEXPR(match_v16si(_mm512_mask_lzcnt_epi32(_mm512_set1_epi32(32), /*1010 1100 1010 1101=*/0xacad, (__m512i)(__v16si){1, 2, 4, 8, 16, 32, 64, 128, 3, 5, 6, 7, 9, 10, 11, 12}), 31, 32, 29, 28, 32, 26, 32, 24, 32, 32, 29, 29, 32, 28, 32, 28));
65+
5466
__m512i test_mm512_maskz_lzcnt_epi32(__mmask16 __U, __m512i __A) {
5567
// CHECK-LABEL: test_mm512_maskz_lzcnt_epi32
56-
// CHECK: call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %{{.*}}, i1 false)
68+
// CHECK: call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %{{.*}}, i1 true)
69+
// CHECK: [[ISZERO:%.+]] = icmp eq <16 x i32> %{{.*}}, zeroinitializer
70+
// CHECK: select <16 x i1> [[ISZERO]], <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
5771
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
5872
return _mm512_maskz_lzcnt_epi32(__U,__A);
5973
}
74+
75+
TEST_CONSTEXPR(match_v16si(_mm512_maskz_lzcnt_epi32(/*1010 1100 1010 1101=*/0xacad, (__m512i)(__v16si){1, 2, 4, 8, 16, 32, 64, 128, 3, 5, 6, 7, 9, 10, 11, 12}), 31, 0, 29, 28, 0, 26, 0, 24, 0, 0, 29, 29, 0, 28, 0, 28));
76+
6077
__m512i test_mm512_lzcnt_epi64(__m512i __A) {
6178
// CHECK-LABEL: test_mm512_lzcnt_epi64
62-
// CHECK: call {{.*}}<8 x i64> @llvm.ctlz.v8i64(<8 x i64> %{{.*}}, i1 false)
79+
// CHECK: call {{.*}}<8 x i64> @llvm.ctlz.v8i64(<8 x i64> %{{.*}}, i1 true)
80+
// CHECK: [[ISZERO:%.+]] = icmp eq <8 x i64> %{{.*}}, zeroinitializer
81+
// CHECK: select <8 x i1> [[ISZERO]], <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
6382
return _mm512_lzcnt_epi64(__A);
6483
}
84+
85+
TEST_CONSTEXPR(match_v8di(_mm512_lzcnt_epi64((__m512i)(__v8di){1, 2, 4, 8, 16, 32, 64, 128}), 63, 62, 61, 60, 59, 58, 57, 56));
86+
TEST_CONSTEXPR(match_v8di(_mm512_lzcnt_epi64((__m512i)(__v8di){0, 0, 0, 0, 0, 0, 0, 0}), 64, 64, 64, 64, 64, 64, 64, 64));
87+
6588
__m512i test_mm512_mask_lzcnt_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
6689
// CHECK-LABEL: test_mm512_mask_lzcnt_epi64
67-
// CHECK: call {{.*}}<8 x i64> @llvm.ctlz.v8i64(<8 x i64> %{{.*}}, i1 false)
90+
// CHECK: call {{.*}}<8 x i64> @llvm.ctlz.v8i64(<8 x i64> %{{.*}}, i1 true)
91+
// CHECK: [[ISZERO:%.+]] = icmp eq <8 x i64> %{{.*}}, zeroinitializer
92+
// CHECK: select <8 x i1> [[ISZERO]], <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
6893
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
6994
return _mm512_mask_lzcnt_epi64(__W,__U,__A);
7095
}
96+
97+
TEST_CONSTEXPR(match_v8di(_mm512_mask_lzcnt_epi64(_mm512_set1_epi64((long long) 64), /*0101 0111=*/0x57, (__m512i)(__v8di){1, 2, 4, 8, 16, 32, 64, 128}), 63, 62, 61, 64, 59, 64, 57, 64));
98+
7199
__m512i test_mm512_maskz_lzcnt_epi64(__mmask8 __U, __m512i __A) {
72100
// CHECK-LABEL: test_mm512_maskz_lzcnt_epi64
73-
// CHECK: call {{.*}}<8 x i64> @llvm.ctlz.v8i64(<8 x i64> %{{.*}}, i1 false)
101+
// CHECK: call {{.*}}<8 x i64> @llvm.ctlz.v8i64(<8 x i64> %{{.*}}, i1 true)
102+
// CHECK: [[ISZERO:%.+]] = icmp eq <8 x i64> %{{.*}}, zeroinitializer
103+
// CHECK: select <8 x i1> [[ISZERO]], <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
74104
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
75105
return _mm512_maskz_lzcnt_epi64(__U,__A);
76106
}
77107

108+
TEST_CONSTEXPR(match_v8di(_mm512_maskz_lzcnt_epi64(/*0101 0111=*/0x57, (__m512i)(__v8di){1, 2, 4, 8, 16, 32, 64, 128}), 63, 62, 61, 0, 59, 0, 57, 0));
109+
78110
__m512i test_mm512_broadcastmb_epi64(__m512i a, __m512i b) {
79111
// CHECK-LABEL: test_mm512_broadcastmb_epi64
80112
// CHECK: icmp eq <8 x i64> %{{.*}}, %{{.*}}

0 commit comments

Comments
 (0)