Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 0 additions & 18 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -1383,8 +1383,6 @@ let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVector
let Features = "avx512cd,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
def vpconflictdi_512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>)">;
def vpconflictsi_512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>)">;
def vplzcntd_512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>)">;
def vplzcntq_512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>)">;
}

let Features = "avx512vl,avx512bitalg", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
Expand Down Expand Up @@ -2438,22 +2436,6 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256
def rcp14ps256_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, unsigned char)">;
}

let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def vplzcntd_128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>)">;
}

let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def vplzcntd_256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>)">;
}

let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def vplzcntq_128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>)">;
}

let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def vplzcntq_256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>)">;
}

let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def vcvtsd2si32 : X86Builtin<"int(_Vector<2, double>, _Constant int)">;
def vcvtsd2usi32 : X86Builtin<"unsigned int(_Vector<2, double>, _Constant int)">;
Expand Down
9 changes: 0 additions & 9 deletions clang/lib/CodeGen/TargetBuiltins/X86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2183,15 +2183,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return Builder.CreateBitCast(Res, Ops[0]->getType());
}

case X86::BI__builtin_ia32_vplzcntd_128:
case X86::BI__builtin_ia32_vplzcntd_256:
case X86::BI__builtin_ia32_vplzcntd_512:
case X86::BI__builtin_ia32_vplzcntq_128:
case X86::BI__builtin_ia32_vplzcntq_256:
case X86::BI__builtin_ia32_vplzcntq_512: {
Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
return Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)});
}
case X86::BI__builtin_ia32_sqrtss:
case X86::BI__builtin_ia32_sqrtsd: {
Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
Expand Down
21 changes: 13 additions & 8 deletions clang/lib/Headers/avx512cdintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512cd,evex512"), __min_vector_width__(512)))

#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
#else
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
#endif

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_conflict_epi64 (__m512i __A)
{
Expand Down Expand Up @@ -63,10 +69,9 @@ _mm512_maskz_conflict_epi32 (__mmask16 __U, __m512i __A)
(__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_lzcnt_epi32 (__m512i __A)
{
return (__m512i) __builtin_ia32_vplzcntd_512 ((__v16si) __A);
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm512_lzcnt_epi32(__m512i __A) {
return (__m512i)__builtin_elementwise_ctlz((__v16si)__A);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

https://clang.llvm.org/docs/LanguageExtensions.html

You need to provide the second argument as well - in this case it'd be:
(__m512i)__builtin_elementwise_ctlz((__v16si)__A, (__16si)_mm512_set1_epi32(32));

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

An alternative is we keep the __builtin_ia32_vplzcnt builtins and add them to VectorExprEvaluator::VisitCallExpr instead - its annoying not to use the generics, but the __builtin_elementwise_ctlz 2 operand variant will end up generating a ctlz+icmp+select sequence that won't be great in -O0 builds - but its whether we really care about that or not.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suspect that people using x86 intrinsics are primarily interested in the optimized builds.

}

static __inline__ __m512i __DEFAULT_FN_ATTRS
Expand All @@ -85,10 +90,9 @@ _mm512_maskz_lzcnt_epi32 (__mmask16 __U, __m512i __A)
(__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_lzcnt_epi64 (__m512i __A)
{
return (__m512i) __builtin_ia32_vplzcntq_512 ((__v8di) __A);
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm512_lzcnt_epi64(__m512i __A) {
return (__m512i)__builtin_elementwise_ctlz((__v8di)__A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
Expand Down Expand Up @@ -121,5 +125,6 @@ _mm512_broadcastmw_epi32 (__mmask16 __A)
}

#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS_CONSTEXPR

#endif
38 changes: 22 additions & 16 deletions clang/lib/Headers/avx512vlcdintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,14 @@
__target__("avx512vl,avx512cd,no-evex512"), \
__min_vector_width__(256)))

#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
#else
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
#endif

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_broadcastmb_epi64 (__mmask8 __A)
{
Expand Down Expand Up @@ -136,10 +144,9 @@ _mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A)
(__v8si)_mm256_setzero_si256());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_lzcnt_epi32 (__m128i __A)
{
return (__m128i) __builtin_ia32_vplzcntd_128 ((__v4si) __A);
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_lzcnt_epi32(__m128i __A) {
return (__m128i)__builtin_elementwise_ctlz((__v4si)__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
Expand All @@ -158,10 +165,9 @@ _mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A)
(__v4si)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_lzcnt_epi32 (__m256i __A)
{
return (__m256i) __builtin_ia32_vplzcntd_256 ((__v8si) __A);
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_lzcnt_epi32(__m256i __A) {
return (__m256i)__builtin_elementwise_ctlz((__v8si)__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
Expand All @@ -180,10 +186,9 @@ _mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A)
(__v8si)_mm256_setzero_si256());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_lzcnt_epi64 (__m128i __A)
{
return (__m128i) __builtin_ia32_vplzcntq_128 ((__v2di) __A);
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_lzcnt_epi64(__m128i __A) {
return (__m128i)__builtin_elementwise_ctlz((__v2di)__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
Expand All @@ -202,10 +207,9 @@ _mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A)
(__v2di)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_lzcnt_epi64 (__m256i __A)
{
return (__m256i) __builtin_ia32_vplzcntq_256 ((__v4di) __A);
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_lzcnt_epi64(__m256i __A) {
return (__m256i)__builtin_elementwise_ctlz((__v4di)__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
Expand All @@ -226,5 +230,7 @@ _mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A)

#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
#undef __DEFAULT_FN_ATTRS256_CONSTEXPR

#endif /* __AVX512VLCDINTRIN_H */
19 changes: 13 additions & 6 deletions clang/test/CodeGen/X86/avx512cd-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@


#include <immintrin.h>
#include "builtin_test_helpers.h"

__m512i test_mm512_conflict_epi64(__m512i __A) {
// CHECK-LABEL: test_mm512_conflict_epi64
Expand Down Expand Up @@ -42,35 +43,41 @@ __m512i test_mm512_maskz_conflict_epi32(__mmask16 __U, __m512i __A) {
}
__m512i test_mm512_lzcnt_epi32(__m512i __A) {
// CHECK-LABEL: test_mm512_lzcnt_epi32
// CHECK: call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %{{.*}}, i1 false)
// CHECK: call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %{{.*}}, i1 true)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's incorrect here. The is_zero_poison argument needs to stay false. The same below.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Based on the code I see here:

// The elementwise builtins always exhibit zero-is-undef behaviour
Value *ZeroUndef = Builder.getInt1(
HasFallback || getTarget().isCLZForZeroUndef() ||
BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_cttz);

__builtin_elementwise_ctlz always emits @llvm.ctlz.*(*, i1 true).

However, providing a second argument to __builtin_elementwise_ctlz is similar to having is_zero_poison set to false. That's my understanding based on https://clang.llvm.org/docs/LanguageExtensions.html#vector-builtins.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please can you add checks for the additional icmp and select

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tried to do that in a new commit. PTAL when you can.

return _mm512_lzcnt_epi32(__A);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you might be able to improve on this with something like:

// CHECK: [[ISZERO:%.+]] = icmp eq <16 x i32> %{{.*}}, zeroinitializer
// CHECK: select <16 x i1> [[ISZERO]], <16 x i32> %{{.*}}, <16 x i32> %{{.*}}

which might help in the latest checks especially where you have multiple selects

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thx for the suggestion. Tried to implement it in a new commit.
LMK if it would be better to avoid redefining [[ISZERO]] each time and instead reuse the existing value whenever possible.

}

TEST_CONSTEXPR(match_v16si(_mm512_lzcnt_epi32((__m512i)(__v16si){1, 2, 4, 8, 16, 32, 64, 128, 3, 5, 6, 7, 9, 10, 11, 12}), 31, 30, 29, 28, 27, 26, 25, 24, 30, 29, 29, 29, 28, 28, 28, 28));

__m512i test_mm512_mask_lzcnt_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_mask_lzcnt_epi32
// CHECK: call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %{{.*}}, i1 false)
// CHECK: call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %{{.*}}, i1 true)
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_lzcnt_epi32(__W,__U,__A);
}
__m512i test_mm512_maskz_lzcnt_epi32(__mmask16 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_maskz_lzcnt_epi32
// CHECK: call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %{{.*}}, i1 false)
// CHECK: call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %{{.*}}, i1 true)
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_lzcnt_epi32(__U,__A);
}
__m512i test_mm512_lzcnt_epi64(__m512i __A) {
// CHECK-LABEL: test_mm512_lzcnt_epi64
// CHECK: call {{.*}}<8 x i64> @llvm.ctlz.v8i64(<8 x i64> %{{.*}}, i1 false)
// CHECK: call {{.*}}<8 x i64> @llvm.ctlz.v8i64(<8 x i64> %{{.*}}, i1 true)
return _mm512_lzcnt_epi64(__A);
}

TEST_CONSTEXPR(match_v8di(_mm512_lzcnt_epi64((__m512i)(__v8di){1, 2, 4, 8, 16, 32, 64, 128}), 63, 62, 61, 60, 59, 58, 57, 56));

__m512i test_mm512_mask_lzcnt_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_mask_lzcnt_epi64
// CHECK: call {{.*}}<8 x i64> @llvm.ctlz.v8i64(<8 x i64> %{{.*}}, i1 false)
// CHECK: call {{.*}}<8 x i64> @llvm.ctlz.v8i64(<8 x i64> %{{.*}}, i1 true)
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_mask_lzcnt_epi64(__W,__U,__A);
}
__m512i test_mm512_maskz_lzcnt_epi64(__mmask8 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_maskz_lzcnt_epi64
// CHECK: call {{.*}}<8 x i64> @llvm.ctlz.v8i64(<8 x i64> %{{.*}}, i1 false)
// CHECK: call {{.*}}<8 x i64> @llvm.ctlz.v8i64(<8 x i64> %{{.*}}, i1 true)
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_maskz_lzcnt_epi64(__U,__A);
}
Expand Down
33 changes: 21 additions & 12 deletions clang/test/CodeGen/X86/avx512vlcd-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@


#include <immintrin.h>
#include "builtin_test_helpers.h"

__m128i test_mm_broadcastmb_epi64(__m128i a,__m128i b) {
// CHECK-LABEL: test_mm_broadcastmb_epi64
Expand Down Expand Up @@ -136,80 +137,88 @@ __m256i test_mm256_maskz_conflict_epi32(__mmask8 __U, __m256i __A) {

__m128i test_mm_lzcnt_epi32(__m128i __A) {
// CHECK-LABEL: test_mm_lzcnt_epi32
// CHECK: call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %{{.*}}, i1 false)
// CHECK: call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %{{.*}}, i1 true)
return _mm_lzcnt_epi32(__A);
}

TEST_CONSTEXPR(match_v4si(_mm_lzcnt_epi32((__m128i)(__v4si){8, 16, 32, 64}), 28, 27, 26, 25));

__m128i test_mm_mask_lzcnt_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_mask_lzcnt_epi32
// CHECK: call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %{{.*}}, i1 false)
// CHECK: call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %{{.*}}, i1 true)
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_mask_lzcnt_epi32(__W, __U, __A);
}

__m128i test_mm_maskz_lzcnt_epi32(__mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_maskz_lzcnt_epi32
// CHECK: call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %{{.*}}, i1 false)
// CHECK: call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %{{.*}}, i1 true)
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_maskz_lzcnt_epi32(__U, __A);
}

__m256i test_mm256_lzcnt_epi32(__m256i __A) {
// CHECK-LABEL: test_mm256_lzcnt_epi32
// CHECK: call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %{{.*}}, i1 false)
// CHECK: call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %{{.*}}, i1 true)
return _mm256_lzcnt_epi32(__A);
}

TEST_CONSTEXPR(match_v8si(_mm256_lzcnt_epi32((__m256i)(__v8si){1, 2, 4, 8, 16, 32, 64, 128}), 31, 30, 29, 28, 27, 26, 25, 24));

__m256i test_mm256_mask_lzcnt_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
// CHECK-LABEL: test_mm256_mask_lzcnt_epi32
// CHECK: call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %{{.*}}, i1 false)
// CHECK: call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %{{.*}}, i1 true)
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_lzcnt_epi32(__W, __U, __A);
}

__m256i test_mm256_maskz_lzcnt_epi32(__mmask8 __U, __m256i __A) {
// CHECK-LABEL: test_mm256_maskz_lzcnt_epi32
// CHECK: call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %{{.*}}, i1 false)
// CHECK: call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %{{.*}}, i1 true)
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_maskz_lzcnt_epi32(__U, __A);
}

__m128i test_mm_lzcnt_epi64(__m128i __A) {
// CHECK-LABEL: test_mm_lzcnt_epi64
// CHECK: call {{.*}}<2 x i64> @llvm.ctlz.v2i64(<2 x i64> %{{.*}}, i1 false)
// CHECK: call {{.*}}<2 x i64> @llvm.ctlz.v2i64(<2 x i64> %{{.*}}, i1 true)
return _mm_lzcnt_epi64(__A);
}

TEST_CONSTEXPR(match_v2di(_mm_lzcnt_epi64((__m128i)(__v2di){1, 2}), 63, 62));

__m128i test_mm_mask_lzcnt_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_mask_lzcnt_epi64
// CHECK: call {{.*}}<2 x i64> @llvm.ctlz.v2i64(<2 x i64> %{{.*}}, i1 false)
// CHECK: call {{.*}}<2 x i64> @llvm.ctlz.v2i64(<2 x i64> %{{.*}}, i1 true)
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm_mask_lzcnt_epi64(__W, __U, __A);
}

__m128i test_mm_maskz_lzcnt_epi64(__mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_maskz_lzcnt_epi64
// CHECK: call {{.*}}<2 x i64> @llvm.ctlz.v2i64(<2 x i64> %{{.*}}, i1 false)
// CHECK: call {{.*}}<2 x i64> @llvm.ctlz.v2i64(<2 x i64> %{{.*}}, i1 true)
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm_maskz_lzcnt_epi64(__U, __A);
}

__m256i test_mm256_lzcnt_epi64(__m256i __A) {
// CHECK-LABEL: test_mm256_lzcnt_epi64
// CHECK: call {{.*}}<4 x i64> @llvm.ctlz.v4i64(<4 x i64> %{{.*}}, i1 false)
// CHECK: call {{.*}}<4 x i64> @llvm.ctlz.v4i64(<4 x i64> %{{.*}}, i1 true)
return _mm256_lzcnt_epi64(__A);
}

TEST_CONSTEXPR(match_v4di(_mm256_lzcnt_epi64((__m256i)(__v4di){1, 2, 4, 8}), 63, 62, 61, 60));

__m256i test_mm256_mask_lzcnt_epi64(__m256i __W, __mmask8 __U, __m256i __A) {
// CHECK-LABEL: test_mm256_mask_lzcnt_epi64
// CHECK: call {{.*}}<4 x i64> @llvm.ctlz.v4i64(<4 x i64> %{{.*}}, i1 false)
// CHECK: call {{.*}}<4 x i64> @llvm.ctlz.v4i64(<4 x i64> %{{.*}}, i1 true)
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm256_mask_lzcnt_epi64(__W, __U, __A);
}

__m256i test_mm256_maskz_lzcnt_epi64(__mmask8 __U, __m256i __A) {
// CHECK-LABEL: test_mm256_maskz_lzcnt_epi64
// CHECK: call {{.*}}<4 x i64> @llvm.ctlz.v4i64(<4 x i64> %{{.*}}, i1 false)
// CHECK: call {{.*}}<4 x i64> @llvm.ctlz.v4i64(<4 x i64> %{{.*}}, i1 true)
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm256_maskz_lzcnt_epi64(__U, __A);
}
Loading