Skip to content

Commit 8a8cc05

Browse files
committed
[Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - Allow AVX512 VPSHUFBITQMB intrinsics to be used in constexpr
Resolves: #161337
1 parent b2f1233 commit 8a8cc05

File tree

7 files changed

+179
-42
lines changed

7 files changed

+179
-42
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1390,15 +1390,15 @@ let Features = "avx512cd", Attributes = [NoThrow, Const, Constexpr, RequiredVect
13901390
def vpconflictsi_512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>)">;
13911391
}
13921392

1393-
let Features = "avx512vl,avx512bitalg", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
1393+
let Features = "avx512vl,avx512bitalg", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
13941394
def vpshufbitqmb128_mask : X86Builtin<"unsigned short(_Vector<16, char>, _Vector<16, char>, unsigned short)">;
13951395
}
13961396

1397-
let Features = "avx512vl,avx512bitalg", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
1397+
let Features = "avx512vl,avx512bitalg", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
13981398
def vpshufbitqmb256_mask : X86Builtin<"unsigned int(_Vector<32, char>, _Vector<32, char>, unsigned int)">;
13991399
}
14001400

1401-
let Features = "avx512bitalg", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
1401+
let Features = "avx512bitalg", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
14021402
def vpshufbitqmb512_mask : X86Builtin<"unsigned long long int(_Vector<64, char>, _Vector<64, char>, unsigned long long int)">;
14031403
}
14041404

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3391,6 +3391,72 @@ static bool interp__builtin_ia32_shuffle_generic(
33913391
return true;
33923392
}
33933393

3394+
static bool interp__builtin_ia32_shufbitqmb_mask(InterpState &S, CodePtr OpPC,
3395+
const CallExpr *Call) {
3396+
3397+
assert(Call->getNumArgs() == 3);
3398+
3399+
QualType SourceType = Call->getArg(0)->getType();
3400+
QualType ShuffleMaskType = Call->getArg(1)->getType();
3401+
QualType ZeroMaskType = Call->getArg(2)->getType();
3402+
if (!SourceType->isVectorType() || !ShuffleMaskType->isVectorType() ||
3403+
!ZeroMaskType->isIntegerType()) {
3404+
return false;
3405+
}
3406+
3407+
Pointer Source, ShuffleMask;
3408+
APSInt ZeroMask = popToAPSInt(S, Call->getArg(2));
3409+
ShuffleMask = S.Stk.pop<Pointer>();
3410+
Source = S.Stk.pop<Pointer>();
3411+
3412+
const auto *SourceVecT = SourceType->castAs<VectorType>();
3413+
const auto *ShuffleMaskVecT = ShuffleMaskType->castAs<VectorType>();
3414+
assert(SourceVecT->getNumElements() == ShuffleMaskVecT->getNumElements());
3415+
assert(ZeroMask.getBitWidth() == SourceVecT->getNumElements());
3416+
3417+
PrimType SourceElemT = *S.getContext().classify(SourceVecT->getElementType());
3418+
PrimType ShuffleMaskElemT =
3419+
*S.getContext().classify(ShuffleMaskVecT->getElementType());
3420+
3421+
const unsigned NumBytesInQWord = 8;
3422+
const unsigned NumBitsInByte = 8;
3423+
const unsigned NumBytes = SourceVecT->getNumElements();
3424+
const unsigned NumQWords = NumBytes / NumBytesInQWord;
3425+
const unsigned RetWidth = ZeroMask.getBitWidth();
3426+
APSInt RetMask(llvm::APInt(RetWidth, 0), /*isUnsigned=*/true);
3427+
3428+
for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) {
3429+
3430+
APInt SourceQWord(64, 0);
3431+
for (unsigned ByteInQWord = 0; ByteInQWord != NumBytesInQWord;
3432+
++ByteInQWord) {
3433+
uint64_t Byte = 0;
3434+
INT_TYPE_SWITCH(SourceElemT, {
3435+
Byte = static_cast<uint64_t>(
3436+
Source.elem<T>(QWordId * NumBytesInQWord + ByteInQWord));
3437+
});
3438+
SourceQWord |= (Byte & 0xFF) << (ByteInQWord * NumBitsInByte);
3439+
}
3440+
3441+
for (unsigned ByteInQWord = 0; ByteInQWord != NumBytesInQWord;
3442+
++ByteInQWord) {
3443+
unsigned ByteIdx = QWordId * NumBytesInQWord + ByteInQWord;
3444+
unsigned M = 0;
3445+
INT_TYPE_SWITCH(ShuffleMaskElemT, {
3446+
M = static_cast<unsigned>(ShuffleMask.elem<T>(ByteIdx)) & 0x3F;
3447+
});
3448+
3449+
if (ZeroMask[ByteIdx]) {
3450+
RetMask.setBitVal(ByteIdx, SourceQWord[M]);
3451+
}
3452+
}
3453+
}
3454+
3455+
pushInteger(S, RetMask, Call->getType());
3456+
3457+
return true;
3458+
}
3459+
33943460
bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
33953461
uint32_t BuiltinID) {
33963462
if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
@@ -4712,6 +4778,12 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
47124778
case X86::BI__builtin_ia32_ucmpq512_mask:
47134779
return interp__builtin_ia32_cmp_mask(S, OpPC, Call, BuiltinID,
47144780
/*IsUnsigned=*/true);
4781+
4782+
case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
4783+
case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
4784+
case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
4785+
return interp__builtin_ia32_shufbitqmb_mask(S, OpPC, Call);
4786+
47154787
case X86::BI__builtin_ia32_pslldqi128_byteshift:
47164788
case X86::BI__builtin_ia32_pslldqi256_byteshift:
47174789
case X86::BI__builtin_ia32_pslldqi512_byteshift:

clang/lib/AST/ExprConstant.cpp

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16581,6 +16581,52 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
1658116581

1658216582
return Success(APValue(RetMask), E);
1658316583
}
16584+
case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
16585+
case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
16586+
case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
16587+
assert(E->getNumArgs() == 3);
16588+
16589+
APValue Source, ShuffleMask;
16590+
APSInt ZeroMask;
16591+
if (!EvaluateVector(E->getArg(0), Source, Info) ||
16592+
!EvaluateVector(E->getArg(1), ShuffleMask, Info) ||
16593+
!EvaluateInteger(E->getArg(2), ZeroMask, Info))
16594+
return false;
16595+
16596+
assert(Source.getVectorLength() == ShuffleMask.getVectorLength());
16597+
assert(ZeroMask.getBitWidth() == Source.getVectorLength());
16598+
16599+
unsigned NumBytesInQWord = 8;
16600+
unsigned NumBitsInByte = 8;
16601+
unsigned NumBytes = Source.getVectorLength();
16602+
unsigned NumQWords = NumBytes / NumBytesInQWord;
16603+
unsigned RetWidth = ZeroMask.getBitWidth();
16604+
APSInt RetMask(llvm::APInt(RetWidth, 0), /*isUnsigned=*/true);
16605+
16606+
for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) {
16607+
16608+
APInt SourceQWord(64, 0);
16609+
for (unsigned ByteInQWord = 0; ByteInQWord != NumBytesInQWord;
16610+
++ByteInQWord) {
16611+
uint64_t Byte =
16612+
Source.getVectorElt(QWordId * NumBytesInQWord + ByteInQWord)
16613+
.getInt()
16614+
.getZExtValue();
16615+
SourceQWord |= (Byte & 0xFF) << (ByteInQWord * NumBitsInByte);
16616+
}
16617+
16618+
for (unsigned ByteInQWord = 0; ByteInQWord != NumBytesInQWord;
16619+
++ByteInQWord) {
16620+
unsigned ByteIdx = QWordId * NumBytesInQWord + ByteInQWord;
16621+
unsigned M =
16622+
ShuffleMask.getVectorElt(ByteIdx).getInt().getZExtValue() & 0x3F;
16623+
if (ZeroMask[ByteIdx]) {
16624+
RetMask.setBitVal(ByteIdx, SourceQWord[M]);
16625+
}
16626+
}
16627+
}
16628+
return Success(APValue(RetMask), E);
16629+
}
1658416630
}
1658516631
}
1658616632

clang/lib/Headers/avx512bitalgintrin.h

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -15,44 +15,43 @@
1515
#define __AVX512BITALGINTRIN_H
1616

1717
/* Define the default attributes for the functions in this file. */
18+
#if defined(__cplusplus) && (__cplusplus >= 201103L)
19+
#define __DEFAULT_FN_ATTRS \
20+
constexpr \
21+
__attribute__((__always_inline__, __nodebug__, \
22+
__target__("avx512bitalg"), __min_vector_width__(512)))
23+
#else
1824
#define __DEFAULT_FN_ATTRS \
1925
__attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg"), \
2026
__min_vector_width__(512)))
21-
22-
#if defined(__cplusplus) && (__cplusplus >= 201103L)
23-
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
24-
#else
25-
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
2627
#endif
2728

28-
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
29-
_mm512_popcnt_epi16(__m512i __A) {
29+
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi16(__m512i __A) {
3030
return (__m512i)__builtin_elementwise_popcount((__v32hu)__A);
3131
}
3232

33-
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
33+
static __inline__ __m512i __DEFAULT_FN_ATTRS
3434
_mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B) {
3535
return (__m512i)__builtin_ia32_selectw_512(
3636
(__mmask32)__U, (__v32hi)_mm512_popcnt_epi16(__B), (__v32hi)__A);
3737
}
3838

39-
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
39+
static __inline__ __m512i __DEFAULT_FN_ATTRS
4040
_mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B) {
4141
return _mm512_mask_popcnt_epi16((__m512i)_mm512_setzero_si512(), __U, __B);
4242
}
4343

44-
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
45-
_mm512_popcnt_epi8(__m512i __A) {
44+
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi8(__m512i __A) {
4645
return (__m512i)__builtin_elementwise_popcount((__v64qu)__A);
4746
}
4847

49-
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
48+
static __inline__ __m512i __DEFAULT_FN_ATTRS
5049
_mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B) {
5150
return (__m512i)__builtin_ia32_selectb_512(
5251
(__mmask64)__U, (__v64qi)_mm512_popcnt_epi8(__B), (__v64qi)__A);
5352
}
5453

55-
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
54+
static __inline__ __m512i __DEFAULT_FN_ATTRS
5655
_mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B) {
5756
return _mm512_mask_popcnt_epi8((__m512i)_mm512_setzero_si512(), __U, __B);
5857
}
@@ -74,6 +73,4 @@ _mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B)
7473
}
7574

7675
#undef __DEFAULT_FN_ATTRS
77-
#undef __DEFAULT_FN_ATTRS_CONSTEXPR
78-
7976
#endif

clang/lib/Headers/avx512vlbitalgintrin.h

Lines changed: 22 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,16 @@
1515
#define __AVX512VLBITALGINTRIN_H
1616

1717
/* Define the default attributes for the functions in this file. */
18+
#if defined(__cplusplus) && (__cplusplus >= 201103L)
19+
#define __DEFAULT_FN_ATTRS128 \
20+
constexpr __attribute__((__always_inline__, __nodebug__, \
21+
__target__("avx512vl,avx512bitalg"), \
22+
__min_vector_width__(128)))
23+
#define __DEFAULT_FN_ATTRS256 \
24+
constexpr __attribute__((__always_inline__, __nodebug__, \
25+
__target__("avx512vl,avx512bitalg"), \
26+
__min_vector_width__(256)))
27+
#else
1828
#define __DEFAULT_FN_ATTRS128 \
1929
__attribute__((__always_inline__, __nodebug__, \
2030
__target__("avx512vl,avx512bitalg"), \
@@ -23,75 +33,66 @@
2333
__attribute__((__always_inline__, __nodebug__, \
2434
__target__("avx512vl,avx512bitalg"), \
2535
__min_vector_width__(256)))
26-
27-
#if defined(__cplusplus) && (__cplusplus >= 201103L)
28-
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
29-
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
30-
#else
31-
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
32-
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
3336
#endif
3437

35-
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
38+
static __inline__ __m256i __DEFAULT_FN_ATTRS256
3639
_mm256_popcnt_epi16(__m256i __A) {
3740
return (__m256i)__builtin_elementwise_popcount((__v16hu)__A);
3841
}
3942

40-
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
43+
static __inline__ __m256i __DEFAULT_FN_ATTRS256
4144
_mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B) {
4245
return (__m256i)__builtin_ia32_selectw_256(
4346
(__mmask16)__U, (__v16hi)_mm256_popcnt_epi16(__B), (__v16hi)__A);
4447
}
4548

46-
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
49+
static __inline__ __m256i __DEFAULT_FN_ATTRS256
4750
_mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B) {
4851
return _mm256_mask_popcnt_epi16((__m256i)_mm256_setzero_si256(), __U, __B);
4952
}
5053

51-
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
52-
_mm_popcnt_epi16(__m128i __A) {
54+
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_popcnt_epi16(__m128i __A) {
5355
return (__m128i)__builtin_elementwise_popcount((__v8hu)__A);
5456
}
5557

56-
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
58+
static __inline__ __m128i __DEFAULT_FN_ATTRS128
5759
_mm_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) {
5860
return (__m128i)__builtin_ia32_selectw_128(
5961
(__mmask8)__U, (__v8hi)_mm_popcnt_epi16(__B), (__v8hi)__A);
6062
}
6163

62-
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
64+
static __inline__ __m128i __DEFAULT_FN_ATTRS128
6365
_mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __B) {
6466
return _mm_mask_popcnt_epi16((__m128i)_mm_setzero_si128(), __U, __B);
6567
}
6668

67-
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
69+
static __inline__ __m256i __DEFAULT_FN_ATTRS256
6870
_mm256_popcnt_epi8(__m256i __A) {
6971
return (__m256i)__builtin_elementwise_popcount((__v32qu)__A);
7072
}
7173

72-
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
74+
static __inline__ __m256i __DEFAULT_FN_ATTRS256
7375
_mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B) {
7476
return (__m256i)__builtin_ia32_selectb_256(
7577
(__mmask32)__U, (__v32qi)_mm256_popcnt_epi8(__B), (__v32qi)__A);
7678
}
7779

78-
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
80+
static __inline__ __m256i __DEFAULT_FN_ATTRS256
7981
_mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B) {
8082
return _mm256_mask_popcnt_epi8((__m256i)_mm256_setzero_si256(), __U, __B);
8183
}
8284

83-
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
84-
_mm_popcnt_epi8(__m128i __A) {
85+
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_popcnt_epi8(__m128i __A) {
8586
return (__m128i)__builtin_elementwise_popcount((__v16qu)__A);
8687
}
8788

88-
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
89+
static __inline__ __m128i __DEFAULT_FN_ATTRS128
8990
_mm_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) {
9091
return (__m128i)__builtin_ia32_selectb_128(
9192
(__mmask16)__U, (__v16qi)_mm_popcnt_epi8(__B), (__v16qi)__A);
9293
}
9394

94-
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
95+
static __inline__ __m128i __DEFAULT_FN_ATTRS128
9596
_mm_maskz_popcnt_epi8(__mmask16 __U, __m128i __B) {
9697
return _mm_mask_popcnt_epi8((__m128i)_mm_setzero_si128(), __U, __B);
9798
}
@@ -131,7 +132,4 @@ _mm_bitshuffle_epi64_mask(__m128i __A, __m128i __B)
131132

132133
#undef __DEFAULT_FN_ATTRS128
133134
#undef __DEFAULT_FN_ATTRS256
134-
#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
135-
#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
136-
137135
#endif

clang/test/CodeGen/X86/avx512bitalg-builtins.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,4 +70,14 @@ __mmask64 test_mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B) {
7070
// CHECK: @llvm.x86.avx512.vpshufbitqmb.512
7171
return _mm512_bitshuffle_epi64_mask(__A, __B);
7272
}
73+
TEST_CONSTEXPR(_mm512_bitshuffle_epi64_mask(
74+
(__m512i)(__v64qi){1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128, -1,0,0,0,0,0,0,0, 85,85,85,85,85,85,85,85,
75+
1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128, -1,0,0,0,0,0,0,0, 85,85,85,85,85,85,85,85},
76+
(__m512i)(__v64qi){0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56, 0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,7,
77+
0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56, 0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,7}) == 0x55ff010155ff0101ULL);
7378

79+
TEST_CONSTEXPR(_mm512_mask_bitshuffle_epi64_mask(0xFFFFFFFF00000000ULL,
80+
(__m512i)(__v64qi){1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128, -1,0,0,0,0,0,0,0, 85,85,85,85,85,85,85,85,
81+
1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128, -1,0,0,0,0,0,0,0, 85,85,85,85,85,85,85,85},
82+
(__m512i)(__v64qi){0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56, 0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,7,
83+
0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56, 0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,7}) == 0x55ff010100000000ULL);

clang/test/CodeGen/X86/avx512vlbitalg-builtins.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,13 @@ __mmask32 test_mm256_bitshuffle_epi64_mask(__m256i __A, __m256i __B) {
116116
// CHECK: @llvm.x86.avx512.vpshufbitqmb.256
117117
return _mm256_bitshuffle_epi64_mask(__A, __B);
118118
}
119+
TEST_CONSTEXPR(_mm256_bitshuffle_epi64_mask(
120+
(__m256i)(__v32qi){1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128, -1,0,0,0,0,0,0,0, 85,85,85,85,85,85,85,85},
121+
(__m256i)(__v32qi){0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56, 0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,7}) == 0x55ff0101);
122+
123+
TEST_CONSTEXPR(_mm256_mask_bitshuffle_epi64_mask(0xFFFF0000,
124+
(__m256i)(__v32qi){1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128, -1,0,0,0,0,0,0,0, 85,85,85,85,85,85,85,85},
125+
(__m256i)(__v32qi){0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56, 0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,7}) == 0x55ff0000);
119126

120127
__mmask16 test_mm_mask_bitshuffle_epi64_mask(__mmask16 __U, __m128i __A, __m128i __B) {
121128
// CHECK-LABEL: test_mm_mask_bitshuffle_epi64_mask
@@ -129,4 +136,11 @@ __mmask16 test_mm_bitshuffle_epi64_mask(__m128i __A, __m128i __B) {
129136
// CHECK: @llvm.x86.avx512.vpshufbitqmb.128
130137
return _mm_bitshuffle_epi64_mask(__A, __B);
131138
}
139+
TEST_CONSTEXPR(_mm_bitshuffle_epi64_mask(
140+
(__m128i)(__v16qi){1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128},
141+
(__m128i)(__v16qi){0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56}) == 0x0101);
142+
143+
TEST_CONSTEXPR(_mm_mask_bitshuffle_epi64_mask(0xFF00,
144+
(__m128i)(__v16qi){1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128},
145+
(__m128i)(__v16qi){0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56}) == 0x0100);
132146

0 commit comments

Comments
 (0)