Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -1366,15 +1366,15 @@ let Features = "avx512cd", Attributes = [NoThrow, Const, Constexpr, RequiredVect
def vpconflictsi_512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>)">;
}

let Features = "avx512vl,avx512bitalg", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx512vl,avx512bitalg", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def vpshufbitqmb128_mask : X86Builtin<"unsigned short(_Vector<16, char>, _Vector<16, char>, unsigned short)">;
}

let Features = "avx512vl,avx512bitalg", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx512vl,avx512bitalg", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def vpshufbitqmb256_mask : X86Builtin<"unsigned int(_Vector<32, char>, _Vector<32, char>, unsigned int)">;
}

let Features = "avx512bitalg", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
let Features = "avx512bitalg", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def vpshufbitqmb512_mask : X86Builtin<"unsigned long long int(_Vector<64, char>, _Vector<64, char>, unsigned long long int)">;
}

Expand Down
69 changes: 69 additions & 0 deletions clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3468,6 +3468,69 @@ static bool interp__builtin_ia32_shuffle_generic(
return true;
}

static bool interp__builtin_ia32_shufbitqmb_mask(InterpState &S, CodePtr OpPC,
const CallExpr *Call) {

assert(Call->getNumArgs() == 3);

QualType SourceType = Call->getArg(0)->getType();
QualType ShuffleMaskType = Call->getArg(1)->getType();
QualType ZeroMaskType = Call->getArg(2)->getType();
if (!SourceType->isVectorType() || !ShuffleMaskType->isVectorType() ||
!ZeroMaskType->isIntegerType()) {
return false;
}

Pointer Source, ShuffleMask;
APSInt ZeroMask = popToAPSInt(S, Call->getArg(2));
ShuffleMask = S.Stk.pop<Pointer>();
Source = S.Stk.pop<Pointer>();

const auto *SourceVecT = SourceType->castAs<VectorType>();
const auto *ShuffleMaskVecT = ShuffleMaskType->castAs<VectorType>();
assert(SourceVecT->getNumElements() == ShuffleMaskVecT->getNumElements());
assert(ZeroMask.getBitWidth() == SourceVecT->getNumElements());

PrimType SourceElemT = *S.getContext().classify(SourceVecT->getElementType());
PrimType ShuffleMaskElemT =
*S.getContext().classify(ShuffleMaskVecT->getElementType());

unsigned NumBytesInQWord = 8;
unsigned NumBitsInByte = 8;
unsigned NumBytes = SourceVecT->getNumElements();
unsigned NumQWords = NumBytes / NumBytesInQWord;
unsigned RetWidth = ZeroMask.getBitWidth();
APSInt RetMask(llvm::APInt(RetWidth, 0), /*isUnsigned=*/true);

for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) {
APInt SourceQWord(64, 0);
for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
uint64_t Byte = 0;
INT_TYPE_SWITCH(SourceElemT, {
Byte = static_cast<uint64_t>(
Source.elem<T>(QWordId * NumBytesInQWord + ByteIdx));
});
SourceQWord.insertBits(APInt(8, Byte & 0xFF), ByteIdx * NumBitsInByte);
}

for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
unsigned SelIdx = QWordId * NumBytesInQWord + ByteIdx;
unsigned M = 0;
INT_TYPE_SWITCH(ShuffleMaskElemT, {
M = static_cast<unsigned>(ShuffleMask.elem<T>(SelIdx)) & 0x3F;
});

if (ZeroMask[SelIdx]) {
RetMask.setBitVal(SelIdx, SourceQWord[M]);
}
}
}

pushInteger(S, RetMask, Call->getType());

return true;
}

bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
uint32_t BuiltinID) {
if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
Expand Down Expand Up @@ -4868,6 +4931,12 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_ucmpq512_mask:
return interp__builtin_ia32_cmp_mask(S, OpPC, Call, BuiltinID,
/*IsUnsigned=*/true);

case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
return interp__builtin_ia32_shufbitqmb_mask(S, OpPC, Call);

case X86::BI__builtin_ia32_pslldqi128_byteshift:
case X86::BI__builtin_ia32_pslldqi256_byteshift:
case X86::BI__builtin_ia32_pslldqi512_byteshift:
Expand Down
42 changes: 42 additions & 0 deletions clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16762,6 +16762,48 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,

return Success(APValue(RetMask), E);
}
case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
assert(E->getNumArgs() == 3);

APValue Source, ShuffleMask;
APSInt ZeroMask;
if (!EvaluateVector(E->getArg(0), Source, Info) ||
!EvaluateVector(E->getArg(1), ShuffleMask, Info) ||
!EvaluateInteger(E->getArg(2), ZeroMask, Info))
return false;

assert(Source.getVectorLength() == ShuffleMask.getVectorLength());
assert(ZeroMask.getBitWidth() == Source.getVectorLength());

unsigned NumBytesInQWord = 8;
unsigned NumBitsInByte = 8;
unsigned NumBytes = Source.getVectorLength();
unsigned NumQWords = NumBytes / NumBytesInQWord;
unsigned RetWidth = ZeroMask.getBitWidth();
APSInt RetMask(llvm::APInt(RetWidth, 0), /*isUnsigned=*/true);

for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) {
APInt SourceQWord(64, 0);
for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
uint64_t Byte = Source.getVectorElt(QWordId * NumBytesInQWord + ByteIdx)
.getInt()
.getZExtValue();
SourceQWord.insertBits(APInt(8, Byte & 0xFF), ByteIdx * NumBitsInByte);
}

for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
unsigned SelIdx = QWordId * NumBytesInQWord + ByteIdx;
unsigned M =
ShuffleMask.getVectorElt(SelIdx).getInt().getZExtValue() & 0x3F;
if (ZeroMask[SelIdx]) {
RetMask.setBitVal(SelIdx, SourceQWord[M]);
}
}
}
return Success(APValue(RetMask), E);
}
}
}

Expand Down
26 changes: 11 additions & 15 deletions clang/lib/Headers/avx512bitalgintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,44 +15,42 @@
#define __AVX512BITALGINTRIN_H

/* Define the default attributes for the functions in this file. */
#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg"), \
__min_vector_width__(512)))

#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
__min_vector_width__(512))) constexpr
#else
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg"), \
__min_vector_width__(512)))
#endif

static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm512_popcnt_epi16(__m512i __A) {
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi16(__m512i __A) {
return (__m512i)__builtin_elementwise_popcount((__v32hu)__A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512(
(__mmask32)__U, (__v32hi)_mm512_popcnt_epi16(__B), (__v32hi)__A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B) {
return _mm512_mask_popcnt_epi16((__m512i)_mm512_setzero_si512(), __U, __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm512_popcnt_epi8(__m512i __A) {
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi8(__m512i __A) {
return (__m512i)__builtin_elementwise_popcount((__v64qu)__A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512(
(__mmask64)__U, (__v64qi)_mm512_popcnt_epi8(__B), (__v64qi)__A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B) {
return _mm512_mask_popcnt_epi8((__m512i)_mm512_setzero_si512(), __U, __B);
}
Expand All @@ -74,6 +72,4 @@ _mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B)
}

#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS_CONSTEXPR

#endif
46 changes: 22 additions & 24 deletions clang/lib/Headers/avx512vlbitalgintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,16 @@
#define __AVX512VLBITALGINTRIN_H

/* Define the default attributes for the functions in this file. */
#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,avx512bitalg"), \
__min_vector_width__(128))) constexpr
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,avx512bitalg"), \
__min_vector_width__(256))) constexpr
#else
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,avx512bitalg"), \
Expand All @@ -23,75 +33,66 @@
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,avx512bitalg"), \
__min_vector_width__(256)))

#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
#else
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
#endif

static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_popcnt_epi16(__m256i __A) {
return (__m256i)__builtin_elementwise_popcount((__v16hu)__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B) {
return (__m256i)__builtin_ia32_selectw_256(
(__mmask16)__U, (__v16hi)_mm256_popcnt_epi16(__B), (__v16hi)__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B) {
return _mm256_mask_popcnt_epi16((__m256i)_mm256_setzero_si256(), __U, __B);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_popcnt_epi16(__m128i __A) {
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_popcnt_epi16(__m128i __A) {
return (__m128i)__builtin_elementwise_popcount((__v8hu)__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) {
return (__m128i)__builtin_ia32_selectw_128(
(__mmask8)__U, (__v8hi)_mm_popcnt_epi16(__B), (__v8hi)__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __B) {
return _mm_mask_popcnt_epi16((__m128i)_mm_setzero_si128(), __U, __B);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_popcnt_epi8(__m256i __A) {
return (__m256i)__builtin_elementwise_popcount((__v32qu)__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B) {
return (__m256i)__builtin_ia32_selectb_256(
(__mmask32)__U, (__v32qi)_mm256_popcnt_epi8(__B), (__v32qi)__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B) {
return _mm256_mask_popcnt_epi8((__m256i)_mm256_setzero_si256(), __U, __B);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_popcnt_epi8(__m128i __A) {
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_popcnt_epi8(__m128i __A) {
return (__m128i)__builtin_elementwise_popcount((__v16qu)__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) {
return (__m128i)__builtin_ia32_selectb_128(
(__mmask16)__U, (__v16qi)_mm_popcnt_epi8(__B), (__v16qi)__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_popcnt_epi8(__mmask16 __U, __m128i __B) {
return _mm_mask_popcnt_epi8((__m128i)_mm_setzero_si128(), __U, __B);
}
Expand Down Expand Up @@ -131,7 +132,4 @@ _mm_bitshuffle_epi64_mask(__m128i __A, __m128i __B)

#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
#undef __DEFAULT_FN_ATTRS256_CONSTEXPR

#endif
10 changes: 10 additions & 0 deletions clang/test/CodeGen/X86/avx512bitalg-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,4 +70,14 @@ __mmask64 test_mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B) {
// CHECK: @llvm.x86.avx512.vpshufbitqmb.512
return _mm512_bitshuffle_epi64_mask(__A, __B);
}
TEST_CONSTEXPR(_mm512_bitshuffle_epi64_mask(
(__m512i)(__v64qi){1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128, -1,0,0,0,0,0,0,0, 85,85,85,85,85,85,85,85,
1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128, -1,0,0,0,0,0,0,0, 85,85,85,85,85,85,85,85},
(__m512i)(__v64qi){0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56, 0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,7,
0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56, 0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,7}) == 0x55ff010155ff0101ULL);

TEST_CONSTEXPR(_mm512_mask_bitshuffle_epi64_mask(0xFFFFFFFF00000000ULL,
(__m512i)(__v64qi){1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128, -1,0,0,0,0,0,0,0, 85,85,85,85,85,85,85,85,
1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128, -1,0,0,0,0,0,0,0, 85,85,85,85,85,85,85,85},
(__m512i)(__v64qi){0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56, 0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,7,
0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56, 0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,7}) == 0x55ff010100000000ULL);
14 changes: 14 additions & 0 deletions clang/test/CodeGen/X86/avx512vlbitalg-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,13 @@ __mmask32 test_mm256_bitshuffle_epi64_mask(__m256i __A, __m256i __B) {
// CHECK: @llvm.x86.avx512.vpshufbitqmb.256
return _mm256_bitshuffle_epi64_mask(__A, __B);
}
TEST_CONSTEXPR(_mm256_bitshuffle_epi64_mask(
(__m256i)(__v32qi){1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128, -1,0,0,0,0,0,0,0, 85,85,85,85,85,85,85,85},
(__m256i)(__v32qi){0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56, 0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,7}) == 0x55ff0101);

TEST_CONSTEXPR(_mm256_mask_bitshuffle_epi64_mask(0xFFFF0000,
(__m256i)(__v32qi){1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128, -1,0,0,0,0,0,0,0, 85,85,85,85,85,85,85,85},
(__m256i)(__v32qi){0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56, 0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,7}) == 0x55ff0000);

__mmask16 test_mm_mask_bitshuffle_epi64_mask(__mmask16 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_bitshuffle_epi64_mask
Expand All @@ -129,4 +136,11 @@ __mmask16 test_mm_bitshuffle_epi64_mask(__m128i __A, __m128i __B) {
// CHECK: @llvm.x86.avx512.vpshufbitqmb.128
return _mm_bitshuffle_epi64_mask(__A, __B);
}
TEST_CONSTEXPR(_mm_bitshuffle_epi64_mask(
(__m128i)(__v16qi){1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128},
(__m128i)(__v16qi){0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56}) == 0x0101);

TEST_CONSTEXPR(_mm_mask_bitshuffle_epi64_mask(0xFF00,
(__m128i)(__v16qi){1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128},
(__m128i)(__v16qi){0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56}) == 0x0100);

Loading