Skip to content
Merged
4 changes: 2 additions & 2 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -2142,7 +2142,7 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>
def vcomiss : X86Builtin<"int(_Vector<4, float>, _Vector<4, float>, _Constant int, _Constant int)">;
}

let Features = "avx512bw", Attributes = [NoThrow, Const] in {
let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in {
def kunpckdi : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">;
def kunpcksi : X86Builtin<"unsigned int(unsigned int, unsigned int)">;
}
Expand Down Expand Up @@ -3185,7 +3185,7 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in {
def ktestzdi : X86Builtin<"int(unsigned long long int, unsigned long long int)">;
}

let Features = "avx512f", Attributes = [NoThrow, Const] in {
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr] in {
def kunpckhi : X86Builtin<"unsigned short(unsigned short, unsigned short)">;
}

Expand Down
12 changes: 12 additions & 0 deletions clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4619,6 +4619,18 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
S, OpPC, Call,
[](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; });

case X86::BI__builtin_ia32_kunpckhi:
case X86::BI__builtin_ia32_kunpckdi:
case X86::BI__builtin_ia32_kunpcksi:
return interp__builtin_elementwise_int_binop(
S, OpPC, Call, [](const APSInt &A, const APSInt &B) {
// Generic kunpack: extract lower half of each operand and concatenate
// Result = A[HalfWidth-1:0] concat B[HalfWidth-1:0]
unsigned BW = A.getBitWidth();
return APSInt(A.trunc(BW / 2).concat(B.trunc(BW / 2)),
A.isUnsigned());
});

case X86::BI__builtin_ia32_phminposuw128:
return interp__builtin_ia32_phminposuw(S, OpPC, Call);

Expand Down
15 changes: 15 additions & 0 deletions clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16348,6 +16348,21 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
return Success((A | B) == 0, E);
}

case clang::X86::BI__builtin_ia32_kunpckhi:
case clang::X86::BI__builtin_ia32_kunpckdi:
case clang::X86::BI__builtin_ia32_kunpcksi: {
APSInt A, B;
if (!EvaluateInteger(E->getArg(0), A, Info) ||
!EvaluateInteger(E->getArg(1), B, Info))
return false;

// Generic kunpack: extract lower half of each operand and concatenate
// Result = A[HalfWidth-1:0] concat B[HalfWidth-1:0]
unsigned BW = A.getBitWidth();
APSInt Result(A.trunc(BW / 2).concat(B.trunc(BW / 2)), A.isUnsigned());
return Success(Result, E);
}

case clang::X86::BI__builtin_ia32_lzcnt_u16:
case clang::X86::BI__builtin_ia32_lzcnt_u32:
case clang::X86::BI__builtin_ia32_lzcnt_u64: {
Expand Down
9 changes: 4 additions & 5 deletions clang/lib/Headers/avx512bwintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -1606,15 +1606,14 @@ _mm512_maskz_set1_epi8(__mmask64 __M, char __A) {
(__v64qi) _mm512_setzero_si512());
}

static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_kunpackd(__mmask64 __A,
__mmask64 __B) {
static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm512_kunpackd(__mmask64 __A, __mmask64 __B) {
return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A,
(__mmask64) __B);
}

static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_mm512_kunpackw (__mmask32 __A, __mmask32 __B)
{
static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm512_kunpackw(__mmask32 __A, __mmask32 __B) {
return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A,
(__mmask32) __B);
}
Expand Down
5 changes: 2 additions & 3 deletions clang/lib/Headers/avx512fintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -8069,9 +8069,8 @@ _kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
{
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm512_kunpackb(__mmask16 __A, __mmask16 __B) {
return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
}

Expand Down
12 changes: 12 additions & 0 deletions clang/test/CodeGen/X86/avx512bw-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -2731,6 +2731,12 @@ __mmask64 test_mm512_kunpackd(__m512i __A, __m512i __B, __m512i __C, __m512i __D
return _mm512_mask_cmpneq_epu8_mask(_mm512_kunpackd(_mm512_cmpneq_epu8_mask(__B, __A),_mm512_cmpneq_epu8_mask(__C, __D)), __E, __F);
}

TEST_CONSTEXPR(_mm512_kunpackd(0xFFFFFFFF00000000ull, 0x00000000FFFFFFFFull) == 0x00000000FFFFFFFFull);
TEST_CONSTEXPR(_mm512_kunpackd(0xABCDEF0123456789ull, 0x0123456789ABCDEFull) == 0x2345678989ABCDEFull);
TEST_CONSTEXPR(_mm512_kunpackd(0x00000000FFFFFFFFull, 0xFFFFFFFF00000000ull) == 0xFFFFFFFF00000000ull);
TEST_CONSTEXPR(_mm512_kunpackd(0xAAAA5555AAAA5555ull, 0x5555AAAA5555AAAAull) == 0xAAAA55555555AAAAull);
TEST_CONSTEXPR(_mm512_kunpackd(0x123456789ABCDEFull, 0xFEDCBA9876543210ull) == 0x89ABCDEF76543210ull);

__mmask32 test_mm512_kunpackw(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
// CHECK-LABEL: test_mm512_kunpackw
// CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
Expand All @@ -2741,6 +2747,12 @@ __mmask32 test_mm512_kunpackw(__m512i __A, __m512i __B, __m512i __C, __m512i __D
return _mm512_mask_cmpneq_epu16_mask(_mm512_kunpackw(_mm512_cmpneq_epu16_mask(__B, __A),_mm512_cmpneq_epu16_mask(__C, __D)), __E, __F);
}

TEST_CONSTEXPR(_mm512_kunpackw(0xFFFF0000u, 0x0000FFFFu) == 0x0000FFFFu);
TEST_CONSTEXPR(_mm512_kunpackw(0xABCD1234u, 0x56789ABCu) == 0x12349ABCu);
TEST_CONSTEXPR(_mm512_kunpackw(0x0000FFFFu, 0xFFFF0000u) == 0xFFFF0000u);
TEST_CONSTEXPR(_mm512_kunpackw(0xAAAA5555u, 0x5555AAAAu) == 0x5555AAAAu);
TEST_CONSTEXPR(_mm512_kunpackw(0x12345678u, 0xABCDEF12u) == 0x5678EF12u);

__m512i test_mm512_loadu_epi16 (void *__P)
{
// CHECK-LABEL: test_mm512_loadu_epi16
Expand Down
6 changes: 6 additions & 0 deletions clang/test/CodeGen/X86/avx512f-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -9162,6 +9162,12 @@ __mmask16 test_mm512_kunpackb(__m512i __A, __m512i __B, __m512i __C, __m512i __D
__E, __F);
}

TEST_CONSTEXPR(_mm512_kunpackb(0xFF00, 0x00FF) == 0x00FF);
TEST_CONSTEXPR(_mm512_kunpackb(0xABCD, 0x1234) == 0xCD34);
TEST_CONSTEXPR(_mm512_kunpackb(0x00FF, 0xFF00) == 0xFF00);
TEST_CONSTEXPR(_mm512_kunpackb(0xAAAA, 0x5555) == 0xAA55);
TEST_CONSTEXPR(_mm512_kunpackb(0x1234, 0xABCD) == 0x34CD);

__mmask16 test_mm512_kxnor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
// CHECK-LABEL: test_mm512_kxnor
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
Expand Down
Loading