Skip to content
Merged
51 changes: 51 additions & 0 deletions clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16287,6 +16287,42 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
return Success((A | B) == 0, E);
}

case clang::X86::BI__builtin_ia32_kunpckhi: {
APSInt A, B;
if (!EvaluateInteger(E->getArg(0), A, Info) ||
!EvaluateInteger(E->getArg(1), B, Info))
return false;

// Extract lower 8 bits of each operand and concatenate
// Result = (A[7:0] << 8) | B[7:0]
APSInt Result = ((A & 0xFF) << 8) | (B & 0xFF);
return Success(Result, E);
}

case clang::X86::BI__builtin_ia32_kunpckdi: {
APSInt A, B;
if (!EvaluateInteger(E->getArg(0), A, Info) ||
!EvaluateInteger(E->getArg(1), B, Info))
return false;

// Extract lower 32 bits of each operand and concatenate
// Result = (A[31:0] << 32) | B[31:0]
APSInt Result = ((A & 0xFFFFFFFFULL) << 32) | (B & 0xFFFFFFFFULL);
return Success(Result, E);
}

case clang::X86::BI__builtin_ia32_kunpcksi: {
APSInt A, B;
if (!EvaluateInteger(E->getArg(0), A, Info) ||
!EvaluateInteger(E->getArg(1), B, Info))
return false;

// Extract lower 16 bits of each operand and concatenate
// Result = (A[15:0] << 16) | B[15:0]
APSInt Result = ((A & 0xFFFF) << 16) | (B & 0xFFFF);
return Success(Result, E);
}

case clang::X86::BI__builtin_ia32_lzcnt_u16:
case clang::X86::BI__builtin_ia32_lzcnt_u32:
case clang::X86::BI__builtin_ia32_lzcnt_u64: {
Expand Down Expand Up @@ -16413,6 +16449,21 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
return Success(APValue(Result), E);
}

case X86::BI__builtin_ia32_kunpckhi:
case X86::BI__builtin_ia32_kunpcksi:
case X86::BI__builtin_ia32_kunpckdi: {
return HandleMaskBinOp([](const APSInt &LHS, const APSInt &RHS) {
// Unpack: concatenate lower half of RHS with lower half of LHS
unsigned HalfBits = LHS.getBitWidth() / 2;
APSInt Mask = APSInt::getMaxValue(LHS.getBitWidth(), LHS.isUnsigned());
Mask = Mask.trunc(HalfBits).zext(LHS.getBitWidth());

APSInt LowerLHS = LHS & Mask;
APSInt LowerRHS = RHS & Mask;
return LowerRHS | (LowerLHS << HalfBits);
});
}

case X86::BI__builtin_ia32_kaddqi:
case X86::BI__builtin_ia32_kaddhi:
case X86::BI__builtin_ia32_kaddsi:
Expand Down
9 changes: 4 additions & 5 deletions clang/lib/Headers/avx512bwintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -1606,15 +1606,14 @@ _mm512_maskz_set1_epi8(__mmask64 __M, char __A) {
(__v64qi) _mm512_setzero_si512());
}

static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_kunpackd(__mmask64 __A,
__mmask64 __B) {
static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm512_kunpackd(__mmask64 __A, __mmask64 __B) {
return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A,
(__mmask64) __B);
}

static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_mm512_kunpackw (__mmask32 __A, __mmask32 __B)
{
static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm512_kunpackw(__mmask32 __A, __mmask32 __B) {
return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A,
(__mmask32) __B);
}
Expand Down
17 changes: 14 additions & 3 deletions clang/lib/Headers/avx512fintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ typedef long long __m512i_u __attribute__((__vector_size__(64), __aligned__(1)))

typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
typedef unsigned int __mmask32;
typedef unsigned long long __mmask64;

/* Rounding mode macros. */
#define _MM_FROUND_TO_NEAREST_INT 0x00
Expand Down Expand Up @@ -8094,12 +8096,21 @@ _kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
{
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm512_kunpackb(__mmask16 __A, __mmask16 __B) {
return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
}

static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm512_kunpackd(__mmask64 __A, __mmask64 __B) {
return (__mmask64)__builtin_ia32_kunpckdi((__mmask64)__A, (__mmask64)__B);
}

static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm512_kunpackw(__mmask32 __A, __mmask32 __B) {
return (__mmask32)__builtin_ia32_kunpcksi((__mmask32)__A, (__mmask32)__B);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm512_kxnor(__mmask16 __A, __mmask16 __B) {
return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
Expand Down
18 changes: 18 additions & 0 deletions clang/test/CodeGen/X86/avx512f-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -9126,6 +9126,24 @@ __mmask16 test_mm512_kunpackb(__m512i __A, __m512i __B, __m512i __C, __m512i __D
__E, __F);
}

TEST_CONSTEXPR(_mm512_kunpackb(0xFF00, 0x00FF) == 0xFF00);
TEST_CONSTEXPR(_mm512_kunpackb(0xABCD, 0x1234) == 0xCD34);
TEST_CONSTEXPR(_mm512_kunpackb(0x00FF, 0xFF00) == 0x0000);
TEST_CONSTEXPR(_mm512_kunpackb(0xAAAA, 0x5555) == 0xAA55);
TEST_CONSTEXPR(_mm512_kunpackb(0x1234, 0xABCD) == 0x34CD);

TEST_CONSTEXPR(_mm512_kunpackw(0xFFFF0000u, 0x0000FFFFu) == 0x0000FFFFu);
TEST_CONSTEXPR(_mm512_kunpackw(0xABCD1234u, 0x56789ABCu) == 0x12349ABCu);
TEST_CONSTEXPR(_mm512_kunpackw(0x0000FFFFu, 0xFFFF0000u) == 0x00000000u);
TEST_CONSTEXPR(_mm512_kunpackw(0xAAAA5555u, 0x5555AAAAu) == 0x5555AAAAu);
TEST_CONSTEXPR(_mm512_kunpackw(0x12345678u, 0xABCDEF12u) == 0x5678EF12u);

TEST_CONSTEXPR(_mm512_kunpackd(0xFFFFFFFF00000000ull, 0x00000000FFFFFFFFull) == 0x00000000FFFFFFFFull);
TEST_CONSTEXPR(_mm512_kunpackd(0xABCDEF0123456789ull, 0x0123456789ABCDEFull) == 0x234567899ABCDEFull);
TEST_CONSTEXPR(_mm512_kunpackd(0x00000000FFFFFFFFull, 0xFFFFFFFF00000000ull) == 0x0000000000000000ull);
TEST_CONSTEXPR(_mm512_kunpackd(0xAAAA5555AAAA5555ull, 0x5555AAAA5555AAAAull) == 0xAAAA55555555AAAAull);
TEST_CONSTEXPR(_mm512_kunpackd(0x123456789ABCDEFull, 0xFEDCBA9876543210ull) == 0x89ABCDEF76543210ull);

__mmask16 test_mm512_kxnor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
// CHECK-LABEL: test_mm512_kxnor
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
Expand Down
Loading