Skip to content

Commit c66f105

Browse files
[clang] Add _mm512_kunpackd and _mm512_kunpackw intrinsics with constexpr support
Add AVX-512 mask unpack intrinsics _mm512_kunpackd and _mm512_kunpackw to avx512fintrin.h alongside the existing _mm512_kunpackb intrinsic. These intrinsics extract and concatenate the lower halves of mask registers, using the existing backend support for __builtin_ia32_kunpckdi and __builtin_ia32_kunpcksi builtins. Also adds __mmask32 and __mmask64 type definitions to avx512fintrin.h for completeness. This patch adds constexpr support for all three kunpack intrinsics by: 1. Using __DEFAULT_FN_ATTRS_CONSTEXPR attribute 2. Adding builtin interpretation in ExprConstant.cpp for compile-time evaluation in constexpr contexts 3. Adding constexpr tests to verify correct behavior Tests already exist in clang/test/CodeGen/X86/avx512bw-builtins.c for runtime code generation validation.
1 parent e91be48 commit c66f105

File tree

4 files changed

+79
-12
lines changed

4 files changed

+79
-12
lines changed

clang/lib/AST/ExprConstant.cpp

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16287,6 +16287,42 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
1628716287
return Success((A | B) == 0, E);
1628816288
}
1628916289

16290+
case clang::X86::BI__builtin_ia32_kunpckhi: {
16291+
APSInt A, B;
16292+
if (!EvaluateInteger(E->getArg(0), A, Info) ||
16293+
!EvaluateInteger(E->getArg(1), B, Info))
16294+
return false;
16295+
16296+
// Extract lower 8 bits of each operand and concatenate
16297+
// Result = (A[7:0] << 8) | B[7:0]
16298+
APSInt Result = ((A & 0xFF) << 8) | (B & 0xFF);
16299+
return Success(Result, E);
16300+
}
16301+
16302+
case clang::X86::BI__builtin_ia32_kunpckdi: {
16303+
APSInt A, B;
16304+
if (!EvaluateInteger(E->getArg(0), A, Info) ||
16305+
!EvaluateInteger(E->getArg(1), B, Info))
16306+
return false;
16307+
16308+
// Extract lower 32 bits of each operand and concatenate
16309+
// Result = (A[31:0] << 32) | B[31:0]
16310+
APSInt Result = ((A & 0xFFFFFFFFULL) << 32) | (B & 0xFFFFFFFFULL);
16311+
return Success(Result, E);
16312+
}
16313+
16314+
case clang::X86::BI__builtin_ia32_kunpcksi: {
16315+
APSInt A, B;
16316+
if (!EvaluateInteger(E->getArg(0), A, Info) ||
16317+
!EvaluateInteger(E->getArg(1), B, Info))
16318+
return false;
16319+
16320+
// Extract lower 16 bits of each operand and concatenate
16321+
// Result = (A[15:0] << 16) | B[15:0]
16322+
APSInt Result = ((A & 0xFFFF) << 16) | (B & 0xFFFF);
16323+
return Success(Result, E);
16324+
}
16325+
1629016326
case clang::X86::BI__builtin_ia32_lzcnt_u16:
1629116327
case clang::X86::BI__builtin_ia32_lzcnt_u32:
1629216328
case clang::X86::BI__builtin_ia32_lzcnt_u64: {
@@ -16413,6 +16449,21 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
1641316449
return Success(APValue(Result), E);
1641416450
}
1641516451

16452+
case X86::BI__builtin_ia32_kunpckhi:
16453+
case X86::BI__builtin_ia32_kunpcksi:
16454+
case X86::BI__builtin_ia32_kunpckdi: {
16455+
return HandleMaskBinOp([](const APSInt &LHS, const APSInt &RHS) {
16456+
// Unpack: concatenate lower half of RHS with lower half of LHS
16457+
unsigned HalfBits = LHS.getBitWidth() / 2;
16458+
APSInt Mask = APSInt::getMaxValue(LHS.getBitWidth(), LHS.isUnsigned());
16459+
Mask = Mask.trunc(HalfBits).zext(LHS.getBitWidth());
16460+
16461+
APSInt LowerLHS = LHS & Mask;
16462+
APSInt LowerRHS = RHS & Mask;
16463+
return LowerRHS | (LowerLHS << HalfBits);
16464+
});
16465+
}
16466+
1641616467
case X86::BI__builtin_ia32_kaddqi:
1641716468
case X86::BI__builtin_ia32_kaddhi:
1641816469
case X86::BI__builtin_ia32_kaddsi:

clang/lib/Headers/avx512bwintrin.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1606,13 +1606,13 @@ _mm512_maskz_set1_epi8(__mmask64 __M, char __A) {
16061606
(__v64qi) _mm512_setzero_si512());
16071607
}
16081608

1609-
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_kunpackd(__mmask64 __A,
1610-
__mmask64 __B) {
1609+
static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR
1610+
_mm512_kunpackd(__mmask64 __A, __mmask64 __B) {
16111611
return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A,
16121612
(__mmask64) __B);
16131613
}
16141614

1615-
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
1615+
static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR
16161616
_mm512_kunpackw (__mmask32 __A, __mmask32 __B)
16171617
{
16181618
return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A,

clang/lib/Headers/avx512fintrin.h

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8096,22 +8096,20 @@ _kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
80968096
return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
80978097
}
80988098

8099-
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8099+
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR
81008100
_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
81018101
{
81028102
return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
81038103
}
81048104

8105-
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
8106-
_mm512_kunpackd (__mmask64 __A, __mmask64 __B)
8107-
{
8108-
return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A, (__mmask64) __B);
8105+
static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR
8106+
_mm512_kunpackd(__mmask64 __A, __mmask64 __B) {
8107+
return (__mmask64)__builtin_ia32_kunpckdi((__mmask64)__A, (__mmask64)__B);
81098108
}
81108109

8111-
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
8112-
_mm512_kunpackw (__mmask32 __A, __mmask32 __B)
8113-
{
8114-
return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A, (__mmask32) __B);
8110+
static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR
8111+
_mm512_kunpackw(__mmask32 __A, __mmask32 __B) {
8112+
return (__mmask32)__builtin_ia32_kunpcksi((__mmask32)__A, (__mmask32)__B);
81158113
}
81168114

81178115
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR

clang/test/CodeGen/X86/avx512f-builtins.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9126,6 +9126,24 @@ __mmask16 test_mm512_kunpackb(__m512i __A, __m512i __B, __m512i __C, __m512i __D
91269126
__E, __F);
91279127
}
91289128

9129+
TEST_CONSTEXPR(_mm512_kunpackb(0xFF00, 0x00FF) == 0xFF00);
9130+
TEST_CONSTEXPR(_mm512_kunpackb(0xABCD, 0x1234) == 0xCD34);
9131+
TEST_CONSTEXPR(_mm512_kunpackb(0x00FF, 0xFF00) == 0x0000);
9132+
TEST_CONSTEXPR(_mm512_kunpackb(0xAAAA, 0x5555) == 0xAA55);
9133+
TEST_CONSTEXPR(_mm512_kunpackb(0x1234, 0xABCD) == 0x34CD);
9134+
9135+
TEST_CONSTEXPR(_mm512_kunpackw(0xFFFF0000u, 0x0000FFFFu) == 0x0000FFFFu);
9136+
TEST_CONSTEXPR(_mm512_kunpackw(0xABCD1234u, 0x56789ABCu) == 0x12349ABCu);
9137+
TEST_CONSTEXPR(_mm512_kunpackw(0x0000FFFFu, 0xFFFF0000u) == 0x00000000u);
9138+
TEST_CONSTEXPR(_mm512_kunpackw(0xAAAA5555u, 0x5555AAAAu) == 0x5555AAAAu);
9139+
TEST_CONSTEXPR(_mm512_kunpackw(0x12345678u, 0xABCDEF12u) == 0x5678EF12u);
9140+
9141+
TEST_CONSTEXPR(_mm512_kunpackd(0xFFFFFFFF00000000ull, 0x00000000FFFFFFFFull) == 0x00000000FFFFFFFFull);
9142+
TEST_CONSTEXPR(_mm512_kunpackd(0xABCDEF0123456789ull, 0x0123456789ABCDEFull) == 0x234567899ABCDEFull);
9143+
TEST_CONSTEXPR(_mm512_kunpackd(0x00000000FFFFFFFFull, 0xFFFFFFFF00000000ull) == 0x0000000000000000ull);
9144+
TEST_CONSTEXPR(_mm512_kunpackd(0xAAAA5555AAAA5555ull, 0x5555AAAA5555AAAAull) == 0xAAAA55555555AAAAull);
9145+
TEST_CONSTEXPR(_mm512_kunpackd(0x123456789ABCDEFull, 0xFEDCBA9876543210ull) == 0x89ABCDEF76543210ull);
9146+
91299147
__mmask16 test_mm512_kxnor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
91309148
// CHECK-LABEL: test_mm512_kxnor
91319149
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>

0 commit comments

Comments
 (0)