Skip to content

Commit 44f72fb

Browse files
[X86][Clang] Add AVX512 kunpck intrinsics to be used in constexp (#167683)
Resolves #166976
1 parent c946418 commit 44f72fb

File tree

7 files changed

+53
-10
lines changed

7 files changed

+53
-10
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2142,7 +2142,7 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>
21422142
def vcomiss : X86Builtin<"int(_Vector<4, float>, _Vector<4, float>, _Constant int, _Constant int)">;
21432143
}
21442144

2145-
let Features = "avx512bw", Attributes = [NoThrow, Const] in {
2145+
let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in {
21462146
def kunpckdi : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">;
21472147
def kunpcksi : X86Builtin<"unsigned int(unsigned int, unsigned int)">;
21482148
}
@@ -3185,7 +3185,7 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in {
31853185
def ktestzdi : X86Builtin<"int(unsigned long long int, unsigned long long int)">;
31863186
}
31873187

3188-
let Features = "avx512f", Attributes = [NoThrow, Const] in {
3188+
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr] in {
31893189
def kunpckhi : X86Builtin<"unsigned short(unsigned short, unsigned short)">;
31903190
}
31913191

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4619,6 +4619,18 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
46194619
S, OpPC, Call,
46204620
[](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; });
46214621

4622+
case X86::BI__builtin_ia32_kunpckhi:
4623+
case X86::BI__builtin_ia32_kunpckdi:
4624+
case X86::BI__builtin_ia32_kunpcksi:
4625+
return interp__builtin_elementwise_int_binop(
4626+
S, OpPC, Call, [](const APSInt &A, const APSInt &B) {
4627+
// Generic kunpack: extract lower half of each operand and concatenate
4628+
// Result = A[HalfWidth-1:0] concat B[HalfWidth-1:0]
4629+
unsigned BW = A.getBitWidth();
4630+
return APSInt(A.trunc(BW / 2).concat(B.trunc(BW / 2)),
4631+
A.isUnsigned());
4632+
});
4633+
46224634
case X86::BI__builtin_ia32_phminposuw128:
46234635
return interp__builtin_ia32_phminposuw(S, OpPC, Call);
46244636

clang/lib/AST/ExprConstant.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16348,6 +16348,21 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
1634816348
return Success((A | B) == 0, E);
1634916349
}
1635016350

16351+
case clang::X86::BI__builtin_ia32_kunpckhi:
16352+
case clang::X86::BI__builtin_ia32_kunpckdi:
16353+
case clang::X86::BI__builtin_ia32_kunpcksi: {
16354+
APSInt A, B;
16355+
if (!EvaluateInteger(E->getArg(0), A, Info) ||
16356+
!EvaluateInteger(E->getArg(1), B, Info))
16357+
return false;
16358+
16359+
// Generic kunpack: extract lower half of each operand and concatenate
16360+
// Result = A[HalfWidth-1:0] concat B[HalfWidth-1:0]
16361+
unsigned BW = A.getBitWidth();
16362+
APSInt Result(A.trunc(BW / 2).concat(B.trunc(BW / 2)), A.isUnsigned());
16363+
return Success(Result, E);
16364+
}
16365+
1635116366
case clang::X86::BI__builtin_ia32_lzcnt_u16:
1635216367
case clang::X86::BI__builtin_ia32_lzcnt_u32:
1635316368
case clang::X86::BI__builtin_ia32_lzcnt_u64: {

clang/lib/Headers/avx512bwintrin.h

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1606,15 +1606,14 @@ _mm512_maskz_set1_epi8(__mmask64 __M, char __A) {
16061606
(__v64qi) _mm512_setzero_si512());
16071607
}
16081608

1609-
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_kunpackd(__mmask64 __A,
1610-
__mmask64 __B) {
1609+
static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR
1610+
_mm512_kunpackd(__mmask64 __A, __mmask64 __B) {
16111611
return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A,
16121612
(__mmask64) __B);
16131613
}
16141614

1615-
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
1616-
_mm512_kunpackw (__mmask32 __A, __mmask32 __B)
1617-
{
1615+
static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR
1616+
_mm512_kunpackw(__mmask32 __A, __mmask32 __B) {
16181617
return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A,
16191618
(__mmask32) __B);
16201619
}

clang/lib/Headers/avx512fintrin.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8069,9 +8069,8 @@ _kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
80698069
return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
80708070
}
80718071

8072-
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8073-
_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
8074-
{
8072+
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR
8073+
_mm512_kunpackb(__mmask16 __A, __mmask16 __B) {
80758074
return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
80768075
}
80778076

clang/test/CodeGen/X86/avx512bw-builtins.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2731,6 +2731,12 @@ __mmask64 test_mm512_kunpackd(__m512i __A, __m512i __B, __m512i __C, __m512i __D
27312731
return _mm512_mask_cmpneq_epu8_mask(_mm512_kunpackd(_mm512_cmpneq_epu8_mask(__B, __A),_mm512_cmpneq_epu8_mask(__C, __D)), __E, __F);
27322732
}
27332733

2734+
TEST_CONSTEXPR(_mm512_kunpackd(0xFFFFFFFF00000000ull, 0x00000000FFFFFFFFull) == 0x00000000FFFFFFFFull);
2735+
TEST_CONSTEXPR(_mm512_kunpackd(0xABCDEF0123456789ull, 0x0123456789ABCDEFull) == 0x2345678989ABCDEFull);
2736+
TEST_CONSTEXPR(_mm512_kunpackd(0x00000000FFFFFFFFull, 0xFFFFFFFF00000000ull) == 0xFFFFFFFF00000000ull);
2737+
TEST_CONSTEXPR(_mm512_kunpackd(0xAAAA5555AAAA5555ull, 0x5555AAAA5555AAAAull) == 0xAAAA55555555AAAAull);
2738+
TEST_CONSTEXPR(_mm512_kunpackd(0x123456789ABCDEFull, 0xFEDCBA9876543210ull) == 0x89ABCDEF76543210ull);
2739+
27342740
__mmask32 test_mm512_kunpackw(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
27352741
// CHECK-LABEL: test_mm512_kunpackw
27362742
// CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
@@ -2741,6 +2747,12 @@ __mmask32 test_mm512_kunpackw(__m512i __A, __m512i __B, __m512i __C, __m512i __D
27412747
return _mm512_mask_cmpneq_epu16_mask(_mm512_kunpackw(_mm512_cmpneq_epu16_mask(__B, __A),_mm512_cmpneq_epu16_mask(__C, __D)), __E, __F);
27422748
}
27432749

2750+
TEST_CONSTEXPR(_mm512_kunpackw(0xFFFF0000u, 0x0000FFFFu) == 0x0000FFFFu);
2751+
TEST_CONSTEXPR(_mm512_kunpackw(0xABCD1234u, 0x56789ABCu) == 0x12349ABCu);
2752+
TEST_CONSTEXPR(_mm512_kunpackw(0x0000FFFFu, 0xFFFF0000u) == 0xFFFF0000u);
2753+
TEST_CONSTEXPR(_mm512_kunpackw(0xAAAA5555u, 0x5555AAAAu) == 0x5555AAAAu);
2754+
TEST_CONSTEXPR(_mm512_kunpackw(0x12345678u, 0xABCDEF12u) == 0x5678EF12u);
2755+
27442756
__m512i test_mm512_loadu_epi16 (void *__P)
27452757
{
27462758
// CHECK-LABEL: test_mm512_loadu_epi16

clang/test/CodeGen/X86/avx512f-builtins.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9162,6 +9162,12 @@ __mmask16 test_mm512_kunpackb(__m512i __A, __m512i __B, __m512i __C, __m512i __D
91629162
__E, __F);
91639163
}
91649164

9165+
TEST_CONSTEXPR(_mm512_kunpackb(0xFF00, 0x00FF) == 0x00FF);
9166+
TEST_CONSTEXPR(_mm512_kunpackb(0xABCD, 0x1234) == 0xCD34);
9167+
TEST_CONSTEXPR(_mm512_kunpackb(0x00FF, 0xFF00) == 0xFF00);
9168+
TEST_CONSTEXPR(_mm512_kunpackb(0xAAAA, 0x5555) == 0xAA55);
9169+
TEST_CONSTEXPR(_mm512_kunpackb(0x1234, 0xABCD) == 0x34CD);
9170+
91659171
__mmask16 test_mm512_kxnor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
91669172
// CHECK-LABEL: test_mm512_kxnor
91679173
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>

0 commit comments

Comments
 (0)