Skip to content

Commit e46b536

Browse files
refactor: PR Feedback
1 parent: e56047a · commit: e46b536

File tree

5 files changed

+41
-58
lines changed

5 files changed

+41
-58
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2151,7 +2151,7 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>
21512151
def vcomiss : X86Builtin<"int(_Vector<4, float>, _Vector<4, float>, _Constant int, _Constant int)">;
21522152
}
21532153

2154-
let Features = "avx512bw", Attributes = [NoThrow, Const] in {
2154+
let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in {
21552155
def kunpckdi : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">;
21562156
def kunpcksi : X86Builtin<"unsigned int(unsigned int, unsigned int)">;
21572157
}
@@ -3194,7 +3194,7 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in {
31943194
def ktestzdi : X86Builtin<"int(unsigned long long int, unsigned long long int)">;
31953195
}
31963196

3197-
let Features = "avx512f", Attributes = [NoThrow, Const] in {
3197+
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr] in {
31983198
def kunpckhi : X86Builtin<"unsigned short(unsigned short, unsigned short)">;
31993199
}
32003200

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4579,6 +4579,32 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
45794579
S, OpPC, Call,
45804580
[](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; });
45814581

4582+
case X86::BI__builtin_ia32_kunpckhi:
4583+
case X86::BI__builtin_ia32_kunpckdi:
4584+
case X86::BI__builtin_ia32_kunpcksi:
4585+
return interp__builtin_elementwise_int_binop(S, OpPC, Call,
4586+
[](const APSInt &A,
4587+
const APSInt &B) {
4588+
// Generic kunpack: extract
4589+
// lower half of each operand
4590+
// and concatenate. Result =
4591+
// (A[HalfWidth-1:0] <<
4592+
// HalfWidth) |
4593+
// B[HalfWidth-1:0]
4594+
unsigned HalfWidth =
4595+
A.getBitWidth() / 2;
4596+
APSInt Result(
4597+
A.getLoBits(HalfWidth)
4598+
.zext(A.getBitWidth()),
4599+
A.isUnsigned());
4600+
Result <<= HalfWidth;
4601+
Result |= APSInt(
4602+
B.getLoBits(HalfWidth)
4603+
.zext(B.getBitWidth()),
4604+
B.isUnsigned());
4605+
return Result;
4606+
});
4607+
45824608
case X86::BI__builtin_ia32_phminposuw128:
45834609
return interp__builtin_ia32_phminposuw(S, OpPC, Call);
45844610

clang/lib/AST/ExprConstant.cpp

Lines changed: 8 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -16287,39 +16287,20 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
1628716287
return Success((A | B) == 0, E);
1628816288
}
1628916289

16290-
case clang::X86::BI__builtin_ia32_kunpckhi: {
16291-
APSInt A, B;
16292-
if (!EvaluateInteger(E->getArg(0), A, Info) ||
16293-
!EvaluateInteger(E->getArg(1), B, Info))
16294-
return false;
16295-
16296-
// Extract lower 8 bits of each operand and concatenate
16297-
// Result = (A[7:0] << 8) | B[7:0]
16298-
APSInt Result = ((A & 0xFF) << 8) | (B & 0xFF);
16299-
return Success(Result, E);
16300-
}
16301-
16302-
case clang::X86::BI__builtin_ia32_kunpckdi: {
16303-
APSInt A, B;
16304-
if (!EvaluateInteger(E->getArg(0), A, Info) ||
16305-
!EvaluateInteger(E->getArg(1), B, Info))
16306-
return false;
16307-
16308-
// Extract lower 32 bits of each operand and concatenate
16309-
// Result = (A[31:0] << 32) | B[31:0]
16310-
APSInt Result = ((A & 0xFFFFFFFFULL) << 32) | (B & 0xFFFFFFFFULL);
16311-
return Success(Result, E);
16312-
}
16313-
16290+
case clang::X86::BI__builtin_ia32_kunpckhi:
16291+
case clang::X86::BI__builtin_ia32_kunpckdi:
1631416292
case clang::X86::BI__builtin_ia32_kunpcksi: {
1631516293
APSInt A, B;
1631616294
if (!EvaluateInteger(E->getArg(0), A, Info) ||
1631716295
!EvaluateInteger(E->getArg(1), B, Info))
1631816296
return false;
1631916297

16320-
// Extract lower 16 bits of each operand and concatenate
16321-
// Result = (A[15:0] << 16) | B[15:0]
16322-
APSInt Result = ((A & 0xFFFF) << 16) | (B & 0xFFFF);
16298+
// Generic kunpack: extract lower half of each operand and concatenate
16299+
// Result = (A[HalfWidth-1:0] << HalfWidth) | B[HalfWidth-1:0]
16300+
unsigned HalfWidth = A.getBitWidth() / 2;
16301+
APSInt Result(A.getLoBits(HalfWidth).zext(A.getBitWidth()), A.isUnsigned());
16302+
Result <<= HalfWidth;
16303+
Result |= APSInt(B.getLoBits(HalfWidth).zext(B.getBitWidth()), B.isUnsigned());
1632316304
return Success(Result, E);
1632416305
}
1632516306

@@ -16449,20 +16430,6 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
1644916430
return Success(APValue(Result), E);
1645016431
}
1645116432

16452-
case X86::BI__builtin_ia32_kunpckhi:
16453-
case X86::BI__builtin_ia32_kunpcksi:
16454-
case X86::BI__builtin_ia32_kunpckdi: {
16455-
return HandleMaskBinOp([](const APSInt &LHS, const APSInt &RHS) {
16456-
// Unpack: concatenate lower half of RHS with lower half of LHS
16457-
unsigned HalfBits = LHS.getBitWidth() / 2;
16458-
APSInt Mask = APSInt::getMaxValue(LHS.getBitWidth(), LHS.isUnsigned());
16459-
Mask = Mask.trunc(HalfBits).zext(LHS.getBitWidth());
16460-
16461-
APSInt LowerLHS = LHS & Mask;
16462-
APSInt LowerRHS = RHS & Mask;
16463-
return LowerRHS | (LowerLHS << HalfBits);
16464-
});
16465-
}
1646616433

1646716434
case X86::BI__builtin_ia32_kaddqi:
1646816435
case X86::BI__builtin_ia32_kaddhi:

clang/lib/Headers/avx512fintrin.h

Lines changed: 0 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -8101,16 +8101,6 @@ _mm512_kunpackb(__mmask16 __A, __mmask16 __B) {
81018101
return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
81028102
}
81038103

8104-
static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR
8105-
_mm512_kunpackd(__mmask64 __A, __mmask64 __B) {
8106-
return (__mmask64)__builtin_ia32_kunpckdi((__mmask64)__A, (__mmask64)__B);
8107-
}
8108-
8109-
static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR
8110-
_mm512_kunpackw(__mmask32 __A, __mmask32 __B) {
8111-
return (__mmask32)__builtin_ia32_kunpcksi((__mmask32)__A, (__mmask32)__B);
8112-
}
8113-
81148104
static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR
81158105
_mm512_kxnor(__mmask16 __A, __mmask16 __B) {
81168106
return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);

clang/test/CodeGen/X86/avx512f-builtins.c

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -9126,21 +9126,21 @@ __mmask16 test_mm512_kunpackb(__m512i __A, __m512i __B, __m512i __C, __m512i __D
91269126
__E, __F);
91279127
}
91289128

9129-
TEST_CONSTEXPR(_mm512_kunpackb(0xFF00, 0x00FF) == 0xFF00);
9129+
TEST_CONSTEXPR(_mm512_kunpackb(0xFF00, 0x00FF) == 0x00FF);
91309130
TEST_CONSTEXPR(_mm512_kunpackb(0xABCD, 0x1234) == 0xCD34);
9131-
TEST_CONSTEXPR(_mm512_kunpackb(0x00FF, 0xFF00) == 0x0000);
9131+
TEST_CONSTEXPR(_mm512_kunpackb(0x00FF, 0xFF00) == 0xFF00);
91329132
TEST_CONSTEXPR(_mm512_kunpackb(0xAAAA, 0x5555) == 0xAA55);
91339133
TEST_CONSTEXPR(_mm512_kunpackb(0x1234, 0xABCD) == 0x34CD);
91349134

91359135
TEST_CONSTEXPR(_mm512_kunpackw(0xFFFF0000u, 0x0000FFFFu) == 0x0000FFFFu);
91369136
TEST_CONSTEXPR(_mm512_kunpackw(0xABCD1234u, 0x56789ABCu) == 0x12349ABCu);
9137-
TEST_CONSTEXPR(_mm512_kunpackw(0x0000FFFFu, 0xFFFF0000u) == 0x00000000u);
9137+
TEST_CONSTEXPR(_mm512_kunpackw(0x0000FFFFu, 0xFFFF0000u) == 0xFFFF0000u);
91389138
TEST_CONSTEXPR(_mm512_kunpackw(0xAAAA5555u, 0x5555AAAAu) == 0x5555AAAAu);
91399139
TEST_CONSTEXPR(_mm512_kunpackw(0x12345678u, 0xABCDEF12u) == 0x5678EF12u);
91409140

91419141
TEST_CONSTEXPR(_mm512_kunpackd(0xFFFFFFFF00000000ull, 0x00000000FFFFFFFFull) == 0x00000000FFFFFFFFull);
9142-
TEST_CONSTEXPR(_mm512_kunpackd(0xABCDEF0123456789ull, 0x0123456789ABCDEFull) == 0x234567899ABCDEFull);
9143-
TEST_CONSTEXPR(_mm512_kunpackd(0x00000000FFFFFFFFull, 0xFFFFFFFF00000000ull) == 0x0000000000000000ull);
9142+
TEST_CONSTEXPR(_mm512_kunpackd(0xABCDEF0123456789ull, 0x0123456789ABCDEFull) == 0x2345678989ABCDEFull);
9143+
TEST_CONSTEXPR(_mm512_kunpackd(0x00000000FFFFFFFFull, 0xFFFFFFFF00000000ull) == 0xFFFFFFFF00000000ull);
91449144
TEST_CONSTEXPR(_mm512_kunpackd(0xAAAA5555AAAA5555ull, 0x5555AAAA5555AAAAull) == 0xAAAA55555555AAAAull);
91459145
TEST_CONSTEXPR(_mm512_kunpackd(0x123456789ABCDEFull, 0xFEDCBA9876543210ull) == 0x89ABCDEF76543210ull);
91469146

0 commit comments

Comments
 (0)