Skip to content

Commit 79c747e

Browse files
committed
Add testcases and handling in new evaluator
1 parent baf0d69 commit 79c747e

File tree

7 files changed

+96
-35
lines changed

7 files changed

+96
-35
lines changed

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2817,6 +2817,46 @@ static bool interp__builtin_ia32_pshuf(InterpState &S, CodePtr OpPC,
28172817
return true;
28182818
}
28192819

2820+
static bool interp__builtin_ia32_movmsk_op(InterpState &S, CodePtr OpPC,
2821+
const CallExpr *Call) {
2822+
assert(Call->getNumArgs() == 1);
2823+
2824+
const Pointer &Source = S.Stk.pop<Pointer>();
2825+
2826+
unsigned SourceLen = Source.getNumElems();
2827+
const QualType ElemQT = getElemType(Source);
2828+
const OptPrimType ElemPT = S.getContext().classify(ElemQT);
2829+
unsigned LaneWidth = S.getASTContext().getTypeSize(ElemQT);
2830+
2831+
if (ElemQT->isIntegerType()) {
2832+
unsigned Byte = 8;
2833+
unsigned ResultLen = (LaneWidth * SourceLen) / Byte;
2834+
APInt Result(ResultLen, 0);
2835+
unsigned ResultIdx = 0;
2836+
for (unsigned I = 0; I != SourceLen; ++I) {
2837+
APInt Lane;
2838+
INT_TYPE_SWITCH_NO_BOOL(*ElemPT,
2839+
{ Lane = Source.elem<T>(I).toAPSInt(); });
2840+
for (unsigned J = 0; J != LaneWidth; J += Byte) {
2841+
Result.setBitVal(ResultIdx++, Lane[J + 7]);
2842+
}
2843+
}
2844+
pushInteger(S, Result.getZExtValue(), Call->getType());
2845+
return true;
2846+
}
2847+
if (ElemQT->isFloatingType()) {
2848+
APInt Result(SourceLen, 0);
2849+
using T = PrimConv<PT_Float>::T;
2850+
for (unsigned I = 0; I != SourceLen; ++I) {
2851+
APInt Lane = Source.elem<T>(I).getAPFloat().bitcastToAPInt();
2852+
Result.setBitVal(I, Lane[LaneWidth - 1]);
2853+
}
2854+
pushInteger(S, Result.getZExtValue(), Call->getType());
2855+
return true;
2856+
}
2857+
return false;
2858+
}
2859+
28202860
static bool interp__builtin_elementwise_triop(
28212861
InterpState &S, CodePtr OpPC, const CallExpr *Call,
28222862
llvm::function_ref<APInt(const APSInt &, const APSInt &, const APSInt &)>
@@ -3454,6 +3494,15 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
34543494
return LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS);
34553495
});
34563496

3497+
case clang::X86::BI__builtin_ia32_movmskps:
3498+
case clang::X86::BI__builtin_ia32_movmskpd:
3499+
case clang::X86::BI__builtin_ia32_pmovmskb128:
3500+
case clang::X86::BI__builtin_ia32_pmovmskb256:
3501+
case clang::X86::BI__builtin_ia32_movmskps256:
3502+
case clang::X86::BI__builtin_ia32_movmskpd256: {
3503+
return interp__builtin_ia32_movmsk_op(S, OpPC, Call);
3504+
}
3505+
34573506
case clang::X86::BI__builtin_ia32_pavgb128:
34583507
case clang::X86::BI__builtin_ia32_pavgw128:
34593508
case clang::X86::BI__builtin_ia32_pavgb256:

clang/lib/AST/ExprConstant.cpp

Lines changed: 30 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -13766,40 +13766,6 @@ static bool getBuiltinAlignArguments(const CallExpr *E, EvalInfo &Info,
1376613766

1376713767
bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
1376813768
unsigned BuiltinOp) {
13769-
13770-
auto EvalMoveMaskOp = [&]() {
13771-
APValue Source;
13772-
if (!Evaluate(Source, Info, E->getArg(0)))
13773-
return false;
13774-
unsigned SourceLen = Source.getVectorLength();
13775-
const VectorType *VT = E->getArg(0)->getType()->castAs<VectorType>();
13776-
const QualType ElemQT = VT->getElementType();
13777-
unsigned LaneWidth = Info.Ctx.getTypeSize(ElemQT);
13778-
13779-
if (ElemQT->isIntegerType()) { // Get MSB of each byte of every lane
13780-
unsigned ByteLen = 8;
13781-
unsigned ResultLen = (LaneWidth * SourceLen) / ByteLen;
13782-
APInt Result(ResultLen, 0);
13783-
unsigned ResultIdx = 0;
13784-
for (unsigned I = 0; I != SourceLen; ++I) {
13785-
APInt Lane = Source.getVectorElt(I).getInt();
13786-
for (unsigned J = 0; J != LaneWidth; J = J + ByteLen) {
13787-
Result.setBitVal(ResultIdx++, Lane[J]);
13788-
}
13789-
}
13790-
return Success(Result, E);
13791-
}
13792-
if (ElemQT->isFloatingType()) { // Get sign bit of every lane
13793-
APInt Result(SourceLen, 0);
13794-
for (unsigned I = 0; I != SourceLen; ++I) {
13795-
APInt Lane = Source.getVectorElt(I).getFloat().bitcastToAPInt();
13796-
Result.setBitVal(I, Lane[LaneWidth - 1]);
13797-
}
13798-
return Success(Result, E);
13799-
}
13800-
return false;
13801-
};
13802-
1380313769
auto HandleMaskBinOp =
1380413770
[&](llvm::function_ref<APSInt(const APSInt &, const APSInt &)> Fn)
1380513771
-> bool {
@@ -14834,7 +14800,36 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
1483414800
case clang::X86::BI__builtin_ia32_pmovmskb256:
1483514801
case clang::X86::BI__builtin_ia32_movmskps256:
1483614802
case clang::X86::BI__builtin_ia32_movmskpd256: {
14837-
return EvalMoveMaskOp();
14803+
APValue Source;
14804+
if (!Evaluate(Source, Info, E->getArg(0)))
14805+
return false;
14806+
unsigned SourceLen = Source.getVectorLength();
14807+
const VectorType *VT = E->getArg(0)->getType()->castAs<VectorType>();
14808+
const QualType ElemQT = VT->getElementType();
14809+
unsigned LaneWidth = Info.Ctx.getTypeSize(ElemQT);
14810+
14811+
if (ElemQT->isIntegerType()) { // Get MSB of each byte of every lane
14812+
unsigned Byte = 8;
14813+
unsigned ResultLen = (LaneWidth * SourceLen) / Byte;
14814+
APInt Result(ResultLen, 0);
14815+
unsigned ResultIdx = 0;
14816+
for (unsigned I = 0; I != SourceLen; ++I) {
14817+
APInt Lane = Source.getVectorElt(I).getInt();
14818+
for (unsigned J = 0; J != LaneWidth; J += Byte) {
14819+
Result.setBitVal(ResultIdx++, Lane[J + 7]);
14820+
}
14821+
}
14822+
return Success(Result.getZExtValue(), E);
14823+
}
14824+
if (ElemQT->isFloatingType()) { // Get sign bit of every lane
14825+
APInt Result(SourceLen, 0);
14826+
for (unsigned I = 0; I != SourceLen; ++I) {
14827+
APInt Lane = Source.getVectorElt(I).getFloat().bitcastToAPInt();
14828+
Result.setBitVal(I, Lane[LaneWidth - 1]);
14829+
}
14830+
return Success(Result.getZExtValue(), E);
14831+
}
14832+
return false;
1483814833
}
1483914834

1484014835
case clang::X86::BI__builtin_ia32_bextr_u32:

clang/test/CodeGen/X86/avx-builtins.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1337,12 +1337,16 @@ int test_mm256_movemask_pd(__m256d A) {
13371337
// CHECK: call {{.*}}i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %{{.*}})
13381338
return _mm256_movemask_pd(A);
13391339
}
1340+
TEST_CONSTEXPR(_mm256_movemask_pd((__m256d)(__v4df){-1234.5678901234, 98765.4321098765, 0.000123456789, -3.14159265358979}) == 0x9);
1341+
TEST_CONSTEXPR(_mm256_movemask_pd((__m256d)(__v4df){-0.000000987654321, -99999.999999999, 42.424242424242, 314159.2653589793}) == 0x3);
13401342

13411343
int test_mm256_movemask_ps(__m256 A) {
13421344
// CHECK-LABEL: test_mm256_movemask_ps
13431345
// CHECK: call {{.*}}i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %{{.*}})
13441346
return _mm256_movemask_ps(A);
13451347
}
1348+
TEST_CONSTEXPR(_mm256_movemask_ps((__m256)(__v8sf){-12.3456f, 34.7890f, -0.0001234f, 123456.78f, -987.654f, 0.001234f, 3.14159f, -256.001f}) == 0x95);
1349+
TEST_CONSTEXPR(_mm256_movemask_ps((__m256)(__v8sf){0.333333f, -45.6789f, 999.999f, -0.9999f, 17.234f, -128.512f, 2048.0f, -3.14f}) == 0xAA);
13461350

13471351
__m256d test_mm256_mul_pd(__m256d A, __m256d B) {
13481352
// CHECK-LABEL: test_mm256_mul_pd

clang/test/CodeGen/X86/avx2-builtins.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -966,6 +966,9 @@ int test_mm256_movemask_epi8(__m256i a) {
966966
// CHECK: call {{.*}}i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %{{.*}})
967967
return _mm256_movemask_epi8(a);
968968
}
969+
TEST_CONSTEXPR(_mm256_movemask_epi8((__m256i)(__v32qu){0x7F,0x80,0x01,0xFF,0x00,0xAA,0x55,0xC3,0x12,0x8E,0x00,0xFE,0x7E,0x81,0xFF,0x01,0xB6,0x00,0x39,0x40,0xD0,0x05,0x80,0x2A,0x7B,0x00,0x90,0xFF,0x01,0x34,0xC0,0x6D}) == 0x4C516AAA);
970+
TEST_CONSTEXPR(_mm256_movemask_epi8((__m256i)(__v8si){(int)0x80FF00AA,(int)0x7F0183E1,(int)0xDEADBEEF,(int)0xC0000001,(int)0x00000000,(int)0xFFFFFFFF,(int)0x12345678,(int)0x90ABCDEF}) == 0xF0F08F3D);
971+
TEST_CONSTEXPR(_mm256_movemask_epi8((__m256i)(__v4du){0xFF00000000000080ULL,0x7F010203040506C3ULL,0x8000000000000000ULL,0x0123456789ABCDEFULL}) == 0x0F800181);
969972

970973
__m256i test_mm256_mpsadbw_epu8(__m256i x, __m256i y) {
971974
// CHECK-LABEL: test_mm256_mpsadbw_epu8

clang/test/CodeGen/X86/mmx-builtins.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,9 @@ int test_mm_movemask_pi8(__m64 a) {
401401
// CHECK: call {{.*}}i32 @llvm.x86.sse2.pmovmskb.128(
402402
return _mm_movemask_pi8(a);
403403
}
404+
TEST_CONSTEXPR(_mm_movemask_pi8((__m64)((__v8qu){0x7F,0x80,0x01,0xFF,0x00,0xAA,0x55,0xC3})) == 0xAA);
405+
TEST_CONSTEXPR(_mm_movemask_pi8((__m64)((__v2si){(int)0x80FF00AA,(int)0x7F0183E1})) == 0x3D);
406+
TEST_CONSTEXPR(_mm_movemask_pi8((__m64)((__v1di){(long long)0xE110837A00924DB0ULL})) == 0xA5);
404407

405408

406409
__m64 test_mm_mul_su32(__m64 a, __m64 b) {

clang/test/CodeGen/X86/sse-builtins.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -561,6 +561,8 @@ int test_mm_movemask_ps(__m128 A) {
561561
// CHECK: call {{.*}}i32 @llvm.x86.sse.movmsk.ps(<4 x float> %{{.*}})
562562
return _mm_movemask_ps(A);
563563
}
564+
TEST_CONSTEXPR(_mm_movemask_ps((__m128)(__v4sf){-2.0f, 3.0f, -5.5f, -0.0f}) == 0xD);
565+
TEST_CONSTEXPR(_mm_movemask_ps((__m128)(__v4sf){-7.348215e5, 0.00314159, -12.789, 2.7182818}) == 0x5);
564566

565567
__m128 test_mm_mul_ps(__m128 A, __m128 B) {
566568
// CHECK-LABEL: test_mm_mul_ps

clang/test/CodeGen/X86/sse2-builtins.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -955,12 +955,17 @@ int test_mm_movemask_epi8(__m128i A) {
955955
// CHECK: call {{.*}}i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %{{.*}})
956956
return _mm_movemask_epi8(A);
957957
}
958+
TEST_CONSTEXPR(_mm_movemask_epi8((__m128i)(__v16qu){0x7F,0x80,0x01,0xFF,0x00,0xAA,0x55,0xC3,0x12,0x8E,0x00,0xFE,0x7E,0x81,0xFF,0x01}) == 0x6AAA);
959+
TEST_CONSTEXPR(_mm_movemask_epi8((__m128i)(__v4si){(int)0x80FF00AA,(int)0x7F0183E1,(int)0xDEADBEEF,(int)0xC0000001}) == 0x8F3D);
960+
TEST_CONSTEXPR(_mm_movemask_epi8((__m128i)(__v2du){0xFF00000000000080ULL,0x7F010203040506C3ULL}) == 0x181);
958961

959962
int test_mm_movemask_pd(__m128d A) {
960963
// CHECK-LABEL: test_mm_movemask_pd
961964
// CHECK: call {{.*}}i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %{{.*}})
962965
return _mm_movemask_pd(A);
963966
}
967+
TEST_CONSTEXPR(_mm_movemask_pd((__m128d)(__v2df){-12345.67890123, 4567.89012345}) == 0x1);
968+
TEST_CONSTEXPR(_mm_movemask_pd((__m128d)(__v2df){0.0000987654321, 09876.5432109876}) == 0x0);
964969

965970
__m128i test_mm_mul_epu32(__m128i A, __m128i B) {
966971
// CHECK-LABEL: test_mm_mul_epu32

0 commit comments

Comments
 (0)