Skip to content

Commit 08fcf97

Browse files
committed
Squash for rebase
1 parent e313bc8 commit 08fcf97

File tree

12 files changed

+139
-27
lines changed

12 files changed

+139
-27
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 23 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,8 @@ let Features = "sse", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
184184
def cvttss2si : X86Builtin<"int(_Vector<4, float>)">;
185185
}
186186

187-
let Features = "sse", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
187+
let Features = "sse",
188+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
188189
def movmskps : X86Builtin<"int(_Vector<4, float>)">;
189190
}
190191

@@ -210,11 +211,6 @@ let Features = "sse2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
210211
def maskmovdqu : X86Builtin<"void(_Vector<16, char>, _Vector<16, char>, char *)">;
211212
}
212213

213-
let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
214-
def movmskpd : X86Builtin<"int(_Vector<2, double>)">;
215-
def pmovmskb128 : X86Builtin<"int(_Vector<16, char>)">;
216-
}
217-
218214
let Features = "sse2", Attributes = [NoThrow] in {
219215
def movnti : X86Builtin<"void(int *, int)">;
220216
}
@@ -223,6 +219,8 @@ let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
223219
def pshuflw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int)">;
224220
def pshufd : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">;
225221
def pshufhw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int)">;
222+
def movmskpd : X86Builtin<"int(_Vector<2, double>)">;
223+
def pmovmskb128 : X86Builtin<"int(_Vector<16, char>)">;
226224
}
227225

228226
let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
@@ -538,6 +536,18 @@ let Features = "avx",
538536
def vtestnzcps : X86Builtin<"int(_Vector<4, float>, _Vector<4, float>)">;
539537
}
540538

539+
let Features = "avx",
540+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
541+
def movmskpd256 : X86Builtin<"int(_Vector<4, double>)">;
542+
def movmskps256 : X86Builtin<"int(_Vector<8, float>)">;
543+
}
544+
545+
let Features = "avx",
546+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
547+
def movmskpd256 : X86Builtin<"int(_Vector<4, double>)">;
548+
def movmskps256 : X86Builtin<"int(_Vector<8, float>)">;
549+
}
550+
541551
let Features = "avx",
542552
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
543553
def vtestzpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">;
@@ -548,13 +558,8 @@ let Features = "avx",
548558
def vtestnzcps256 : X86Builtin<"int(_Vector<8, float>, _Vector<8, float>)">;
549559
def ptestz256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
550560
def ptestc256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
551-
def ptestnzc256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
552-
}
553-
554-
let Features = "avx",
555-
Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
556-
def movmskpd256 : X86Builtin<"int(_Vector<4, double>)">;
557-
def movmskps256 : X86Builtin<"int(_Vector<8, float>)">;
561+
def ptestnzc256
562+
: X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
558563
}
559564

560565
let Features = "avx", Attributes = [NoThrow] in {
@@ -589,6 +594,11 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
589594
def vec_set_v8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int, _Constant int)">;
590595
}
591596

597+
let Features = "avx2",
598+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
599+
def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">;
600+
}
601+
592602
let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
593603
def mpsadbw256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant char)">;
594604
def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant int)">;

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2795,6 +2795,46 @@ static bool interp__builtin_ia32_test_op(
27952795
return true;
27962796
}
27972797

2798+
static bool interp__builtin_ia32_movmsk_op(InterpState &S, CodePtr OpPC,
2799+
const CallExpr *Call) {
2800+
assert(Call->getNumArgs() == 1);
2801+
2802+
const Pointer &Source = S.Stk.pop<Pointer>();
2803+
2804+
unsigned SourceLen = Source.getNumElems();
2805+
QualType ElemQT = getElemType(Source);
2806+
OptPrimType ElemPT = S.getContext().classify(ElemQT);
2807+
unsigned ResultLen =
2808+
S.getASTContext().getTypeSize(Call->getType()); // Always 32-bit integer.
2809+
APInt Result(ResultLen, 0);
2810+
2811+
if (ElemQT->isIntegerType()) {
2812+
unsigned BitsInAByte = 8;
2813+
unsigned LaneWidth = S.getASTContext().getTypeSize(ElemQT);
2814+
unsigned ResultIdx = 0;
2815+
INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
2816+
for (unsigned I = 0; I != SourceLen; ++I) {
2817+
APInt Lane = Source.elem<T>(I).toAPSInt();
2818+
for (unsigned J = 0; J != LaneWidth; J += BitsInAByte) {
2819+
Result.setBitVal(ResultIdx++, Lane[J + 7]);
2820+
}
2821+
}
2822+
});
2823+
pushInteger(S, Result, Call->getType());
2824+
return true;
2825+
}
2826+
if (ElemQT->isRealFloatingType()) {
2827+
using T = PrimConv<PT_Float>::T;
2828+
for (unsigned I = 0; I != SourceLen; ++I) {
2829+
APInt Lane = Source.elem<T>(I).getAPFloat().bitcastToAPInt();
2830+
Result.setBitVal(I, Lane.isNegative());
2831+
}
2832+
pushInteger(S, Result, Call->getType());
2833+
return true;
2834+
}
2835+
return false;
2836+
}
2837+
27982838
static bool interp__builtin_elementwise_triop(
27992839
InterpState &S, CodePtr OpPC, const CallExpr *Call,
28002840
llvm::function_ref<APInt(const APSInt &, const APSInt &, const APSInt &)>
@@ -3491,6 +3531,15 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
34913531
return LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS);
34923532
});
34933533

3534+
case clang::X86::BI__builtin_ia32_movmskps:
3535+
case clang::X86::BI__builtin_ia32_movmskpd:
3536+
case clang::X86::BI__builtin_ia32_pmovmskb128:
3537+
case clang::X86::BI__builtin_ia32_pmovmskb256:
3538+
case clang::X86::BI__builtin_ia32_movmskps256:
3539+
case clang::X86::BI__builtin_ia32_movmskpd256: {
3540+
return interp__builtin_ia32_movmsk_op(S, OpPC, Call);
3541+
}
3542+
34943543
case clang::X86::BI__builtin_ia32_pavgb128:
34953544
case clang::X86::BI__builtin_ia32_pavgw128:
34963545
case clang::X86::BI__builtin_ia32_pavgb256:

clang/lib/AST/ExprConstant.cpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14972,6 +14972,44 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
1497214972
return Success(CarryOut, E);
1497314973
}
1497414974

14975+
case clang::X86::BI__builtin_ia32_movmskps:
14976+
case clang::X86::BI__builtin_ia32_movmskpd:
14977+
case clang::X86::BI__builtin_ia32_pmovmskb128:
14978+
case clang::X86::BI__builtin_ia32_pmovmskb256:
14979+
case clang::X86::BI__builtin_ia32_movmskps256:
14980+
case clang::X86::BI__builtin_ia32_movmskpd256: {
14981+
APValue Source;
14982+
if (!Evaluate(Source, Info, E->getArg(0)))
14983+
return false;
14984+
unsigned SourceLen = Source.getVectorLength();
14985+
const VectorType *VT = E->getArg(0)->getType()->castAs<VectorType>();
14986+
QualType ElemQT = VT->getElementType();
14987+
unsigned ResultLen = Info.Ctx.getTypeSize(
14988+
E->getCallReturnType(Info.Ctx)); // Always 32-bit integer.
14989+
APInt Result(ResultLen, 0);
14990+
14991+
if (ElemQT->isIntegerType()) { // Get MSB of each byte of every lane.
14992+
unsigned BitsInAByte = 8;
14993+
unsigned LaneWidth = Info.Ctx.getTypeSize(ElemQT);
14994+
unsigned ResultIdx = 0;
14995+
for (unsigned I = 0; I != SourceLen; ++I) {
14996+
APInt Lane = Source.getVectorElt(I).getInt();
14997+
for (unsigned J = 0; J != LaneWidth; J += BitsInAByte) {
14998+
Result.setBitVal(ResultIdx++, Lane[J + 7]);
14999+
}
15000+
}
15001+
return Success(Result, E);
15002+
}
15003+
if (ElemQT->isRealFloatingType()) { // Get sign bit of every lane.
15004+
for (unsigned I = 0; I != SourceLen; ++I) {
15005+
APInt Lane = Source.getVectorElt(I).getFloat().bitcastToAPInt();
15006+
Result.setBitVal(I, Lane.isNegative());
15007+
}
15008+
return Success(Result, E);
15009+
}
15010+
return false;
15011+
}
15012+
1497515013
case clang::X86::BI__builtin_ia32_bextr_u32:
1497615014
case clang::X86::BI__builtin_ia32_bextr_u64:
1497715015
case clang::X86::BI__builtin_ia32_bextri_u32:

clang/lib/Headers/avx2intrin.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1304,9 +1304,8 @@ _mm256_min_epu32(__m256i __a, __m256i __b) {
13041304
/// \param __a
13051305
/// A 256-bit integer vector containing the source bytes.
13061306
/// \returns The 32-bit integer mask.
1307-
static __inline__ int __DEFAULT_FN_ATTRS256
1308-
_mm256_movemask_epi8(__m256i __a)
1309-
{
1307+
static __inline__ int __DEFAULT_FN_ATTRS256_CONSTEXPR
1308+
_mm256_movemask_epi8(__m256i __a) {
13101309
return __builtin_ia32_pmovmskb256((__v32qi)__a);
13111310
}
13121311

clang/lib/Headers/avxintrin.h

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2945,9 +2945,8 @@ _mm256_testnzc_si256(__m256i __a, __m256i __b) {
29452945
/// A 256-bit vector of [4 x double] containing the double-precision
29462946
/// floating point values with sign bits to be extracted.
29472947
/// \returns The sign bits from the operand, written to bits [3:0].
2948-
static __inline int __DEFAULT_FN_ATTRS
2949-
_mm256_movemask_pd(__m256d __a)
2950-
{
2948+
static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
2949+
_mm256_movemask_pd(__m256d __a) {
29512950
return __builtin_ia32_movmskpd256((__v4df)__a);
29522951
}
29532952

@@ -2963,9 +2962,8 @@ _mm256_movemask_pd(__m256d __a)
29632962
/// A 256-bit vector of [8 x float] containing the single-precision floating
29642963
/// point values with sign bits to be extracted.
29652964
/// \returns The sign bits from the operand, written to bits [7:0].
2966-
static __inline int __DEFAULT_FN_ATTRS
2967-
_mm256_movemask_ps(__m256 __a)
2968-
{
2965+
static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
2966+
_mm256_movemask_ps(__m256 __a) {
29692967
return __builtin_ia32_movmskps256((__v8sf)__a);
29702968
}
29712969

clang/lib/Headers/emmintrin.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4280,7 +4280,8 @@ _mm_packus_epi16(__m128i __a, __m128i __b) {
42804280
/// A 128-bit integer vector containing the values with bits to be extracted.
42814281
/// \returns The most significant bits from each 8-bit element in \a __a,
42824282
/// written to bits [15:0]. The other bits are assigned zeros.
4283-
static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a) {
4283+
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
4284+
_mm_movemask_epi8(__m128i __a) {
42844285
return __builtin_ia32_pmovmskb128((__v16qi)__a);
42854286
}
42864287

@@ -4699,7 +4700,8 @@ _mm_unpacklo_pd(__m128d __a, __m128d __b) {
46994700
/// be extracted.
47004701
/// \returns The sign bits from each of the double-precision elements in \a __a,
47014702
/// written to bits [1:0]. The remaining bits are assigned values of zero.
4702-
static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_pd(__m128d __a) {
4703+
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
4704+
_mm_movemask_pd(__m128d __a) {
47034705
return __builtin_ia32_movmskpd((__v2df)__a);
47044706
}
47054707

clang/lib/Headers/xmmintrin.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3014,9 +3014,7 @@ _mm_cvtps_pi8(__m128 __a)
30143014
/// \returns A 32-bit integer value. Bits [3:0] contain the sign bits from each
30153015
/// single-precision floating-point element of the parameter. Bits [31:4] are
30163016
/// set to zero.
3017-
static __inline__ int __DEFAULT_FN_ATTRS
3018-
_mm_movemask_ps(__m128 __a)
3019-
{
3017+
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_movemask_ps(__m128 __a) {
30203018
return __builtin_ia32_movmskps((__v4sf)__a);
30213019
}
30223020

clang/test/CodeGen/X86/avx-builtins.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1337,12 +1337,16 @@ int test_mm256_movemask_pd(__m256d A) {
13371337
// CHECK: call {{.*}}i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %{{.*}})
13381338
return _mm256_movemask_pd(A);
13391339
}
1340+
TEST_CONSTEXPR(_mm256_movemask_pd((__m256d)(__v4df){-1234.5678901234, 98765.4321098765, 0.000123456789, -3.14159265358979}) == 0x9);
1341+
TEST_CONSTEXPR(_mm256_movemask_pd((__m256d)(__v4df){-0.000000987654321, -99999.999999999, 42.424242424242, 314159.2653589793}) == 0x3);
13401342

13411343
int test_mm256_movemask_ps(__m256 A) {
13421344
// CHECK-LABEL: test_mm256_movemask_ps
13431345
// CHECK: call {{.*}}i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %{{.*}})
13441346
return _mm256_movemask_ps(A);
13451347
}
1348+
TEST_CONSTEXPR(_mm256_movemask_ps((__m256)(__v8sf){-12.3456f, 34.7890f, -0.0001234f, 123456.78f, -987.654f, 0.001234f, 3.14159f, -256.001f}) == 0x95);
1349+
TEST_CONSTEXPR(_mm256_movemask_ps((__m256)(__v8sf){0.333333f, -45.6789f, 999.999f, -0.9999f, 17.234f, -128.512f, 2048.0f, -3.14f}) == 0xAA);
13461350

13471351
__m256d test_mm256_mul_pd(__m256d A, __m256d B) {
13481352
// CHECK-LABEL: test_mm256_mul_pd

clang/test/CodeGen/X86/avx2-builtins.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -968,6 +968,9 @@ int test_mm256_movemask_epi8(__m256i a) {
968968
// CHECK: call {{.*}}i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %{{.*}})
969969
return _mm256_movemask_epi8(a);
970970
}
971+
TEST_CONSTEXPR(_mm256_movemask_epi8((__m256i)(__v32qu){0x7F,0x80,0x01,0xFF,0x00,0xAA,0x55,0xC3,0x12,0x8E,0x00,0xFE,0x7E,0x81,0xFF,0x01,0xB6,0x00,0x39,0x40,0xD0,0x05,0x80,0x2A,0x7B,0x00,0x90,0xFF,0x01,0x34,0xC0,0x6D}) == 0x4C516AAA);
972+
TEST_CONSTEXPR(_mm256_movemask_epi8((__m256i)(__v8si){(int)0x80FF00AA,(int)0x7F0183E1,(int)0xDEADBEEF,(int)0xC0000001,(int)0x00000000,(int)0xFFFFFFFF,(int)0x12345678,(int)0x90ABCDEF}) == 0xF0F08F3D);
973+
TEST_CONSTEXPR(_mm256_movemask_epi8((__m256i)(__v4du){0xFF00000000000080ULL,0x7F010203040506C3ULL,0x8000000000000000ULL,0x0123456789ABCDEFULL}) == 0x0F800181);
971974

972975
__m256i test_mm256_mpsadbw_epu8(__m256i x, __m256i y) {
973976
// CHECK-LABEL: test_mm256_mpsadbw_epu8

clang/test/CodeGen/X86/mmx-builtins.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -403,6 +403,10 @@ int test_mm_movemask_pi8(__m64 a) {
403403
// CHECK: call {{.*}}i32 @llvm.x86.sse2.pmovmskb.128(
404404
return _mm_movemask_pi8(a);
405405
}
406+
TEST_CONSTEXPR(_mm_movemask_pi8((__m64)((__v8qu){0x7F,0x80,0x01,0xFF,0x00,0xAA,0x55,0xC3})) == 0xAA);
407+
TEST_CONSTEXPR(_mm_movemask_pi8((__m64)((__v2si){(int)0x80FF00AA,(int)0x7F0183E1})) == 0x3D);
408+
TEST_CONSTEXPR(_mm_movemask_pi8((__m64)((__v1di){(long long)0xE110837A00924DB0ULL})) == 0xA5);
409+
406410

407411
__m64 test_mm_mul_su32(__m64 a, __m64 b) {
408412
// CHECK-LABEL: test_mm_mul_su32

0 commit comments

Comments
 (0)