Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 12 additions & 12 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def emms : X86Builtin<"void()"> {
let Features = "mmx";
}

let Attributes = [NoThrow, Const, RequiredVectorWidth<64>], Features = "sse" in {
let Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<64>], Features = "sse" in {
def vec_ext_v4hi : X86Builtin<"short(_Vector<4, short>, _Constant int)">;
def vec_set_v4hi : X86Builtin<"_Vector<4, short>(_Vector<4, short>, short, _Constant int)">;
}
Expand Down Expand Up @@ -92,13 +92,6 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
def cmpsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant char)">;
}

let Features = "sse2" in {
def vec_ext_v2di : X86Builtin<"long long int(_Vector<2, long long int>, _Constant int)">;
def vec_ext_v4si : X86Builtin<"int(_Vector<4, int>, _Constant int)">;
def vec_ext_v4sf : X86Builtin<"float(_Vector<4, float>, _Constant int)">;
def vec_ext_v8hi : X86Builtin<"short(_Vector<8, short>, _Constant int)">;
def vec_set_v8hi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, short, _Constant int)">;
}

let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def pavgb128 : X86Builtin<"_Vector<16, unsigned char>(_Vector<16, unsigned char>, _Vector<16, unsigned char>)">;
Expand All @@ -108,6 +101,12 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
def packsswb128 : X86Builtin<"_Vector<16, char>(_Vector<8, short>, _Vector<8, short>)">;
def packssdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">;
def packuswb128 : X86Builtin<"_Vector<16, char>(_Vector<8, short>, _Vector<8, short>)">;

def vec_ext_v2di : X86Builtin<"long long int(_Vector<2, long long int>, _Constant int)">;
def vec_ext_v4si : X86Builtin<"int(_Vector<4, int>, _Constant int)">;
def vec_ext_v4sf : X86Builtin<"float(_Vector<4, float>, _Constant int)">;
def vec_ext_v8hi : X86Builtin<"short(_Vector<8, short>, _Constant int)">;
def vec_set_v8hi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, short, _Constant int)">;
}

let Features = "sse3" in {
Expand Down Expand Up @@ -323,9 +322,6 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
def ptestnzc128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
def mpsadbw128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
def phminposuw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>)">;
def vec_ext_v16qi : X86Builtin<"char(_Vector<16, char>, _Constant int)">;
def vec_set_v16qi : X86Builtin<"_Vector<16, char>(_Vector<16, char>, char, _Constant int)">;
def vec_set_v4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int, _Constant int)">;
}

let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
Expand All @@ -338,6 +334,10 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVector

def pmuldq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;
def packusdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">;

def vec_ext_v16qi : X86Builtin<"char(_Vector<16, char>, _Constant int)">;
def vec_set_v16qi : X86Builtin<"_Vector<16, char>(_Vector<16, char>, char, _Constant int)">;
def vec_set_v4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int, _Constant int)">;
}

let Features = "sse4.2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
Expand Down Expand Up @@ -560,7 +560,7 @@ let Features = "avx", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
def maskstoreps : X86Builtin<"void(_Vector<4, float *>, _Vector<4, int>, _Vector<4, float>)">;
}

let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def vec_ext_v32qi : X86Builtin<"char(_Vector<32, char>, _Constant int)">;
def vec_ext_v16hi : X86Builtin<"short(_Vector<16, short>, _Constant int)">;
def vec_ext_v8si : X86Builtin<"int(_Vector<8, int>, _Constant int)">;
Expand Down
4 changes: 2 additions & 2 deletions clang/include/clang/Basic/BuiltinsX86_64.td
Original file line number Diff line number Diff line change
Expand Up @@ -56,15 +56,15 @@ let Features = "sse2", Attributes = [NoThrow] in {
def movnti64 : X86Builtin<"void(long long int *, long long int)">;
}

let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def vec_set_v2di : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, long long int, _Constant int)">;
}

let Features = "crc32", Attributes = [NoThrow, Const] in {
def crc32di : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">;
}

let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def vec_ext_v4di : X86Builtin<"long long int(_Vector<4, long long int>, _Constant int)">;
def vec_set_v4di : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, long long int, _Constant int)">;
}
Expand Down
78 changes: 78 additions & 0 deletions clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2878,6 +2878,61 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC,
return true;
}

static bool interp__builtin_vec_ext(InterpState &S, CodePtr OpPC,
const CallExpr *Call, unsigned ID) {
assert(Call->getNumArgs() == 2);

APSInt ImmAPS = popToAPSInt(S, Call->getArg(1));
const Pointer &Vec = S.Stk.pop<Pointer>();
if (!Vec.getFieldDesc()->isPrimitiveArray())
return false;

unsigned NumElems = Vec.getNumElems();
unsigned Index =
static_cast<unsigned>(ImmAPS.getZExtValue() & (NumElems - 1));

PrimType ElemPT = Vec.getFieldDesc()->getPrimType();
// FIXME(#161685): Replace float+int split with a numeric-only type switch
if (ElemPT == PT_Float) {
S.Stk.push<Floating>(Vec.elem<Floating>(Index));
return true;
}
INT_TYPE_SWITCH_NO_BOOL(ElemPT, {
APSInt V = Vec.elem<T>(Index).toAPSInt();
pushInteger(S, V, Call->getType());
});

return true;
}

static bool interp__builtin_vec_set(InterpState &S, CodePtr OpPC,
const CallExpr *Call, unsigned ID) {
assert(Call->getNumArgs() == 3);

APSInt ImmAPS = popToAPSInt(S, Call->getArg(2));
APSInt ValAPS = popToAPSInt(S, Call->getArg(1));

const Pointer &Base = S.Stk.pop<Pointer>();
if (!Base.getFieldDesc()->isPrimitiveArray())
return false;

const Pointer &Dst = S.Stk.peek<Pointer>();

unsigned NumElems = Base.getNumElems();
unsigned Index =
static_cast<unsigned>(ImmAPS.getZExtValue() & (NumElems - 1));

PrimType ElemPT = Base.getFieldDesc()->getPrimType();
INT_TYPE_SWITCH_NO_BOOL(ElemPT, {
for (unsigned I = 0; I != NumElems; ++I)
Dst.elem<T>(I) = Base.elem<T>(I);
Dst.elem<T>(Index) = static_cast<T>(ValAPS);
});

Dst.initializeAllElements();
return true;
}

bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
uint32_t BuiltinID) {
if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
Expand Down Expand Up @@ -3686,6 +3741,29 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_insert128i256:
return interp__builtin_x86_insert_subvector(S, OpPC, Call, BuiltinID);

case X86::BI__builtin_ia32_vec_ext_v4hi:
case X86::BI__builtin_ia32_vec_ext_v16qi:
case X86::BI__builtin_ia32_vec_ext_v8hi:
case X86::BI__builtin_ia32_vec_ext_v4si:
case X86::BI__builtin_ia32_vec_ext_v2di:
case X86::BI__builtin_ia32_vec_ext_v32qi:
case X86::BI__builtin_ia32_vec_ext_v16hi:
case X86::BI__builtin_ia32_vec_ext_v8si:
case X86::BI__builtin_ia32_vec_ext_v4di:
case X86::BI__builtin_ia32_vec_ext_v4sf:
return interp__builtin_vec_ext(S, OpPC, Call, BuiltinID);

case X86::BI__builtin_ia32_vec_set_v4hi:
case X86::BI__builtin_ia32_vec_set_v16qi:
case X86::BI__builtin_ia32_vec_set_v8hi:
case X86::BI__builtin_ia32_vec_set_v4si:
case X86::BI__builtin_ia32_vec_set_v2di:
case X86::BI__builtin_ia32_vec_set_v32qi:
case X86::BI__builtin_ia32_vec_set_v16hi:
case X86::BI__builtin_ia32_vec_set_v8si:
case X86::BI__builtin_ia32_vec_set_v4di:
return interp__builtin_vec_set(S, OpPC, Call, BuiltinID);

default:
S.FFDiag(S.Current->getLocation(OpPC),
diag::note_invalid_subexpr_in_const_expr)
Expand Down
65 changes: 65 additions & 0 deletions clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12235,6 +12235,41 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {

return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}

case clang::X86::BI__builtin_ia32_vec_set_v4hi:
case clang::X86::BI__builtin_ia32_vec_set_v16qi:
case clang::X86::BI__builtin_ia32_vec_set_v8hi:
case clang::X86::BI__builtin_ia32_vec_set_v4si:
case clang::X86::BI__builtin_ia32_vec_set_v2di:
case clang::X86::BI__builtin_ia32_vec_set_v32qi:
case clang::X86::BI__builtin_ia32_vec_set_v16hi:
case clang::X86::BI__builtin_ia32_vec_set_v8si:
case clang::X86::BI__builtin_ia32_vec_set_v4di: {
APValue VecVal;
APSInt Scalar, IndexAPS;
if (!EvaluateVector(E->getArg(0), VecVal, Info) ||
!EvaluateInteger(E->getArg(1), Scalar, Info) ||
!EvaluateInteger(E->getArg(2), IndexAPS, Info))
return false;

QualType ElemTy = E->getType()->castAs<VectorType>()->getElementType();
unsigned ElemWidth = Info.Ctx.getIntWidth(ElemTy);
bool ElemUnsigned = ElemTy->isUnsignedIntegerOrEnumerationType();
Scalar.setIsUnsigned(ElemUnsigned);
APSInt ElemAPS = Scalar.extOrTrunc(ElemWidth);
APValue ElemAV(ElemAPS);

unsigned NumElems = VecVal.getVectorLength();
unsigned Index =
static_cast<unsigned>(IndexAPS.getZExtValue() & (NumElems - 1));

SmallVector<APValue, 4> Elems;
Elems.reserve(NumElems);
for (unsigned ElemNum = 0; ElemNum != NumElems; ++ElemNum)
Elems.push_back(ElemNum == Index ? ElemAV : VecVal.getVectorElt(ElemNum));

return Success(APValue(Elems.data(), NumElems), E);
}
}
}

Expand Down Expand Up @@ -14822,6 +14857,25 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
return HandleMaskBinOp(
[](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; });
}

case clang::X86::BI__builtin_ia32_vec_ext_v4hi:
case clang::X86::BI__builtin_ia32_vec_ext_v16qi:
case clang::X86::BI__builtin_ia32_vec_ext_v8hi:
case clang::X86::BI__builtin_ia32_vec_ext_v4si:
case clang::X86::BI__builtin_ia32_vec_ext_v2di:
case clang::X86::BI__builtin_ia32_vec_ext_v32qi:
case clang::X86::BI__builtin_ia32_vec_ext_v16hi:
case clang::X86::BI__builtin_ia32_vec_ext_v8si:
case clang::X86::BI__builtin_ia32_vec_ext_v4di: {
APValue Vec;
APSInt IdxAPS;
if (!EvaluateVector(E->getArg(0), Vec, Info) ||
!EvaluateInteger(E->getArg(1), IdxAPS, Info))
return false;
unsigned N = Vec.getVectorLength();
unsigned Idx = static_cast<unsigned>(IdxAPS.getZExtValue() & (N - 1));
return Success(Vec.getVectorElt(Idx).getInt(), E);
}
}
}

Expand Down Expand Up @@ -16638,6 +16692,17 @@ bool FloatExprEvaluator::VisitCallExpr(const CallExpr *E) {
(void)Result.fusedMultiplyAdd(SourceY, SourceZ, RM);
return true;
}

case clang::X86::BI__builtin_ia32_vec_ext_v4sf: {
APValue Vec;
APSInt IdxAPS;
if (!EvaluateVector(E->getArg(0), Vec, Info) ||
!EvaluateInteger(E->getArg(1), IdxAPS, Info))
return false;
unsigned N = Vec.getVectorLength();
unsigned Idx = static_cast<unsigned>(IdxAPS.getZExtValue() & (N - 1));
return Success(Vec.getVectorElt(Idx), E);
}
}
}

Expand Down
8 changes: 8 additions & 0 deletions clang/test/CodeGen/X86/avx-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -1039,26 +1039,30 @@ int test_mm256_extract_epi8(__m256i A) {
// CHECK: zext i8 %{{.*}} to i32
return _mm256_extract_epi8(A, 31);
}
TEST_CONSTEXPR(_mm256_extract_epi8(((__m256i)(__v32qs){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}), 45) == 13);

int test_mm256_extract_epi16(__m256i A) {
// CHECK-LABEL: test_mm256_extract_epi16
// CHECK: extractelement <16 x i16> %{{.*}}, {{i32|i64}} 15
// CHECK: zext i16 %{{.*}} to i32
return _mm256_extract_epi16(A, 15);
}
TEST_CONSTEXPR(_mm256_extract_epi16(((__m256i)(__v16hi){0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}), 50) == 4);

int test_mm256_extract_epi32(__m256i A) {
// CHECK-LABEL: test_mm256_extract_epi32
// CHECK: extractelement <8 x i32> %{{.*}}, {{i32|i64}} 7
return _mm256_extract_epi32(A, 7);
}
TEST_CONSTEXPR(_mm256_extract_epi32(((__m256i)(__v8si){0, 5, 10, 15, 20, 25, 30, 35}), 18) == 10);

#if __x86_64__
long long test_mm256_extract_epi64(__m256i A) {
// X64-LABEL: test_mm256_extract_epi64
// X64: extractelement <4 x i64> %{{.*}}, {{i32|i64}} 3
return _mm256_extract_epi64(A, 3);
}
TEST_CONSTEXPR(_mm256_extract_epi64(((__m256i)(__v4di){5, 15, 25, 35}), 14) == 25);
#endif

__m128d test_mm256_extractf128_pd(__m256d A) {
Expand Down Expand Up @@ -1120,25 +1124,29 @@ __m256i test_mm256_insert_epi8(__m256i x, char b) {
// CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, {{i32|i64}} 14
return _mm256_insert_epi8(x, b, 14);
}
TEST_CONSTEXPR(match_v32qi(_mm256_insert_epi8(((__m256i)(__v32qs){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}), 77, 47), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 77, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31));

__m256i test_mm256_insert_epi16(__m256i x, int b) {
// CHECK-LABEL: test_mm256_insert_epi16
// CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, {{i32|i64}} 4
return _mm256_insert_epi16(x, b, 4);
}
TEST_CONSTEXPR(match_v16hi(_mm256_insert_epi16(((__m256i)(__v16hi){0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}), 909, 62), 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 909, 30));

__m256i test_mm256_insert_epi32(__m256i x, int b) {
// CHECK-LABEL: test_mm256_insert_epi32
// CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, {{i32|i64}} 5
return _mm256_insert_epi32(x, b, 5);
}
TEST_CONSTEXPR(match_v8si(_mm256_insert_epi32(((__m256i)(__v8si){ 0, 5, 10, 15, 20, 25, 30, 35}), 4321, 18), 0, 5, 4321, 15, 20, 25, 30, 35));

#if __x86_64__
__m256i test_mm256_insert_epi64(__m256i x, long long b) {
// X64-LABEL: test_mm256_insert_epi64
// X64: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, {{i32|i64}} 2
return _mm256_insert_epi64(x, b, 2);
}
TEST_CONSTEXPR(match_v4di(_mm256_insert_epi64(((__m256i)(__v4di){5, 15, 25, 35}), -123456789LL, 10), 5, 15, -123456789LL, 35));
#endif

__m256d test_mm256_insertf128_pd(__m256d A, __m128d B) {
Expand Down
2 changes: 2 additions & 0 deletions clang/test/CodeGen/X86/mmx-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,7 @@ int test_mm_extract_pi16(__m64 a) {
// CHECK: extractelement <4 x i16> {{%.*}}, i64 2
return _mm_extract_pi16(a, 2);
}
TEST_CONSTEXPR(_mm_extract_pi16(((__m64)(__v4hi){10, 20, 30, 40}), 7) == 40);

__m64 test_m_from_int(int a) {
// CHECK-LABEL: test_m_from_int
Expand Down Expand Up @@ -347,6 +348,7 @@ __m64 test_mm_insert_pi16(__m64 a, int d) {
// CHECK: insertelement <4 x i16>
return _mm_insert_pi16(a, d, 2);
}
TEST_CONSTEXPR(match_v4hi(_mm_insert_pi16(((__m64)(__v4hi){0, 1, 2, 3}), 77, 10), 0, 1, 77, 3));

__m64 test_mm_madd_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_madd_pi16
Expand Down
2 changes: 2 additions & 0 deletions clang/test/CodeGen/X86/sse2-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -723,12 +723,14 @@ int test_mm_extract_epi16(__m128i A) {
// CHECK: zext i16 %{{.*}} to i32
return _mm_extract_epi16(A, 1);
}
TEST_CONSTEXPR(_mm_extract_epi16(((__m128i)(__v8hi){0, 10, 20, 30, 40, 50, 60, 70}), 25) == 10);

__m128i test_mm_insert_epi16(__m128i A, int B) {
// CHECK-LABEL: test_mm_insert_epi16
// CHECK: insertelement <8 x i16> %{{.*}}, {{i32|i64}} 0
return _mm_insert_epi16(A, B, 0);
}
TEST_CONSTEXPR(match_v8hi(_mm_insert_epi16(((__m128i)(__v8hi){0, 10, 20, 30, 40, 50, 60, 70}), 555, 17), 0, 555, 20, 30, 40, 50, 60, 70));

void test_mm_lfence(void) {
// CHECK-LABEL: test_mm_lfence
Expand Down
Loading