Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 12 additions & 12 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def emms : X86Builtin<"void()"> {
let Features = "mmx";
}

let Attributes = [NoThrow, Const, RequiredVectorWidth<64>], Features = "sse" in {
let Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<64>], Features = "sse" in {
def vec_ext_v4hi : X86Builtin<"short(_Vector<4, short>, _Constant int)">;
def vec_set_v4hi : X86Builtin<"_Vector<4, short>(_Vector<4, short>, short, _Constant int)">;
}
Expand Down Expand Up @@ -92,13 +92,6 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
def cmpsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant char)">;
}

let Features = "sse2" in {
def vec_ext_v2di : X86Builtin<"long long int(_Vector<2, long long int>, _Constant int)">;
def vec_ext_v4si : X86Builtin<"int(_Vector<4, int>, _Constant int)">;
def vec_ext_v4sf : X86Builtin<"float(_Vector<4, float>, _Constant int)">;
def vec_ext_v8hi : X86Builtin<"short(_Vector<8, short>, _Constant int)">;
def vec_set_v8hi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, short, _Constant int)">;
}

let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def pavgb128 : X86Builtin<"_Vector<16, unsigned char>(_Vector<16, unsigned char>, _Vector<16, unsigned char>)">;
Expand All @@ -108,6 +101,12 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
def packsswb128 : X86Builtin<"_Vector<16, char>(_Vector<8, short>, _Vector<8, short>)">;
def packssdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">;
def packuswb128 : X86Builtin<"_Vector<16, char>(_Vector<8, short>, _Vector<8, short>)">;

def vec_ext_v2di : X86Builtin<"long long int(_Vector<2, long long int>, _Constant int)">;
def vec_ext_v4si : X86Builtin<"int(_Vector<4, int>, _Constant int)">;
def vec_ext_v4sf : X86Builtin<"float(_Vector<4, float>, _Constant int)">;
def vec_ext_v8hi : X86Builtin<"short(_Vector<8, short>, _Constant int)">;
def vec_set_v8hi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, short, _Constant int)">;
}

let Features = "sse3" in {
Expand Down Expand Up @@ -323,9 +322,6 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
def ptestnzc128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
def mpsadbw128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
def phminposuw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>)">;
def vec_ext_v16qi : X86Builtin<"char(_Vector<16, char>, _Constant int)">;
def vec_set_v16qi : X86Builtin<"_Vector<16, char>(_Vector<16, char>, char, _Constant int)">;
def vec_set_v4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int, _Constant int)">;
}

let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
Expand All @@ -338,6 +334,10 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVector

def pmuldq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;
def packusdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">;

def vec_ext_v16qi : X86Builtin<"char(_Vector<16, char>, _Constant int)">;
def vec_set_v16qi : X86Builtin<"_Vector<16, char>(_Vector<16, char>, char, _Constant int)">;
def vec_set_v4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int, _Constant int)">;
}

let Features = "sse4.2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
Expand Down Expand Up @@ -560,7 +560,7 @@ let Features = "avx", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
def maskstoreps : X86Builtin<"void(_Vector<4, float *>, _Vector<4, int>, _Vector<4, float>)">;
}

let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def vec_ext_v32qi : X86Builtin<"char(_Vector<32, char>, _Constant int)">;
def vec_ext_v16hi : X86Builtin<"short(_Vector<16, short>, _Constant int)">;
def vec_ext_v8si : X86Builtin<"int(_Vector<8, int>, _Constant int)">;
Expand Down
4 changes: 2 additions & 2 deletions clang/include/clang/Basic/BuiltinsX86_64.td
Original file line number Diff line number Diff line change
Expand Up @@ -56,15 +56,15 @@ let Features = "sse2", Attributes = [NoThrow] in {
def movnti64 : X86Builtin<"void(long long int *, long long int)">;
}

let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def vec_set_v2di : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, long long int, _Constant int)">;
}

let Features = "crc32", Attributes = [NoThrow, Const] in {
def crc32di : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">;
}

let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def vec_ext_v4di : X86Builtin<"long long int(_Vector<4, long long int>, _Constant int)">;
def vec_set_v4di : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, long long int, _Constant int)">;
}
Expand Down
77 changes: 77 additions & 0 deletions clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2878,6 +2878,60 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC,
return true;
}

static bool interp__builtin_vec_ext(InterpState &S, CodePtr OpPC,
const CallExpr *Call, unsigned ID) {
assert(Call->getNumArgs() == 2);

APSInt ImmAPS = popToAPSInt(S, Call->getArg(1));
const Pointer &Vec = S.Stk.pop<Pointer>();
if (!Vec.getFieldDesc()->isPrimitiveArray())
return false;

unsigned NumElts = Vec.getNumElems();
unsigned Index = static_cast<unsigned>(ImmAPS.getZExtValue() & (NumElts - 1));

switch (ID) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A switch for only two cases seems wrong?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't actually care whether this is integer/float - any reason we can't use TYPE_SWITCH ? We're going to have the same issue with shuffles

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried using TYPE_SWITCH, but even with if constexpr gurad it expands to pointer/member-pointer cases which don’t have toAPSInt(), so the shared body fails to compile and I get E No member named 'toAPSInt' in 'clang::interp::MemberPointer' clang (no_member) [2903, 37] and No member named 'toAPSInt' in 'clang::interp::Pointer' clang (no_member) [2903, 37]
I could split the logic: handle PT_Float explicitly and use INT_TYPE_SWITCH_NO_BOOL for the integer cases.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah this is always a little weird, I think this fine with an if statement. We could add another macro for such cases but that's for another patch.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@AdityaC4 Please can you raise a bug so we can track this - I really don't want to handle fp/int type elements separately if we don't need to.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Opened a tracking issue to add a numeric-only TYPE_SWITCH #161685 . For this patch I kept the tiny if (PT_Float) followed by INT_TYPE_SWITCH_NO_BOOL for integers and added a FIXME referencing the issue.

case X86::BI__builtin_ia32_vec_ext_v4sf:
S.Stk.push<Floating>(Vec.elem<Floating>(Index));
return true;
default: {
PrimType ElemPT = Vec.getFieldDesc()->getPrimType();
INT_TYPE_SWITCH_NO_BOOL(ElemPT, {
APSInt V = Vec.elem<T>(Index).toAPSInt();
pushInteger(S, V, Call->getType());
});
return true;
}
}
}

static bool interp__builtin_vec_set(InterpState &S, CodePtr OpPC,
const CallExpr *Call, unsigned ID) {
assert(Call->getNumArgs() == 3);

APSInt ImmAPS = popToAPSInt(S, Call->getArg(2));
APSInt ValAPS = popToAPSInt(S, Call->getArg(1));

const Pointer &Base = S.Stk.pop<Pointer>();
if (!Base.getFieldDesc()->isPrimitiveArray())
return false;

const Pointer &Dst = S.Stk.peek<Pointer>();

unsigned NumElts = Base.getNumElems();
unsigned Index = static_cast<unsigned>(ImmAPS.getZExtValue() & (NumElts - 1));

PrimType ElemPT = Base.getFieldDesc()->getPrimType();
INT_TYPE_SWITCH_NO_BOOL(ElemPT, {
for (unsigned I = 0; I != NumElts; ++I)
Dst.elem<T>(I) = Base.elem<T>(I);
Dst.elem<T>(Index) = static_cast<T>(ValAPS);
});

Dst.initializeAllElements();
return true;
}

bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
uint32_t BuiltinID) {
if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
Expand Down Expand Up @@ -3686,6 +3740,29 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_insert128i256:
return interp__builtin_x86_insert_subvector(S, OpPC, Call, BuiltinID);

case X86::BI__builtin_ia32_vec_ext_v4hi:
case X86::BI__builtin_ia32_vec_ext_v16qi:
case X86::BI__builtin_ia32_vec_ext_v8hi:
case X86::BI__builtin_ia32_vec_ext_v4si:
case X86::BI__builtin_ia32_vec_ext_v2di:
case X86::BI__builtin_ia32_vec_ext_v32qi:
case X86::BI__builtin_ia32_vec_ext_v16hi:
case X86::BI__builtin_ia32_vec_ext_v8si:
case X86::BI__builtin_ia32_vec_ext_v4di:
case X86::BI__builtin_ia32_vec_ext_v4sf:
return interp__builtin_vec_ext(S, OpPC, Call, BuiltinID);

case X86::BI__builtin_ia32_vec_set_v4hi:
case X86::BI__builtin_ia32_vec_set_v16qi:
case X86::BI__builtin_ia32_vec_set_v8hi:
case X86::BI__builtin_ia32_vec_set_v4si:
case X86::BI__builtin_ia32_vec_set_v2di:
case X86::BI__builtin_ia32_vec_set_v32qi:
case X86::BI__builtin_ia32_vec_set_v16hi:
case X86::BI__builtin_ia32_vec_set_v8si:
case X86::BI__builtin_ia32_vec_set_v4di:
return interp__builtin_vec_set(S, OpPC, Call, BuiltinID);

default:
S.FFDiag(S.Current->getLocation(OpPC),
diag::note_invalid_subexpr_in_const_expr)
Expand Down
65 changes: 65 additions & 0 deletions clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12235,6 +12235,41 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {

return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}

case clang::X86::BI__builtin_ia32_vec_set_v4hi:
case clang::X86::BI__builtin_ia32_vec_set_v16qi:
case clang::X86::BI__builtin_ia32_vec_set_v8hi:
case clang::X86::BI__builtin_ia32_vec_set_v4si:
case clang::X86::BI__builtin_ia32_vec_set_v2di:
case clang::X86::BI__builtin_ia32_vec_set_v32qi:
case clang::X86::BI__builtin_ia32_vec_set_v16hi:
case clang::X86::BI__builtin_ia32_vec_set_v8si:
case clang::X86::BI__builtin_ia32_vec_set_v4di: {
APValue VecVal;
APSInt Scalar, IndexAPS;
if (!EvaluateVector(E->getArg(0), VecVal, Info) ||
!EvaluateInteger(E->getArg(1), Scalar, Info) ||
!EvaluateInteger(E->getArg(2), IndexAPS, Info))
return false;

QualType ElemTy = E->getType()->castAs<VectorType>()->getElementType();
unsigned ElemWidth = Info.Ctx.getIntWidth(ElemTy);
bool ElemUnsigned = ElemTy->isUnsignedIntegerOrEnumerationType();
Scalar.setIsUnsigned(ElemUnsigned);
APSInt ElemAPS = Scalar.extOrTrunc(ElemWidth);
APValue ElemAV(ElemAPS);

unsigned NumElts = VecVal.getVectorLength();
unsigned Index =
static_cast<unsigned>(IndexAPS.getZExtValue() & (NumElts - 1));

SmallVector<APValue, 4> Elts;
Elts.reserve(NumElts);
for (unsigned EltNum = 0; EltNum != NumElts; ++EltNum)
Elts.push_back(EltNum == Index ? ElemAV : VecVal.getVectorElt(EltNum));

return Success(APValue(Elts.data(), NumElts), E);
}
}
}

Expand Down Expand Up @@ -14822,6 +14857,25 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
return HandleMaskBinOp(
[](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; });
}

case clang::X86::BI__builtin_ia32_vec_ext_v4hi:
case clang::X86::BI__builtin_ia32_vec_ext_v16qi:
case clang::X86::BI__builtin_ia32_vec_ext_v8hi:
case clang::X86::BI__builtin_ia32_vec_ext_v4si:
case clang::X86::BI__builtin_ia32_vec_ext_v2di:
case clang::X86::BI__builtin_ia32_vec_ext_v32qi:
case clang::X86::BI__builtin_ia32_vec_ext_v16hi:
case clang::X86::BI__builtin_ia32_vec_ext_v8si:
case clang::X86::BI__builtin_ia32_vec_ext_v4di: {
APValue Vec;
APSInt IdxAPS;
if (!EvaluateVector(E->getArg(0), Vec, Info) ||
!EvaluateInteger(E->getArg(1), IdxAPS, Info))
return false;
unsigned N = Vec.getVectorLength();
unsigned Idx = static_cast<unsigned>(IdxAPS.getZExtValue() & (N - 1));
return Success(Vec.getVectorElt(Idx).getInt(), E);
}
}
}

Expand Down Expand Up @@ -16638,6 +16692,17 @@ bool FloatExprEvaluator::VisitCallExpr(const CallExpr *E) {
(void)Result.fusedMultiplyAdd(SourceY, SourceZ, RM);
return true;
}

case clang::X86::BI__builtin_ia32_vec_ext_v4sf: {
APValue Vec;
APSInt IdxAPS;
if (!EvaluateVector(E->getArg(0), Vec, Info) ||
!EvaluateInteger(E->getArg(1), IdxAPS, Info))
return false;
unsigned N = Vec.getVectorLength();
unsigned Idx = static_cast<unsigned>(IdxAPS.getZExtValue() & (N - 1));
return Success(Vec.getVectorElt(Idx), E);
}
}
}

Expand Down
8 changes: 8 additions & 0 deletions clang/test/CodeGen/X86/avx-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -1039,26 +1039,30 @@ int test_mm256_extract_epi8(__m256i A) {
// CHECK: zext i8 %{{.*}} to i32
return _mm256_extract_epi8(A, 31);
}
TEST_CONSTEXPR(_mm256_extract_epi8(((__m256i)(__v32qs){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}), 45) == 13);

int test_mm256_extract_epi16(__m256i A) {
// CHECK-LABEL: test_mm256_extract_epi16
// CHECK: extractelement <16 x i16> %{{.*}}, {{i32|i64}} 15
// CHECK: zext i16 %{{.*}} to i32
return _mm256_extract_epi16(A, 15);
}
TEST_CONSTEXPR(_mm256_extract_epi16(((__m256i)(__v16hi){0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}), 50) == 4);

int test_mm256_extract_epi32(__m256i A) {
// CHECK-LABEL: test_mm256_extract_epi32
// CHECK: extractelement <8 x i32> %{{.*}}, {{i32|i64}} 7
return _mm256_extract_epi32(A, 7);
}
TEST_CONSTEXPR(_mm256_extract_epi32(((__m256i)(__v8si){0, 5, 10, 15, 20, 25, 30, 35}), 18) == 10);

#if __x86_64__
long long test_mm256_extract_epi64(__m256i A) {
// X64-LABEL: test_mm256_extract_epi64
// X64: extractelement <4 x i64> %{{.*}}, {{i32|i64}} 3
return _mm256_extract_epi64(A, 3);
}
TEST_CONSTEXPR(_mm256_extract_epi64(((__m256i)(__v4di){5, 15, 25, 35}), 14) == 25);
#endif

__m128d test_mm256_extractf128_pd(__m256d A) {
Expand Down Expand Up @@ -1120,25 +1124,29 @@ __m256i test_mm256_insert_epi8(__m256i x, char b) {
// CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, {{i32|i64}} 14
return _mm256_insert_epi8(x, b, 14);
}
TEST_CONSTEXPR(match_v32qi(_mm256_insert_epi8(((__m256i)(__v32qs){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}), 77, 47), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 77, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31));

__m256i test_mm256_insert_epi16(__m256i x, int b) {
// CHECK-LABEL: test_mm256_insert_epi16
// CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, {{i32|i64}} 4
return _mm256_insert_epi16(x, b, 4);
}
TEST_CONSTEXPR(match_v16hi(_mm256_insert_epi16(((__m256i)(__v16hi){0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}), 909, 62), 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 909, 30));

__m256i test_mm256_insert_epi32(__m256i x, int b) {
// CHECK-LABEL: test_mm256_insert_epi32
// CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, {{i32|i64}} 5
return _mm256_insert_epi32(x, b, 5);
}
TEST_CONSTEXPR(match_v8si(_mm256_insert_epi32(((__m256i)(__v8si){ 0, 5, 10, 15, 20, 25, 30, 35}), 4321, 18), 0, 5, 4321, 15, 20, 25, 30, 35));

#if __x86_64__
__m256i test_mm256_insert_epi64(__m256i x, long long b) {
// X64-LABEL: test_mm256_insert_epi64
// X64: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, {{i32|i64}} 2
return _mm256_insert_epi64(x, b, 2);
}
TEST_CONSTEXPR(match_v4di(_mm256_insert_epi64(((__m256i)(__v4di){5, 15, 25, 35}), -123456789LL, 10), 5, 15, -123456789LL, 35));
#endif

__m256d test_mm256_insertf128_pd(__m256d A, __m128d B) {
Expand Down
2 changes: 2 additions & 0 deletions clang/test/CodeGen/X86/mmx-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,7 @@ int test_mm_extract_pi16(__m64 a) {
// CHECK: extractelement <4 x i16> {{%.*}}, i64 2
return _mm_extract_pi16(a, 2);
}
TEST_CONSTEXPR(_mm_extract_pi16(((__m64)(__v4hi){10, 20, 30, 40}), 7) == 40);

__m64 test_m_from_int(int a) {
// CHECK-LABEL: test_m_from_int
Expand Down Expand Up @@ -347,6 +348,7 @@ __m64 test_mm_insert_pi16(__m64 a, int d) {
// CHECK: insertelement <4 x i16>
return _mm_insert_pi16(a, d, 2);
}
TEST_CONSTEXPR(match_v4hi(_mm_insert_pi16(((__m64)(__v4hi){0, 1, 2, 3}), 77, 10), 0, 1, 77, 3));

__m64 test_mm_madd_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_madd_pi16
Expand Down
2 changes: 2 additions & 0 deletions clang/test/CodeGen/X86/sse2-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -723,12 +723,14 @@ int test_mm_extract_epi16(__m128i A) {
// CHECK: zext i16 %{{.*}} to i32
return _mm_extract_epi16(A, 1);
}
TEST_CONSTEXPR(_mm_extract_epi16(((__m128i)(__v8hi){0, 10, 20, 30, 40, 50, 60, 70}), 25) == 10);

__m128i test_mm_insert_epi16(__m128i A, int B) {
// CHECK-LABEL: test_mm_insert_epi16
// CHECK: insertelement <8 x i16> %{{.*}}, {{i32|i64}} 0
return _mm_insert_epi16(A, B, 0);
}
TEST_CONSTEXPR(match_v8hi(_mm_insert_epi16(((__m128i)(__v8hi){0, 10, 20, 30, 40, 50, 60, 70}), 555, 17), 0, 555, 20, 30, 40, 50, 60, 70));

void test_mm_lfence(void) {
// CHECK-LABEL: test_mm_lfence
Expand Down
Loading