-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[X86][Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - allow element extraction/insertion intrinsics to be used in constexpr #159753 #161302
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-clang @llvm/pr-subscribers-backend-x86 Author: Aditya Chaudhari (AdityaC4) ChangesFIXES: #159753 Enable constexpr evaluation for X86 vector element extract/insert builtins. and adds corresponding tests Full diff: https://github.com/llvm/llvm-project/pull/161302.diff 8 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 77e599587edc3..11e0a6611c5d8 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -52,7 +52,7 @@ def emms : X86Builtin<"void()"> {
let Features = "mmx";
}
-let Attributes = [NoThrow, Const, RequiredVectorWidth<64>], Features = "sse" in {
+let Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<64>], Features = "sse" in {
def vec_ext_v4hi : X86Builtin<"short(_Vector<4, short>, _Constant int)">;
def vec_set_v4hi : X86Builtin<"_Vector<4, short>(_Vector<4, short>, short, _Constant int)">;
}
@@ -92,13 +92,6 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
def cmpsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant char)">;
}
- let Features = "sse2" in {
- def vec_ext_v2di : X86Builtin<"long long int(_Vector<2, long long int>, _Constant int)">;
- def vec_ext_v4si : X86Builtin<"int(_Vector<4, int>, _Constant int)">;
- def vec_ext_v4sf : X86Builtin<"float(_Vector<4, float>, _Constant int)">;
- def vec_ext_v8hi : X86Builtin<"short(_Vector<8, short>, _Constant int)">;
- def vec_set_v8hi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, short, _Constant int)">;
- }
let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def pavgb128 : X86Builtin<"_Vector<16, unsigned char>(_Vector<16, unsigned char>, _Vector<16, unsigned char>)">;
@@ -108,6 +101,12 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
def packsswb128 : X86Builtin<"_Vector<16, char>(_Vector<8, short>, _Vector<8, short>)">;
def packssdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">;
def packuswb128 : X86Builtin<"_Vector<16, char>(_Vector<8, short>, _Vector<8, short>)">;
+
+ def vec_ext_v2di : X86Builtin<"long long int(_Vector<2, long long int>, _Constant int)">;
+ def vec_ext_v4si : X86Builtin<"int(_Vector<4, int>, _Constant int)">;
+ def vec_ext_v4sf : X86Builtin<"float(_Vector<4, float>, _Constant int)">;
+ def vec_ext_v8hi : X86Builtin<"short(_Vector<8, short>, _Constant int)">;
+ def vec_set_v8hi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, short, _Constant int)">;
}
let Features = "sse3" in {
@@ -323,9 +322,6 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
def ptestnzc128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
def mpsadbw128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
def phminposuw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>)">;
- def vec_ext_v16qi : X86Builtin<"char(_Vector<16, char>, _Constant int)">;
- def vec_set_v16qi : X86Builtin<"_Vector<16, char>(_Vector<16, char>, char, _Constant int)">;
- def vec_set_v4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int, _Constant int)">;
}
let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
@@ -338,6 +334,10 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVector
def pmuldq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;
def packusdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">;
+
+ def vec_ext_v16qi : X86Builtin<"char(_Vector<16, char>, _Constant int)">;
+ def vec_set_v16qi : X86Builtin<"_Vector<16, char>(_Vector<16, char>, char, _Constant int)">;
+ def vec_set_v4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int, _Constant int)">;
}
let Features = "sse4.2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
@@ -560,7 +560,7 @@ let Features = "avx", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
def maskstoreps : X86Builtin<"void(_Vector<4, float *>, _Vector<4, int>, _Vector<4, float>)">;
}
-let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def vec_ext_v32qi : X86Builtin<"char(_Vector<32, char>, _Constant int)">;
def vec_ext_v16hi : X86Builtin<"short(_Vector<16, short>, _Constant int)">;
def vec_ext_v8si : X86Builtin<"int(_Vector<8, int>, _Constant int)">;
diff --git a/clang/include/clang/Basic/BuiltinsX86_64.td b/clang/include/clang/Basic/BuiltinsX86_64.td
index 214b175ace5eb..275278c5ac089 100644
--- a/clang/include/clang/Basic/BuiltinsX86_64.td
+++ b/clang/include/clang/Basic/BuiltinsX86_64.td
@@ -56,7 +56,7 @@ let Features = "sse2", Attributes = [NoThrow] in {
def movnti64 : X86Builtin<"void(long long int *, long long int)">;
}
-let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def vec_set_v2di : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, long long int, _Constant int)">;
}
@@ -64,7 +64,7 @@ let Features = "crc32", Attributes = [NoThrow, Const] in {
def crc32di : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">;
}
-let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def vec_ext_v4di : X86Builtin<"long long int(_Vector<4, long long int>, _Constant int)">;
def vec_set_v4di : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, long long int, _Constant int)">;
}
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index a2e97fcafdfef..b0b885d1bb097 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2878,6 +2878,66 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC,
return true;
}
+static bool interp__builtin_vec_ext(InterpState &S, CodePtr OpPC,
+ const CallExpr *Call, unsigned ID) {
+ assert(Call->getNumArgs() == 2);
+
+ APSInt ImmAPS = popToAPSInt(S, Call->getArg(1));
+ const Pointer &Vec = S.Stk.pop<Pointer>();
+ if (!Vec.getFieldDesc()->isPrimitiveArray())
+ return false;
+
+ unsigned NumElts = Vec.getNumElems();
+ unsigned Index =
+ (NumElts == 0)
+ ? 0u
+ : static_cast<unsigned>(ImmAPS.getZExtValue() & (NumElts - 1));
+
+ switch (ID) {
+ case X86::BI__builtin_ia32_vec_ext_v4sf:
+ S.Stk.push<Floating>(Vec.elem<Floating>(Index));
+ return true;
+ default: {
+ PrimType ElemPT = Vec.getFieldDesc()->getPrimType();
+ INT_TYPE_SWITCH_NO_BOOL(ElemPT, {
+ APSInt V = Vec.elem<T>(Index).toAPSInt();
+ pushInteger(S, V, Call->getType());
+ });
+ return true;
+ }
+ }
+}
+
+static bool interp__builtin_vec_set(InterpState &S, CodePtr OpPC,
+ const CallExpr *Call, unsigned ID) {
+ assert(Call->getNumArgs() == 3);
+
+ APSInt ImmAPS = popToAPSInt(S, Call->getArg(2));
+ APSInt ValAPS = popToAPSInt(S, Call->getArg(1));
+
+ const Pointer &Base = S.Stk.pop<Pointer>();
+ if (!Base.getFieldDesc()->isPrimitiveArray())
+ return false;
+
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+
+ unsigned NumElts = Base.getNumElems();
+ unsigned Index =
+ (NumElts == 0)
+ ? 0u
+ : static_cast<unsigned>(ImmAPS.getZExtValue() & (NumElts - 1));
+
+ PrimType ElemPT = Base.getFieldDesc()->getPrimType();
+ INT_TYPE_SWITCH_NO_BOOL(ElemPT, {
+ for (unsigned I = 0; I != NumElts; ++I)
+ Dst.elem<T>(I) = Base.elem<T>(I);
+ Dst.elem<T>(Index) = static_cast<T>(ValAPS);
+ });
+
+ Dst.initializeAllElements();
+ return true;
+}
+
bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
uint32_t BuiltinID) {
if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
@@ -3686,6 +3746,29 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_insert128i256:
return interp__builtin_x86_insert_subvector(S, OpPC, Call, BuiltinID);
+ case X86::BI__builtin_ia32_vec_ext_v4hi:
+ case X86::BI__builtin_ia32_vec_ext_v16qi:
+ case X86::BI__builtin_ia32_vec_ext_v8hi:
+ case X86::BI__builtin_ia32_vec_ext_v4si:
+ case X86::BI__builtin_ia32_vec_ext_v2di:
+ case X86::BI__builtin_ia32_vec_ext_v32qi:
+ case X86::BI__builtin_ia32_vec_ext_v16hi:
+ case X86::BI__builtin_ia32_vec_ext_v8si:
+ case X86::BI__builtin_ia32_vec_ext_v4di:
+ case X86::BI__builtin_ia32_vec_ext_v4sf:
+ return interp__builtin_vec_ext(S, OpPC, Call, BuiltinID);
+
+ case X86::BI__builtin_ia32_vec_set_v4hi:
+ case X86::BI__builtin_ia32_vec_set_v16qi:
+ case X86::BI__builtin_ia32_vec_set_v8hi:
+ case X86::BI__builtin_ia32_vec_set_v4si:
+ case X86::BI__builtin_ia32_vec_set_v2di:
+ case X86::BI__builtin_ia32_vec_set_v32qi:
+ case X86::BI__builtin_ia32_vec_set_v16hi:
+ case X86::BI__builtin_ia32_vec_set_v8si:
+ case X86::BI__builtin_ia32_vec_set_v4di:
+ return interp__builtin_vec_set(S, OpPC, Call, BuiltinID);
+
default:
S.FFDiag(S.Current->getLocation(OpPC),
diag::note_invalid_subexpr_in_const_expr)
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index b706b14945b6d..98cdbc03d00aa 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12235,6 +12235,41 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
+
+ case clang::X86::BI__builtin_ia32_vec_set_v4hi:
+ case clang::X86::BI__builtin_ia32_vec_set_v16qi:
+ case clang::X86::BI__builtin_ia32_vec_set_v8hi:
+ case clang::X86::BI__builtin_ia32_vec_set_v4si:
+ case clang::X86::BI__builtin_ia32_vec_set_v2di:
+ case clang::X86::BI__builtin_ia32_vec_set_v32qi:
+ case clang::X86::BI__builtin_ia32_vec_set_v16hi:
+ case clang::X86::BI__builtin_ia32_vec_set_v8si:
+ case clang::X86::BI__builtin_ia32_vec_set_v4di: {
+ APValue VecVal;
+ APSInt Scalar, IndexAPS;
+ if (!EvaluateVector(E->getArg(0), VecVal, Info) ||
+ !EvaluateInteger(E->getArg(1), Scalar, Info) ||
+ !EvaluateInteger(E->getArg(2), IndexAPS, Info))
+ return false;
+
+ QualType ElemTy = E->getType()->castAs<VectorType>()->getElementType();
+ unsigned ElemWidth = Info.Ctx.getIntWidth(ElemTy);
+ bool ElemUnsigned = ElemTy->isUnsignedIntegerOrEnumerationType();
+ Scalar.setIsUnsigned(ElemUnsigned);
+ APSInt ElemAPS = Scalar.extOrTrunc(ElemWidth);
+ APValue ElemAV(ElemAPS);
+
+ unsigned NumElts = VecVal.getVectorLength();
+ unsigned Index =
+ static_cast<unsigned>(IndexAPS.getZExtValue() & (NumElts - 1));
+
+ SmallVector<APValue, 4> Elts;
+ Elts.reserve(NumElts);
+ for (unsigned EltNum = 0; EltNum != NumElts; ++EltNum)
+ Elts.push_back(EltNum == Index ? ElemAV : VecVal.getVectorElt(EltNum));
+
+ return Success(APValue(Elts.data(), NumElts), E);
+ }
}
}
@@ -14822,6 +14857,25 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
return HandleMaskBinOp(
[](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; });
}
+
+ case clang::X86::BI__builtin_ia32_vec_ext_v4hi:
+ case clang::X86::BI__builtin_ia32_vec_ext_v16qi:
+ case clang::X86::BI__builtin_ia32_vec_ext_v8hi:
+ case clang::X86::BI__builtin_ia32_vec_ext_v4si:
+ case clang::X86::BI__builtin_ia32_vec_ext_v2di:
+ case clang::X86::BI__builtin_ia32_vec_ext_v32qi:
+ case clang::X86::BI__builtin_ia32_vec_ext_v16hi:
+ case clang::X86::BI__builtin_ia32_vec_ext_v8si:
+ case clang::X86::BI__builtin_ia32_vec_ext_v4di: {
+ APValue Vec;
+ APSInt IdxAPS;
+ if (!EvaluateVector(E->getArg(0), Vec, Info) ||
+ !EvaluateInteger(E->getArg(1), IdxAPS, Info))
+ return false;
+ unsigned N = Vec.getVectorLength();
+ unsigned Idx = static_cast<unsigned>(IdxAPS.getZExtValue() & (N - 1));
+ return Success(Vec.getVectorElt(Idx).getInt(), E);
+ }
}
}
@@ -16638,6 +16692,17 @@ bool FloatExprEvaluator::VisitCallExpr(const CallExpr *E) {
(void)Result.fusedMultiplyAdd(SourceY, SourceZ, RM);
return true;
}
+
+ case clang::X86::BI__builtin_ia32_vec_ext_v4sf: {
+ APValue Vec;
+ APSInt IdxAPS;
+ if (!EvaluateVector(E->getArg(0), Vec, Info) ||
+ !EvaluateInteger(E->getArg(1), IdxAPS, Info))
+ return false;
+ unsigned N = Vec.getVectorLength();
+ unsigned Idx = static_cast<unsigned>(IdxAPS.getZExtValue() & (N - 1));
+ return Success(Vec.getVectorElt(Idx), E);
+ }
}
}
diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index 3018bb9719b89..5f08b6be81ab7 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -1039,6 +1039,7 @@ int test_mm256_extract_epi8(__m256i A) {
// CHECK: zext i8 %{{.*}} to i32
return _mm256_extract_epi8(A, 31);
}
+TEST_CONSTEXPR(_mm256_extract_epi8(((__m256i)(__v32qs){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}), 45) == 13);
int test_mm256_extract_epi16(__m256i A) {
// CHECK-LABEL: test_mm256_extract_epi16
@@ -1046,12 +1047,14 @@ int test_mm256_extract_epi16(__m256i A) {
// CHECK: zext i16 %{{.*}} to i32
return _mm256_extract_epi16(A, 15);
}
+TEST_CONSTEXPR(_mm256_extract_epi16(((__m256i)(__v16hi){0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}), 50) == 4);
int test_mm256_extract_epi32(__m256i A) {
// CHECK-LABEL: test_mm256_extract_epi32
// CHECK: extractelement <8 x i32> %{{.*}}, {{i32|i64}} 7
return _mm256_extract_epi32(A, 7);
}
+TEST_CONSTEXPR(_mm256_extract_epi32(((__m256i)(__v8si){0, 5, 10, 15, 20, 25, 30, 35}), 18) == 10);
#if __x86_64__
long long test_mm256_extract_epi64(__m256i A) {
@@ -1059,6 +1062,7 @@ long long test_mm256_extract_epi64(__m256i A) {
// X64: extractelement <4 x i64> %{{.*}}, {{i32|i64}} 3
return _mm256_extract_epi64(A, 3);
}
+TEST_CONSTEXPR(_mm256_extract_epi64(((__m256i)(__v4di){5, 15, 25, 35}), 14) == 25);
#endif
__m128d test_mm256_extractf128_pd(__m256d A) {
@@ -1120,18 +1124,21 @@ __m256i test_mm256_insert_epi8(__m256i x, char b) {
// CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, {{i32|i64}} 14
return _mm256_insert_epi8(x, b, 14);
}
+TEST_CONSTEXPR(match_v32qi(_mm256_insert_epi8(((__m256i)(__v32qs){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}), 77, 47), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 77, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31));
__m256i test_mm256_insert_epi16(__m256i x, int b) {
// CHECK-LABEL: test_mm256_insert_epi16
// CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, {{i32|i64}} 4
return _mm256_insert_epi16(x, b, 4);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_insert_epi16(((__m256i)(__v16hi){0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}), 909, 62), 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 909, 30));
__m256i test_mm256_insert_epi32(__m256i x, int b) {
// CHECK-LABEL: test_mm256_insert_epi32
// CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, {{i32|i64}} 5
return _mm256_insert_epi32(x, b, 5);
}
+TEST_CONSTEXPR(match_v8si(_mm256_insert_epi32(((__m256i)(__v8si){ 0, 5, 10, 15, 20, 25, 30, 35}), 4321, 18), 0, 5, 4321, 15, 20, 25, 30, 35));
#if __x86_64__
__m256i test_mm256_insert_epi64(__m256i x, long long b) {
@@ -1139,6 +1146,7 @@ __m256i test_mm256_insert_epi64(__m256i x, long long b) {
// X64: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, {{i32|i64}} 2
return _mm256_insert_epi64(x, b, 2);
}
+TEST_CONSTEXPR(match_v4di(_mm256_insert_epi64(((__m256i)(__v4di){5, 15, 25, 35}), -123456789LL, 10), 5, 15, -123456789LL, 35));
#endif
__m256d test_mm256_insertf128_pd(__m256d A, __m128d B) {
diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c
index 26c5f7315457e..f9ee32e440795 100644
--- a/clang/test/CodeGen/X86/mmx-builtins.c
+++ b/clang/test/CodeGen/X86/mmx-builtins.c
@@ -292,6 +292,7 @@ int test_mm_extract_pi16(__m64 a) {
// CHECK: extractelement <4 x i16> {{%.*}}, i64 2
return _mm_extract_pi16(a, 2);
}
+TEST_CONSTEXPR(_mm_extract_pi16(((__m64)(__v4hi){10, 20, 30, 40}), 7) == 40);
__m64 test_m_from_int(int a) {
// CHECK-LABEL: test_m_from_int
@@ -347,6 +348,7 @@ __m64 test_mm_insert_pi16(__m64 a, int d) {
// CHECK: insertelement <4 x i16>
return _mm_insert_pi16(a, d, 2);
}
+TEST_CONSTEXPR(match_v4hi(_mm_insert_pi16(((__m64)(__v4hi){0, 1, 2, 3}), 77, 10), 0, 1, 77, 3));
__m64 test_mm_madd_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_madd_pi16
diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c
index 84b90c09444c2..65bfec39c8f5a 100644
--- a/clang/test/CodeGen/X86/sse2-builtins.c
+++ b/clang/test/CodeGen/X86/sse2-builtins.c
@@ -723,12 +723,14 @@ int test_mm_extract_epi16(__m128i A) {
// CHECK: zext i16 %{{.*}} to i32
return _mm_extract_epi16(A, 1);
}
+TEST_CONSTEXPR(_mm_extract_epi16(((__m128i)(__v8hi){0, 10, 20, 30, 40, 50, 60, 70}), 25) == 10);
__m128i test_mm_insert_epi16(__m128i A, int B) {
// CHECK-LABEL: test_mm_insert_epi16
// CHECK: insertelement <8 x i16> %{{.*}}, {{i32|i64}} 0
return _mm_insert_epi16(A, B, 0);
}
+TEST_CONSTEXPR(match_v8hi(_mm_insert_epi16(((__m128i)(__v8hi){0, 10, 20, 30, 40, 50, 60, 70}), 555, 17), 0, 555, 20, 30, 40, 50, 60, 70));
void test_mm_lfence(void) {
// CHECK-LABEL: test_mm_lfence
diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c
index 3c3724643870e..eee479a755ab4 100644
--- a/clang/test/CodeGen/X86/sse41-builtins.c
+++ b/clang/test/CodeGen/X86/sse41-builtins.c
@@ -231,24 +231,28 @@ int test_mm_extract_epi8(__m128i x) {
// CHECK: zext i8 %{{.*}} to i32
return _mm_extract_epi8(x, 1);
}
+TEST_CONSTEXPR(_mm_extract_epi8(((__m128i)(__v16qi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), 20) == 4);
int test_mm_extract_epi32(__m128i x) {
// CHECK-LABEL: test_mm_extract_epi32
// CHECK: extractelement <4 x i32> %{{.*}}, {{i32|i64}} 1
return _mm_extract_epi32(x, 1);
}
+TEST_CONSTEXPR(_mm_extract_epi32(((__m128i)(__v4si){1, 3, 5, 7}), 10) == 5);
long long test_mm_extract_epi64(__m128i x) {
// CHECK-LABEL: test_mm_extract_epi64
// CHECK: extractelement <2 x i64> %{{.*}}, {{i32|i64}} 1
return _mm_extract_epi64(x, 1);
}
+TEST_CONSTEXPR(_mm_extract_epi64(((__m128i)(__v2di){11, 22}), 5) == 22);
int test_mm_extract_ps(__m128 x) {
// CHECK-LABEL: test_mm_extract_ps
// CHECK: extractelement <4 x float> %{{.*}}, {{i32|i64}} 1
return _mm_extract_ps(x, 1);
}
+TEST_CONSTEXPR(_mm_extract_ps(((__m128){1.25f, 2.5f, 3.75f, 5.0f}), 6) == __builtin_bit_cast(int, 3.75f));
__m128d test_mm_floor_pd(__m128d x) {
// CHECK-LABEL: test_mm_floor_pd
@@ -279,12 +283,14 @@ __m128i test_mm_insert_epi8(__m128i x, char b) {
// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, {{i32|i64}} 1
return _mm_insert_epi8(x, b, 1);
}
+TEST_CONSTEXPR(match_v16qi(_mm_insert_epi8(((__m128i)(__v16qi){ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), 101, 33), 0, 101, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15));
__m128i test_mm_insert_epi32(__m128i x, int b) {
// CHECK-LABEL: test_mm_insert_epi32
// CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, {{i32|i64}} 1
return _mm_insert_epi32(x, b, 1);
}
+TEST_CONSTEXPR(match_v4si(_mm_insert_epi32(((__m128i)(__v4si){0, 1, 2, 3}), 5678, 18), 0, 1, 5678, 3));
#ifdef __x86_64__
__m128i test_mm_insert_epi64(__m128i x, long long b) {
@@ -292,6 +298,7 @@ __m128i test_mm_insert_epi64(__m128i x, long long b) {
// X64: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, {{i32|i64}} 1
return _mm_insert_epi64(x, b, 1);
}
+TEST_CONSTEXPR(match_v2di(_mm_insert_epi64(((__m128i)(__v2di){100, 200}), -999, 9), 100, -999));
#endif
__m128 test_mm_insert_ps(__m128 x, __m128 y) {
|
…allow element extraction/insertion intrinsics to be used in constexpr llvm#159753 — rebase/resolve merge conflict with latest main
8d91846
to
0c07c06
Compare
unsigned NumElts = Vec.getNumElems();
unsigned Index = static_cast<unsigned>(ImmAPS.getZExtValue() & (NumElts - 1));

switch (ID) {
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A switch for only two cases seems wrong?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We don't actually care whether this is integer/float - any reason we can't use TYPE_SWITCH ? We're going to have the same issue with shuffles
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I tried using TYPE_SWITCH, but even with an if constexpr
guard it expands to pointer/member-pointer cases which don’t have toAPSInt(), so the shared body fails to compile and I get the error: No member named 'toAPSInt' in 'clang::interp::MemberPointer' [2903, 37]
and: No member named 'toAPSInt' in 'clang::interp::Pointer' [2903, 37]
I could split the logic: handle PT_Float explicitly and use INT_TYPE_SWITCH_NO_BOOL for the integer cases.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah this is always a little weird, I think this fine with an if
statement. We could add another macro for such cases but that's for another patch.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@AdityaC4 Please can you raise a bug so we can track this - I really don't want to handle fp/int type elements separately if we don't need to.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Opened a tracking issue to add a numeric-only TYPE_SWITCH #161685 . For this patch I kept the tiny if (PT_Float)
followed by INT_TYPE_SWITCH_NO_BOOL
for integers and added a FIXME
referencing the issue.
…andler; consistently use`Elem`
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM - cheers
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/190/builds/28456 Here is the relevant piece of the build log for the reference
|
…allow element extraction/insertion intrinsics to be used in constexpr llvm#159753 (llvm#161302) FIXES: llvm#159753 Enable constexpr evaluation for X86 vector element extract/insert builtins. and adds corresponding tests Index is masked with `(Idx & (NumElts - 1))`, matching existing CodeGen.
FIXES: #159753
Enable constexpr evaluation for X86 vector element extract/insert builtins. and adds corresponding tests
Index is masked with
(Idx & (NumElts - 1))
, matching existing CodeGen.