[Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - allow MMX/SSE/AVX2 PSIGN intrinsics to be used in constexpr #163685
Conversation
@RKSimon 🙏
@llvm/pr-subscribers-clang @llvm/pr-subscribers-backend-x86

Author: Shawn K (kimsh02)

Changes

Fix #155812

Full diff: https://github.com/llvm/llvm-project/pull/163685.diff

8 Files Affected:
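For orientation, a minimal sketch (not taken from the patch; the helper name psign_elem is made up) of the per-element PSIGN rule that both constant evaluators below implement: the control element in the second operand decides whether the corresponding element of the first operand is negated, zeroed, or passed through.

// Illustrative only: scalar model of a single PSIGNB/PSIGNW/PSIGND lane.
template <typename T>
static constexpr T psign_elem(T a, T b) {
  if (b < 0)
    return static_cast<T>(-a); // negative control -> negate
  if (b == 0)
    return T(0);               // zero control -> zero
  return a;                    // positive control -> pass through
}
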
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index d03c778740ad3..825534436704c 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -123,13 +123,13 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
}
}
- let Features = "ssse3" in {
- def psignb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
- def psignw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
- def psignd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
- }
-
let Features = "ssse3", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+ def psignb128
+ : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
+ def psignw128
+ : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
+ def psignd128
+ : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
def pmulhrsw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
def pmaddubsw128 : X86Builtin<"_Vector<8, short>(_Vector<16, char>, _Vector<16, char>)">;
def pshufb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
@@ -608,10 +608,9 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant int)">;
def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">;
- def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
- def psignb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
- def psignw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
- def psignd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
+ def psadbw256
+ : X86Builtin<
+ "_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
def psllw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
def pslldqi256_byteshift : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Constant int)">;
def pslld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
@@ -682,7 +681,15 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
def phsubw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
def phsubd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
def phsubsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
-
+
+ def psignb256
+ : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
+ def psignw256
+ : X86Builtin<
+ "_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
+ def psignd256
+ : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
+
def pshuflw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">;
def pshufhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">;
def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 0cb491063057c..cbbc17cd7c3a5 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3003,6 +3003,35 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC,
return true;
}
+static bool interp__builtin_ia32_psign_op(InterpState &S, CodePtr OpPC,
+ const CallExpr *Call) {
+ assert(Call->getNumArgs() == 2);
+
+ const Pointer &B = S.Stk.pop<Pointer>();
+ const Pointer &A = S.Stk.pop<Pointer>();
+ const Pointer &Result = S.Stk.peek<Pointer>();
+
+ unsigned ResultLen = A.getNumElems();
+ QualType ElemQT = getElemType(A);
+ OptPrimType ElemT = S.getContext().classify(ElemQT);
+ unsigned ElemBitWidth = S.getASTContext().getTypeSize(ElemQT);
+ bool ResultElemUnsigned = ElemQT->isUnsignedIntegerOrEnumerationType();
+
+ INT_TYPE_SWITCH_NO_BOOL(*ElemT, {
+ for (unsigned I = 0; I != ResultLen; ++I) {
+ APSInt AElem = A.elem<T>(I).toAPSInt();
+ APSInt BElem = B.elem<T>(I).toAPSInt();
+ APSInt ResultElem =
+ (BElem.isNegative() ? -AElem
+ : BElem.isZero() ? APSInt(ElemBitWidth, ResultElemUnsigned)
+ : AElem);
+ Result.elem<T>(I) = static_cast<T>(ResultElem);
+ }
+ });
+ Result.initializeAllElements();
+ return true;
+}
+
static bool interp__builtin_ia32_pternlog(InterpState &S, CodePtr OpPC,
const CallExpr *Call, bool MaskZ) {
assert(Call->getNumArgs() == 5);
@@ -3630,6 +3659,14 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
.extractBits(16, 1);
});
+ case X86::BI__builtin_ia32_psignb128:
+ case X86::BI__builtin_ia32_psignb256:
+ case X86::BI__builtin_ia32_psignw128:
+ case X86::BI__builtin_ia32_psignw256:
+ case X86::BI__builtin_ia32_psignd128:
+ case X86::BI__builtin_ia32_psignd256:
+ return interp__builtin_ia32_psign_op(S, OpPC, Call);
+
case clang::X86::BI__builtin_ia32_pavgb128:
case clang::X86::BI__builtin_ia32_pavgw128:
case clang::X86::BI__builtin_ia32_pavgb256:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index e308c171ed551..b9216cd89d7b7 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12245,6 +12245,36 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
+ case X86::BI__builtin_ia32_psignb128:
+ case X86::BI__builtin_ia32_psignb256:
+ case X86::BI__builtin_ia32_psignw128:
+ case X86::BI__builtin_ia32_psignw256:
+ case X86::BI__builtin_ia32_psignd128:
+ case X86::BI__builtin_ia32_psignd256: {
+ APValue ASource, BSource;
+ if (!EvaluateAsRValue(Info, E->getArg(0), ASource) ||
+ !EvaluateAsRValue(Info, E->getArg(1), BSource))
+ return false;
+ unsigned SourceLen = ASource.getVectorLength();
+ const VectorType *VT = E->getArg(0)->getType()->castAs<VectorType>();
+ QualType ElemQT = VT->getElementType();
+ unsigned ElemBitWidth = Info.Ctx.getTypeSize(ElemQT);
+ bool ResultElemUnsigned = ElemQT->isUnsignedIntegerOrEnumerationType();
+
+ SmallVector<APValue, 16> Result;
+ Result.reserve(SourceLen);
+ for (unsigned I = 0; I != SourceLen; ++I) {
+ APSInt &AElem = ASource.getVectorElt(I).getInt();
+ APSInt &BElem = BSource.getVectorElt(I).getInt();
+ APSInt ResultElem =
+ (BElem.isNegative() ? -AElem
+ : BElem.isZero() ? APSInt(ElemBitWidth, ResultElemUnsigned)
+ : AElem);
+ Result.emplace_back(ResultElem);
+ }
+ return Success(APValue(Result.data(), Result.size()), E);
+ }
+
case X86::BI__builtin_ia32_blendvpd:
case X86::BI__builtin_ia32_blendvpd256:
case X86::BI__builtin_ia32_blendvps:
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index d35bc0e84a7a1..757085a930122 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -1976,10 +1976,9 @@ _mm256_shuffle_epi8(__m256i __a, __m256i __b) {
/// \param __b
/// A 256-bit integer vector].
/// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sign_epi8(__m256i __a, __m256i __b)
-{
- return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_sign_epi8(__m256i __a, __m256i __b) {
+ return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b);
}
/// Sets each element of the result to the corresponding element of the
@@ -1997,10 +1996,9 @@ _mm256_sign_epi8(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [16 x i16].
/// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sign_epi16(__m256i __a, __m256i __b)
-{
- return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_sign_epi16(__m256i __a, __m256i __b) {
+ return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b);
}
/// Sets each element of the result to the corresponding element of the
@@ -2018,10 +2016,9 @@ _mm256_sign_epi16(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [8 x i32].
/// \returns A 256-bit vector of [8 x i32] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_sign_epi32(__m256i __a, __m256i __b)
-{
- return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_sign_epi32(__m256i __a, __m256i __b) {
+ return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b);
}
/// Shifts each 128-bit half of the 256-bit integer vector \a a left by
diff --git a/clang/lib/Headers/tmmintrin.h b/clang/lib/Headers/tmmintrin.h
index 5d0f20f4d527d..95b32c0087ecb 100644
--- a/clang/lib/Headers/tmmintrin.h
+++ b/clang/lib/Headers/tmmintrin.h
@@ -641,10 +641,9 @@ _mm_shuffle_pi8(__m64 __a, __m64 __b) {
/// A 128-bit integer vector containing control bytes corresponding to
/// positions in the destination.
/// \returns A 128-bit integer vector containing the resultant values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_sign_epi8(__m128i __a, __m128i __b)
-{
- return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_sign_epi8(__m128i __a, __m128i __b) {
+ return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
}
/// For each 16-bit integer in the first source operand, perform one of
@@ -667,10 +666,9 @@ _mm_sign_epi8(__m128i __a, __m128i __b)
/// A 128-bit integer vector containing control words corresponding to
/// positions in the destination.
/// \returns A 128-bit integer vector containing the resultant values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_sign_epi16(__m128i __a, __m128i __b)
-{
- return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_sign_epi16(__m128i __a, __m128i __b) {
+ return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
}
/// For each 32-bit integer in the first source operand, perform one of
@@ -693,10 +691,9 @@ _mm_sign_epi16(__m128i __a, __m128i __b)
/// A 128-bit integer vector containing control doublewords corresponding to
/// positions in the destination.
/// \returns A 128-bit integer vector containing the resultant values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_sign_epi32(__m128i __a, __m128i __b)
-{
- return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_sign_epi32(__m128i __a, __m128i __b) {
+ return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
}
/// For each 8-bit integer in the first source operand, perform one of
@@ -719,11 +716,10 @@ _mm_sign_epi32(__m128i __a, __m128i __b)
/// A 64-bit integer vector containing control bytes corresponding to
/// positions in the destination.
/// \returns A 64-bit integer vector containing the resultant values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_mm_sign_pi8(__m64 __a, __m64 __b)
-{
- return __trunc64(__builtin_ia32_psignb128((__v16qi)__anyext128(__a),
- (__v16qi)__anyext128(__b)));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_pi8(__m64 __a,
+ __m64 __b) {
+ return __trunc64(__builtin_ia32_psignb128((__v16qi)__zext128(__a),
+ (__v16qi)__zext128(__b)));
}
/// For each 16-bit integer in the first source operand, perform one of
@@ -746,11 +742,10 @@ _mm_sign_pi8(__m64 __a, __m64 __b)
/// A 64-bit integer vector containing control words corresponding to
/// positions in the destination.
/// \returns A 64-bit integer vector containing the resultant values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_mm_sign_pi16(__m64 __a, __m64 __b)
-{
- return __trunc64(__builtin_ia32_psignw128((__v8hi)__anyext128(__a),
- (__v8hi)__anyext128(__b)));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_pi16(__m64 __a,
+ __m64 __b) {
+ return __trunc64(
+ __builtin_ia32_psignw128((__v8hi)__zext128(__a), (__v8hi)__zext128(__b)));
}
/// For each 32-bit integer in the first source operand, perform one of
@@ -773,11 +768,10 @@ _mm_sign_pi16(__m64 __a, __m64 __b)
/// A 64-bit integer vector containing two control doublewords corresponding
/// to positions in the destination.
/// \returns A 64-bit integer vector containing the resultant values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_mm_sign_pi32(__m64 __a, __m64 __b)
-{
- return __trunc64(__builtin_ia32_psignd128((__v4si)__anyext128(__a),
- (__v4si)__anyext128(__b)));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_pi32(__m64 __a,
+ __m64 __b) {
+ return __trunc64(
+ __builtin_ia32_psignd128((__v4si)__zext128(__a), (__v4si)__zext128(__b)));
}
#undef __anyext128
diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c
index a505d70a98203..c785363e07f53 100644
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -1151,23 +1151,30 @@ __m256i test_mm256_shufflelo_epi16(__m256i a) {
return _mm256_shufflelo_epi16(a, 83);
}
TEST_CONSTEXPR(match_v16hi(_mm256_shufflelo_epi16(((__m256i)(__v16hi){ 0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15}), 83), 3,0,1,1, 4,5,6,7, 11,8,9,9, 12,13,14,15) );
+
__m256i test_mm256_sign_epi8(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_sign_epi8
// CHECK: call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %{{.*}}, <32 x i8> %{{.*}})
return _mm256_sign_epi8(a, b);
}
+TEST_CONSTEXPR(match_v32qi(_mm256_sign_epi8(
+ (__m256i)(__v32qi){'B','r','i','g','h','t','n','e','o','n','f','o','x','j','u','m','p','s','o','v','e','r','p','r','o','g','r','a','m','m','e','r'},
+ (__m256i)(__v32qi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,'t','h','i','s'}),
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,'m','m','e','r'));
__m256i test_mm256_sign_epi16(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_sign_epi16
// CHECK: call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_sign_epi16(a, b);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_sign_epi16((__m256i)(__v16hi){0x77,0x77,0xbe,0xbe, -0x9,-0x9,-0x8,-0x8, 0,0,0,0, 0,0,0,0}, (__m256i)(__v16hi){-1,-256,1,256, -512,-1028,512,1028, -2048,-4096,'h','i', 'b','y','e','!'}), -0x77,-0x77,0xbe,0xbe, 0x9,0x9,-0x8,-0x8, 0,0,0,0, 0,0,0,0));
__m256i test_mm256_sign_epi32(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_sign_epi32
// CHECK: call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %{{.*}}, <8 x i32> %{{.*}})
return _mm256_sign_epi32(a, b);
}
+TEST_CONSTEXPR(match_v8si(_mm256_sign_epi32((__m256i)(__v8si){0xbeef,0xfeed,0xbead,0xdeed,'o','o','p','s'}, (__m256i)(__v8si){0,0,0,0,-1,-1,-1,-1}), 0,0,0,0, -'o',-'o',-'p',-'s'));
__m256i test_mm256_slli_epi16(__m256i a) {
// CHECK-LABEL: test_mm256_slli_epi16
diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c
index c1ac57b44e58d..0065b87108914 100644
--- a/clang/test/CodeGen/X86/mmx-builtins.c
+++ b/clang/test/CodeGen/X86/mmx-builtins.c
@@ -598,23 +598,28 @@ __m64 test_mm_shuffle_pi16(__m64 a) {
return _mm_shuffle_pi16(a, 3);
}
TEST_CONSTEXPR(match_v4hi(_mm_shuffle_pi16(((__m64)(__v4hi){0,1,2,3}), 3), 3,0,0,0));
+
__m64 test_mm_sign_pi8(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_sign_pi8
// CHECK: call <16 x i8> @llvm.x86.ssse3.psign.b.128(
return _mm_sign_pi8(a, b);
}
+TEST_CONSTEXPR(match_v8qi(_mm_sign_pi8((__m64)(__v8qi){0,0,0,0, 0,0,0,0}, (__m64)(__v8qi){0,0,0,0, 0,0,0,0}), 0,0,0,0, 0,0,0,0));
+TEST_CONSTEXPR(match_v8qi(_mm_sign_pi8((__m64)(__v8qi){6,7,6,7, 6,7,6,7}, (__m64)(__v8qi){1,1,1,1, 0,0,0,0}), 6,7,6,7, 0,0,0,0));
__m64 test_mm_sign_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_sign_pi16
// CHECK: call <8 x i16> @llvm.x86.ssse3.psign.w.128(
return _mm_sign_pi16(a, b);
}
+TEST_CONSTEXPR(match_v4hi(_mm_sign_pi16((__m64)(__v4hi){-1,0,1,0}, (__m64)(__v4hi){1,0,-1,0}), -1,0,-1,0));
__m64 test_mm_sign_pi32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_sign_pi32
// CHECK: call <4 x i32> @llvm.x86.ssse3.psign.d.128(
return _mm_sign_pi32(a, b);
}
+TEST_CONSTEXPR(match_v2si(_mm_sign_pi32((__m64)(__v2si){0x7FFF, -1}, (__m64)(__v2si){-1, 0x7FFF}), -0x7FFF, -1));
__m64 test_mm_sll_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_sll_pi16
diff --git a/clang/test/CodeGen/X86/ssse3-builtins.c b/clang/test/CodeGen/X86/ssse3-builtins.c
index f70afc01a1963..d3fde1e2de653 100644
--- a/clang/test/CodeGen/X86/ssse3-builtins.c
+++ b/clang/test/CodeGen/X86/ssse3-builtins.c
@@ -125,15 +125,18 @@ __m128i test_mm_sign_epi8(__m128i a, __m128i b) {
// CHECK: call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
return _mm_sign_epi8(a, b);
}
+TEST_CONSTEXPR(match_v16qi(_mm_sign_epi8((__m128i)(__v16qi){'g','r','i','n','d','i','n','g', 'l','e','e','t','c','o','d','e'}, (__m128i)(__v16qi){0,1,0,1, 1,1,0,0, 0,0,1,1, 1,0,1,0}), 0,'r',0,'n', 'd','i',0,0, 0,0,'e','t', 'c',0,'d',0));
__m128i test_mm_sign_epi16(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_sign_epi16
// CHECK: call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_sign_epi16(a, b);
}
+TEST_CONSTEXPR(match_v8hi(_mm_sign_epi16((__m128i)(__v8hi){0,-2,0,-4,0,-6,0,-8}, (__m128i)(__v8hi){-1,-2,-3,-4,-5,-6,7,-8}), 0,2,0,4,0,6,0,8));
__m128i test_mm_sign_epi32(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_sign_epi32
// CHECK: call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
return _mm_sign_epi32(a, b);
}
+TEST_CONSTEXPR(match_v4si(_mm_sign_epi32((__m128i)(__v4si){-1,-2,-3,-4}, (__m128i)(__v4si){-4,-3,-2,-1}), 1,2,3,4));
Inline review comments (quoting the evaluator code under review):

    case X86::BI__builtin_ia32_psignd256:
      return interp__builtin_elementwise_int_binop(
          S, OpPC, Call, [](const APSInt &AElem, const APSInt &BElem) -> APInt {
            return BElem[BElem.getBitWidth() - 1]

Review comment: BElem.isNegative()?
                ? static_cast<const APInt &>(-AElem)
                : BElem.isZero() ? APInt(AElem.getBitWidth(), 0)
                                 : static_cast<const APInt &>(AElem);
          });

Review comment: You might be able to just do this (we don't use anything from the APSInt at all, so we can tweak the callback to take APInt refs):
    return interp__builtin_elementwise_int_binop(
        S, OpPC, Call, [](const APInt &AElem, const APInt &BElem) {
          if (BElem.isZero())
            return APInt::getZero(AElem.getBitWidth());
          if (BElem.isNegative())
            return -AElem;
          return AElem;
        });
Fix #155812
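
As a closing illustration, and only a hedged sketch rather than code from the PR: once the intrinsics carry the Constexpr attribute, they can participate in constant evaluation, mirroring the TEST_CONSTEXPR cases above.

#include <immintrin.h>

// Illustrative only (assumes -mssse3); values chosen to hit all three cases.
constexpr __m128i A = (__m128i)(__v4si){1, -2, 3, -4};
constexpr __m128i B = (__m128i)(__v4si){-1, 0, 1, 2};
constexpr __m128i R = _mm_sign_epi32(A, B); // yields {-1, 0, 3, -4}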