Skip to content

Commit b1b4415

Browse files
committed
[Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - allow MMX/SSE/AVX2 PSIGN intrinsics to be used in constexpr
1 parent 154138c commit b1b4415

File tree

8 files changed

+130
-50
lines changed

8 files changed

+130
-50
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -123,13 +123,13 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
123123
}
124124
}
125125

126-
let Features = "ssse3" in {
127-
def psignb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
128-
def psignw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
129-
def psignd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
130-
}
131-
132126
let Features = "ssse3", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
127+
def psignb128
128+
: X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
129+
def psignw128
130+
: X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
131+
def psignd128
132+
: X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
133133
def pmulhrsw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
134134
def pmaddubsw128 : X86Builtin<"_Vector<8, short>(_Vector<16, char>, _Vector<16, char>)">;
135135
def pshufb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
@@ -608,10 +608,9 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
608608
def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant int)">;
609609

610610
def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">;
611-
def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
612-
def psignb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
613-
def psignw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
614-
def psignd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
611+
def psadbw256
612+
: X86Builtin<
613+
"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
615614
def psllw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
616615
def pslldqi256_byteshift : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Constant int)">;
617616
def pslld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
@@ -682,7 +681,15 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
682681
def phsubw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
683682
def phsubd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
684683
def phsubsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
685-
684+
685+
def psignb256
686+
: X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
687+
def psignw256
688+
: X86Builtin<
689+
"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
690+
def psignd256
691+
: X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
692+
686693
def pshuflw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">;
687694
def pshufhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">;
688695
def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3003,6 +3003,35 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC,
30033003
return true;
30043004
}
30053005

3006+
/// Constant-evaluates a two-operand x86 PSIGN builtin element by element.
///
/// For every element index I the destination receives:
///   - the negation of A[I] when B[I] is negative,
///   - a zero of the element's bit width when B[I] is zero,
///   - A[I] unchanged otherwise.
///
/// \param S     Interpreter state; both operands are popped from its stack
///              and the destination vector is peeked (left on the stack).
/// \param OpPC  Not referenced in this body.
/// \param Call  The builtin call; asserted to have exactly two arguments.
/// \returns Always true; this body has no failure path.
static bool interp__builtin_ia32_psign_op(InterpState &S, CodePtr OpPC,
3007+
const CallExpr *Call) {
3008+
assert(Call->getNumArgs() == 2);
3009+
3010+
// Operands come off the stack in reverse order: B is popped first, then A.
// NOTE(review): presumably A is call argument 0 and B is call argument 1,
// matching the ASource/BSource naming in the ExprConstant path — confirm.
const Pointer &B = S.Stk.pop<Pointer>();
3011+
const Pointer &A = S.Stk.pop<Pointer>();
3012+
// The destination is only peeked, so it stays on the stack for the caller.
const Pointer &Result = S.Stk.peek<Pointer>();
3013+
3014+
// Element count, element type, bit width and signedness are all taken from A.
unsigned ResultLen = A.getNumElems();
3015+
QualType ElemQT = getElemType(A);
3016+
OptPrimType ElemT = S.getContext().classify(ElemQT);
3017+
unsigned ElemBitWidth = S.getASTContext().getTypeSize(ElemQT);
3018+
bool ResultElemUnsigned = ElemQT->isUnsignedIntegerOrEnumerationType();
3019+
3020+
// ElemT is dereferenced without a check; assumes classify() always yields a
// primitive type for these vector element types — TODO confirm.
INT_TYPE_SWITCH_NO_BOOL(*ElemT, {
3021+
for (unsigned I = 0; I != ResultLen; ++I) {
3022+
APSInt AElem = A.elem<T>(I).toAPSInt();
3023+
APSInt BElem = B.elem<T>(I).toAPSInt();
3024+
// The sign of B's element selects the result: negative -> -A, zero -> a
// freshly constructed APSInt of the element width, positive -> A as is.
// NOTE(review): negating the minimum representable value presumably wraps
// back to itself — confirm this is the intended PSIGN behaviour.
APSInt ResultElem =
3025+
(BElem.isNegative() ? -AElem
3026+
: BElem.isZero() ? APSInt(ElemBitWidth, ResultElemUnsigned)
3027+
: AElem);
3028+
Result.elem<T>(I) = static_cast<T>(ResultElem);
3029+
}
3030+
});
3031+
// Called once, after the loop has written every element of the destination.
Result.initializeAllElements();
3032+
return true;
3033+
}
3034+
30063035
static bool interp__builtin_ia32_pternlog(InterpState &S, CodePtr OpPC,
30073036
const CallExpr *Call, bool MaskZ) {
30083037
assert(Call->getNumArgs() == 5);
@@ -3630,6 +3659,14 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
36303659
.extractBits(16, 1);
36313660
});
36323661

3662+
case X86::BI__builtin_ia32_psignb128:
3663+
case X86::BI__builtin_ia32_psignb256:
3664+
case X86::BI__builtin_ia32_psignw128:
3665+
case X86::BI__builtin_ia32_psignw256:
3666+
case X86::BI__builtin_ia32_psignd128:
3667+
case X86::BI__builtin_ia32_psignd256:
3668+
return interp__builtin_ia32_psign_op(S, OpPC, Call);
3669+
36333670
case clang::X86::BI__builtin_ia32_pavgb128:
36343671
case clang::X86::BI__builtin_ia32_pavgw128:
36353672
case clang::X86::BI__builtin_ia32_pavgb256:

clang/lib/AST/ExprConstant.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12245,6 +12245,36 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1224512245
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
1224612246
}
1224712247

12248+
case X86::BI__builtin_ia32_psignb128:
12249+
case X86::BI__builtin_ia32_psignb256:
12250+
case X86::BI__builtin_ia32_psignw128:
12251+
case X86::BI__builtin_ia32_psignw256:
12252+
case X86::BI__builtin_ia32_psignd128:
12253+
case X86::BI__builtin_ia32_psignd256: {
12254+
APValue ASource, BSource;
12255+
if (!EvaluateAsRValue(Info, E->getArg(0), ASource) ||
12256+
!EvaluateAsRValue(Info, E->getArg(1), BSource))
12257+
return false;
12258+
unsigned SourceLen = ASource.getVectorLength();
12259+
const VectorType *VT = E->getArg(0)->getType()->castAs<VectorType>();
12260+
QualType ElemQT = VT->getElementType();
12261+
unsigned ElemBitWidth = Info.Ctx.getTypeSize(ElemQT);
12262+
bool ResultElemUnsigned = ElemQT->isUnsignedIntegerOrEnumerationType();
12263+
12264+
SmallVector<APValue, 16> Result;
12265+
Result.reserve(SourceLen);
12266+
for (unsigned I = 0; I != SourceLen; ++I) {
12267+
APSInt &AElem = ASource.getVectorElt(I).getInt();
12268+
APSInt &BElem = BSource.getVectorElt(I).getInt();
12269+
APSInt ResultElem =
12270+
(BElem.isNegative() ? -AElem
12271+
: BElem.isZero() ? APSInt(ElemBitWidth, ResultElemUnsigned)
12272+
: AElem);
12273+
Result.emplace_back(ResultElem);
12274+
}
12275+
return Success(APValue(Result.data(), Result.size()), E);
12276+
}
12277+
1224812278
case X86::BI__builtin_ia32_blendvpd:
1224912279
case X86::BI__builtin_ia32_blendvpd256:
1225012280
case X86::BI__builtin_ia32_blendvps:

clang/lib/Headers/avx2intrin.h

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1976,10 +1976,9 @@ _mm256_shuffle_epi8(__m256i __a, __m256i __b) {
19761976
/// \param __b
19771977
/// A 256-bit integer vector.
19781978
/// \returns A 256-bit integer vector containing the result.
1979-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
1980-
_mm256_sign_epi8(__m256i __a, __m256i __b)
1981-
{
1982-
return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b);
1979+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
1980+
_mm256_sign_epi8(__m256i __a, __m256i __b) {
1981+
return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b);
19831982
}
19841983

19851984
/// Sets each element of the result to the corresponding element of the
@@ -1997,10 +1996,9 @@ _mm256_sign_epi8(__m256i __a, __m256i __b)
19971996
/// \param __b
19981997
/// A 256-bit vector of [16 x i16].
19991998
/// \returns A 256-bit vector of [16 x i16] containing the result.
2000-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
2001-
_mm256_sign_epi16(__m256i __a, __m256i __b)
2002-
{
2003-
return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b);
1999+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2000+
_mm256_sign_epi16(__m256i __a, __m256i __b) {
2001+
return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b);
20042002
}
20052003

20062004
/// Sets each element of the result to the corresponding element of the
@@ -2018,10 +2016,9 @@ _mm256_sign_epi16(__m256i __a, __m256i __b)
20182016
/// \param __b
20192017
/// A 256-bit vector of [8 x i32].
20202018
/// \returns A 256-bit vector of [8 x i32] containing the result.
2021-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
2022-
_mm256_sign_epi32(__m256i __a, __m256i __b)
2023-
{
2024-
return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b);
2019+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2020+
_mm256_sign_epi32(__m256i __a, __m256i __b) {
2021+
return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b);
20252022
}
20262023

20272024
/// Shifts each 128-bit half of the 256-bit integer vector \a a left by

clang/lib/Headers/tmmintrin.h

Lines changed: 21 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -641,10 +641,9 @@ _mm_shuffle_pi8(__m64 __a, __m64 __b) {
641641
/// A 128-bit integer vector containing control bytes corresponding to
642642
/// positions in the destination.
643643
/// \returns A 128-bit integer vector containing the resultant values.
644-
static __inline__ __m128i __DEFAULT_FN_ATTRS
645-
_mm_sign_epi8(__m128i __a, __m128i __b)
646-
{
647-
return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
644+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
645+
_mm_sign_epi8(__m128i __a, __m128i __b) {
646+
return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
648647
}
649648

650649
/// For each 16-bit integer in the first source operand, perform one of
@@ -667,10 +666,9 @@ _mm_sign_epi8(__m128i __a, __m128i __b)
667666
/// A 128-bit integer vector containing control words corresponding to
668667
/// positions in the destination.
669668
/// \returns A 128-bit integer vector containing the resultant values.
670-
static __inline__ __m128i __DEFAULT_FN_ATTRS
671-
_mm_sign_epi16(__m128i __a, __m128i __b)
672-
{
673-
return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
669+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
670+
_mm_sign_epi16(__m128i __a, __m128i __b) {
671+
return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
674672
}
675673

676674
/// For each 32-bit integer in the first source operand, perform one of
@@ -693,10 +691,9 @@ _mm_sign_epi16(__m128i __a, __m128i __b)
693691
/// A 128-bit integer vector containing control doublewords corresponding to
694692
/// positions in the destination.
695693
/// \returns A 128-bit integer vector containing the resultant values.
696-
static __inline__ __m128i __DEFAULT_FN_ATTRS
697-
_mm_sign_epi32(__m128i __a, __m128i __b)
698-
{
699-
return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
694+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
695+
_mm_sign_epi32(__m128i __a, __m128i __b) {
696+
return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
700697
}
701698

702699
/// For each 8-bit integer in the first source operand, perform one of
@@ -719,11 +716,10 @@ _mm_sign_epi32(__m128i __a, __m128i __b)
719716
/// A 64-bit integer vector containing control bytes corresponding to
720717
/// positions in the destination.
721718
/// \returns A 64-bit integer vector containing the resultant values.
722-
static __inline__ __m64 __DEFAULT_FN_ATTRS
723-
_mm_sign_pi8(__m64 __a, __m64 __b)
724-
{
725-
return __trunc64(__builtin_ia32_psignb128((__v16qi)__anyext128(__a),
726-
(__v16qi)__anyext128(__b)));
719+
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_pi8(__m64 __a,
720+
__m64 __b) {
721+
return __trunc64(__builtin_ia32_psignb128((__v16qi)__zext128(__a),
722+
(__v16qi)__zext128(__b)));
727723
}
728724

729725
/// For each 16-bit integer in the first source operand, perform one of
@@ -746,11 +742,10 @@ _mm_sign_pi8(__m64 __a, __m64 __b)
746742
/// A 64-bit integer vector containing control words corresponding to
747743
/// positions in the destination.
748744
/// \returns A 64-bit integer vector containing the resultant values.
749-
static __inline__ __m64 __DEFAULT_FN_ATTRS
750-
_mm_sign_pi16(__m64 __a, __m64 __b)
751-
{
752-
return __trunc64(__builtin_ia32_psignw128((__v8hi)__anyext128(__a),
753-
(__v8hi)__anyext128(__b)));
745+
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_pi16(__m64 __a,
746+
__m64 __b) {
747+
return __trunc64(
748+
__builtin_ia32_psignw128((__v8hi)__zext128(__a), (__v8hi)__zext128(__b)));
754749
}
755750

756751
/// For each 32-bit integer in the first source operand, perform one of
@@ -773,11 +768,10 @@ _mm_sign_pi16(__m64 __a, __m64 __b)
773768
/// A 64-bit integer vector containing two control doublewords corresponding
774769
/// to positions in the destination.
775770
/// \returns A 64-bit integer vector containing the resultant values.
776-
static __inline__ __m64 __DEFAULT_FN_ATTRS
777-
_mm_sign_pi32(__m64 __a, __m64 __b)
778-
{
779-
return __trunc64(__builtin_ia32_psignd128((__v4si)__anyext128(__a),
780-
(__v4si)__anyext128(__b)));
771+
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_pi32(__m64 __a,
772+
__m64 __b) {
773+
return __trunc64(
774+
__builtin_ia32_psignd128((__v4si)__zext128(__a), (__v4si)__zext128(__b)));
781775
}
782776

783777
#undef __anyext128

clang/test/CodeGen/X86/avx2-builtins.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1151,23 +1151,30 @@ __m256i test_mm256_shufflelo_epi16(__m256i a) {
11511151
return _mm256_shufflelo_epi16(a, 83);
11521152
}
11531153
TEST_CONSTEXPR(match_v16hi(_mm256_shufflelo_epi16(((__m256i)(__v16hi){ 0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15}), 83), 3,0,1,1, 4,5,6,7, 11,8,9,9, 12,13,14,15) );
1154+
11541155
__m256i test_mm256_sign_epi8(__m256i a, __m256i b) {
11551156
// CHECK-LABEL: test_mm256_sign_epi8
11561157
// CHECK: call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %{{.*}}, <32 x i8> %{{.*}})
11571158
return _mm256_sign_epi8(a, b);
11581159
}
1160+
TEST_CONSTEXPR(match_v32qi(_mm256_sign_epi8(
1161+
(__m256i)(__v32qi){'B','r','i','g','h','t','n','e','o','n','f','o','x','j','u','m','p','s','o','v','e','r','p','r','o','g','r','a','m','m','e','r'},
1162+
(__m256i)(__v32qi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,'t','h','i','s'}),
1163+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,'m','m','e','r'));
11591164

11601165
__m256i test_mm256_sign_epi16(__m256i a, __m256i b) {
11611166
// CHECK-LABEL: test_mm256_sign_epi16
11621167
// CHECK: call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
11631168
return _mm256_sign_epi16(a, b);
11641169
}
1170+
TEST_CONSTEXPR(match_v16hi(_mm256_sign_epi16((__m256i)(__v16hi){0x77,0x77,0xbe,0xbe, -0x9,-0x9,-0x8,-0x8, 0,0,0,0, 0,0,0,0}, (__m256i)(__v16hi){-1,-256,1,256, -512,-1028,512,1028, -2048,-4096,'h','i', 'b','y','e','!'}), -0x77,-0x77,0xbe,0xbe, 0x9,0x9,-0x8,-0x8, 0,0,0,0, 0,0,0,0));
11651171

11661172
__m256i test_mm256_sign_epi32(__m256i a, __m256i b) {
11671173
// CHECK-LABEL: test_mm256_sign_epi32
11681174
// CHECK: call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %{{.*}}, <8 x i32> %{{.*}})
11691175
return _mm256_sign_epi32(a, b);
11701176
}
1177+
TEST_CONSTEXPR(match_v8si(_mm256_sign_epi32((__m256i)(__v8si){0xbeef,0xfeed,0xbead,0xdeed,'o','o','p','s'}, (__m256i)(__v8si){0,0,0,0,-1,-1,-1,-1}), 0,0,0,0, -'o',-'o',-'p',-'s'));
11711178

11721179
__m256i test_mm256_slli_epi16(__m256i a) {
11731180
// CHECK-LABEL: test_mm256_slli_epi16

clang/test/CodeGen/X86/mmx-builtins.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -598,23 +598,28 @@ __m64 test_mm_shuffle_pi16(__m64 a) {
598598
return _mm_shuffle_pi16(a, 3);
599599
}
600600
TEST_CONSTEXPR(match_v4hi(_mm_shuffle_pi16(((__m64)(__v4hi){0,1,2,3}), 3), 3,0,0,0));
601+
601602
__m64 test_mm_sign_pi8(__m64 a, __m64 b) {
602603
// CHECK-LABEL: test_mm_sign_pi8
603604
// CHECK: call <16 x i8> @llvm.x86.ssse3.psign.b.128(
604605
return _mm_sign_pi8(a, b);
605606
}
607+
TEST_CONSTEXPR(match_v8qi(_mm_sign_pi8((__m64)(__v8qi){0,0,0,0, 0,0,0,0}, (__m64)(__v8qi){0,0,0,0, 0,0,0,0}), 0,0,0,0, 0,0,0,0));
608+
TEST_CONSTEXPR(match_v8qi(_mm_sign_pi8((__m64)(__v8qi){6,7,6,7, 6,7,6,7}, (__m64)(__v8qi){1,1,1,1, 0,0,0,0}), 6,7,6,7, 0,0,0,0));
606609

607610
__m64 test_mm_sign_pi16(__m64 a, __m64 b) {
608611
// CHECK-LABEL: test_mm_sign_pi16
609612
// CHECK: call <8 x i16> @llvm.x86.ssse3.psign.w.128(
610613
return _mm_sign_pi16(a, b);
611614
}
615+
TEST_CONSTEXPR(match_v4hi(_mm_sign_pi16((__m64)(__v4hi){-1,0,1,0}, (__m64)(__v4hi){1,0,-1,0}), -1,0,-1,0));
612616

613617
__m64 test_mm_sign_pi32(__m64 a, __m64 b) {
614618
// CHECK-LABEL: test_mm_sign_pi32
615619
// CHECK: call <4 x i32> @llvm.x86.ssse3.psign.d.128(
616620
return _mm_sign_pi32(a, b);
617621
}
622+
TEST_CONSTEXPR(match_v2si(_mm_sign_pi32((__m64)(__v2si){0x7FFF, -1}, (__m64)(__v2si){-1, 0x7FFF}), -0x7FFF, -1));
618623

619624
__m64 test_mm_sll_pi16(__m64 a, __m64 b) {
620625
// CHECK-LABEL: test_mm_sll_pi16

clang/test/CodeGen/X86/ssse3-builtins.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,15 +125,18 @@ __m128i test_mm_sign_epi8(__m128i a, __m128i b) {
125125
// CHECK: call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
126126
return _mm_sign_epi8(a, b);
127127
}
128+
TEST_CONSTEXPR(match_v16qi(_mm_sign_epi8((__m128i)(__v16qi){'g','r','i','n','d','i','n','g', 'l','e','e','t','c','o','d','e'}, (__m128i)(__v16qi){0,1,0,1, 1,1,0,0, 0,0,1,1, 1,0,1,0}), 0,'r',0,'n', 'd','i',0,0, 0,0,'e','t', 'c',0,'d',0));
128129

129130
__m128i test_mm_sign_epi16(__m128i a, __m128i b) {
130131
// CHECK-LABEL: test_mm_sign_epi16
131132
// CHECK: call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
132133
return _mm_sign_epi16(a, b);
133134
}
135+
TEST_CONSTEXPR(match_v8hi(_mm_sign_epi16((__m128i)(__v8hi){0,-2,0,-4,0,-6,0,-8}, (__m128i)(__v8hi){-1,-2,-3,-4,-5,-6,7,-8}), 0,2,0,4,0,6,0,8));
134136

135137
__m128i test_mm_sign_epi32(__m128i a, __m128i b) {
136138
// CHECK-LABEL: test_mm_sign_epi32
137139
// CHECK: call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
138140
return _mm_sign_epi32(a, b);
139141
}
142+
TEST_CONSTEXPR(match_v4si(_mm_sign_epi32((__m128i)(__v4si){-1,-2,-3,-4}, (__m128i)(__v4si){-4,-3,-2,-1}), 1,2,3,4));

0 commit comments

Comments
 (0)