Skip to content

Commit 7dd86af

Browse files
kimsh02aokblast
authored andcommitted
[Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - allow MMX/SSE/AVX2 PSIGN intrinsics to be used in constexpr (llvm#163685)
Fix llvm#155812
1 parent 7bbbe5b commit 7dd86af

File tree

8 files changed

+92
-54
lines changed

8 files changed

+92
-54
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -123,13 +123,13 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
123123
}
124124
}
125125

126-
let Features = "ssse3" in {
127-
def psignb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
128-
def psignw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
129-
def psignd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
130-
}
131-
132126
let Features = "ssse3", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
127+
def psignb128
128+
: X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
129+
def psignw128
130+
: X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
131+
def psignd128
132+
: X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
133133
def pmulhrsw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
134134
def pmaddubsw128 : X86Builtin<"_Vector<8, short>(_Vector<16, char>, _Vector<16, char>)">;
135135
def pshufb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
@@ -603,10 +603,9 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
603603
def mpsadbw256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant char)">;
604604
def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, "
605605
"_Vector<32, char>, _Constant int)">;
606-
def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
607-
def psignb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
608-
def psignw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
609-
def psignd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
606+
def psadbw256
607+
: X86Builtin<
608+
"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
610609
def psllw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
611610
def pslldqi256_byteshift : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Constant int)">;
612611
def pslld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
@@ -677,7 +676,15 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
677676
def phsubw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
678677
def phsubd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
679678
def phsubsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
680-
679+
680+
def psignb256
681+
: X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
682+
def psignw256
683+
: X86Builtin<
684+
"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
685+
def psignd256
686+
: X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
687+
681688
def pshuflw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">;
682689
def pshufhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">;
683690
def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3809,6 +3809,21 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
38093809
return interp__builtin_ia32_movmsk_op(S, OpPC, Call);
38103810
}
38113811

3812+
case X86::BI__builtin_ia32_psignb128:
3813+
case X86::BI__builtin_ia32_psignb256:
3814+
case X86::BI__builtin_ia32_psignw128:
3815+
case X86::BI__builtin_ia32_psignw256:
3816+
case X86::BI__builtin_ia32_psignd128:
3817+
case X86::BI__builtin_ia32_psignd256:
3818+
return interp__builtin_elementwise_int_binop(
3819+
S, OpPC, Call, [](const APInt &AElem, const APInt &BElem) {
3820+
if (BElem.isZero())
3821+
return APInt::getZero(AElem.getBitWidth());
3822+
if (BElem.isNegative())
3823+
return -AElem;
3824+
return AElem;
3825+
});
3826+
38123827
case clang::X86::BI__builtin_ia32_pavgb128:
38133828
case clang::X86::BI__builtin_ia32_pavgw128:
38143829
case clang::X86::BI__builtin_ia32_pavgb256:

clang/lib/AST/ExprConstant.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12312,6 +12312,20 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1231212312
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
1231312313
}
1231412314

12315+
case X86::BI__builtin_ia32_psignb128:
12316+
case X86::BI__builtin_ia32_psignb256:
12317+
case X86::BI__builtin_ia32_psignw128:
12318+
case X86::BI__builtin_ia32_psignw256:
12319+
case X86::BI__builtin_ia32_psignd128:
12320+
case X86::BI__builtin_ia32_psignd256:
12321+
return EvaluateBinOpExpr([](const APInt &AElem, const APInt &BElem) {
12322+
if (BElem.isZero())
12323+
return APInt::getZero(AElem.getBitWidth());
12324+
if (BElem.isNegative())
12325+
return -AElem;
12326+
return AElem;
12327+
});
12328+
1231512329
case X86::BI__builtin_ia32_blendvpd:
1231612330
case X86::BI__builtin_ia32_blendvpd256:
1231712331
case X86::BI__builtin_ia32_blendvps:

clang/lib/Headers/avx2intrin.h

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1975,10 +1975,9 @@ _mm256_shuffle_epi8(__m256i __a, __m256i __b) {
19751975
/// \param __b
19761976
/// A 256-bit integer vector].
19771977
/// \returns A 256-bit integer vector containing the result.
1978-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
1979-
_mm256_sign_epi8(__m256i __a, __m256i __b)
1980-
{
1981-
return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b);
1978+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
1979+
_mm256_sign_epi8(__m256i __a, __m256i __b) {
1980+
return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b);
19821981
}
19831982

19841983
/// Sets each element of the result to the corresponding element of the
@@ -1996,10 +1995,9 @@ _mm256_sign_epi8(__m256i __a, __m256i __b)
19961995
/// \param __b
19971996
/// A 256-bit vector of [16 x i16].
19981997
/// \returns A 256-bit vector of [16 x i16] containing the result.
1999-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
2000-
_mm256_sign_epi16(__m256i __a, __m256i __b)
2001-
{
2002-
return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b);
1998+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
1999+
_mm256_sign_epi16(__m256i __a, __m256i __b) {
2000+
return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b);
20032001
}
20042002

20052003
/// Sets each element of the result to the corresponding element of the
@@ -2017,10 +2015,9 @@ _mm256_sign_epi16(__m256i __a, __m256i __b)
20172015
/// \param __b
20182016
/// A 256-bit vector of [8 x i32].
20192017
/// \returns A 256-bit vector of [8 x i32] containing the result.
2020-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
2021-
_mm256_sign_epi32(__m256i __a, __m256i __b)
2022-
{
2023-
return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b);
2018+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2019+
_mm256_sign_epi32(__m256i __a, __m256i __b) {
2020+
return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b);
20242021
}
20252022

20262023
/// Shifts each 128-bit half of the 256-bit integer vector \a a left by

clang/lib/Headers/tmmintrin.h

Lines changed: 21 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,6 @@
2626
#define __zext128(x) \
2727
(__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \
2828
1, 2, 3)
29-
#define __anyext128(x) \
30-
(__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \
31-
1, -1, -1)
3229

3330
#if defined(__cplusplus) && (__cplusplus >= 201103L)
3431
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
@@ -641,10 +638,9 @@ _mm_shuffle_pi8(__m64 __a, __m64 __b) {
641638
/// A 128-bit integer vector containing control bytes corresponding to
642639
/// positions in the destination.
643640
/// \returns A 128-bit integer vector containing the resultant values.
644-
static __inline__ __m128i __DEFAULT_FN_ATTRS
645-
_mm_sign_epi8(__m128i __a, __m128i __b)
646-
{
647-
return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
641+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
642+
_mm_sign_epi8(__m128i __a, __m128i __b) {
643+
return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
648644
}
649645

650646
/// For each 16-bit integer in the first source operand, perform one of
@@ -667,10 +663,9 @@ _mm_sign_epi8(__m128i __a, __m128i __b)
667663
/// A 128-bit integer vector containing control words corresponding to
668664
/// positions in the destination.
669665
/// \returns A 128-bit integer vector containing the resultant values.
670-
static __inline__ __m128i __DEFAULT_FN_ATTRS
671-
_mm_sign_epi16(__m128i __a, __m128i __b)
672-
{
673-
return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
666+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
667+
_mm_sign_epi16(__m128i __a, __m128i __b) {
668+
return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
674669
}
675670

676671
/// For each 32-bit integer in the first source operand, perform one of
@@ -693,10 +688,9 @@ _mm_sign_epi16(__m128i __a, __m128i __b)
693688
/// A 128-bit integer vector containing control doublewords corresponding to
694689
/// positions in the destination.
695690
/// \returns A 128-bit integer vector containing the resultant values.
696-
static __inline__ __m128i __DEFAULT_FN_ATTRS
697-
_mm_sign_epi32(__m128i __a, __m128i __b)
698-
{
699-
return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
691+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
692+
_mm_sign_epi32(__m128i __a, __m128i __b) {
693+
return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
700694
}
701695

702696
/// For each 8-bit integer in the first source operand, perform one of
@@ -719,11 +713,10 @@ _mm_sign_epi32(__m128i __a, __m128i __b)
719713
/// A 64-bit integer vector containing control bytes corresponding to
720714
/// positions in the destination.
721715
/// \returns A 64-bit integer vector containing the resultant values.
722-
static __inline__ __m64 __DEFAULT_FN_ATTRS
723-
_mm_sign_pi8(__m64 __a, __m64 __b)
724-
{
725-
return __trunc64(__builtin_ia32_psignb128((__v16qi)__anyext128(__a),
726-
(__v16qi)__anyext128(__b)));
716+
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_pi8(__m64 __a,
717+
__m64 __b) {
718+
return __trunc64(__builtin_ia32_psignb128((__v16qi)__zext128(__a),
719+
(__v16qi)__zext128(__b)));
727720
}
728721

729722
/// For each 16-bit integer in the first source operand, perform one of
@@ -746,11 +739,10 @@ _mm_sign_pi8(__m64 __a, __m64 __b)
746739
/// A 64-bit integer vector containing control words corresponding to
747740
/// positions in the destination.
748741
/// \returns A 64-bit integer vector containing the resultant values.
749-
static __inline__ __m64 __DEFAULT_FN_ATTRS
750-
_mm_sign_pi16(__m64 __a, __m64 __b)
751-
{
752-
return __trunc64(__builtin_ia32_psignw128((__v8hi)__anyext128(__a),
753-
(__v8hi)__anyext128(__b)));
742+
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_pi16(__m64 __a,
743+
__m64 __b) {
744+
return __trunc64(
745+
__builtin_ia32_psignw128((__v8hi)__zext128(__a), (__v8hi)__zext128(__b)));
754746
}
755747

756748
/// For each 32-bit integer in the first source operand, perform one of
@@ -773,14 +765,12 @@ _mm_sign_pi16(__m64 __a, __m64 __b)
773765
/// A 64-bit integer vector containing two control doublewords corresponding
774766
/// to positions in the destination.
775767
/// \returns A 64-bit integer vector containing the resultant values.
776-
static __inline__ __m64 __DEFAULT_FN_ATTRS
777-
_mm_sign_pi32(__m64 __a, __m64 __b)
778-
{
779-
return __trunc64(__builtin_ia32_psignd128((__v4si)__anyext128(__a),
780-
(__v4si)__anyext128(__b)));
768+
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_pi32(__m64 __a,
769+
__m64 __b) {
770+
return __trunc64(
771+
__builtin_ia32_psignd128((__v4si)__zext128(__a), (__v4si)__zext128(__b)));
781772
}
782773

783-
#undef __anyext128
784774
#undef __zext128
785775
#undef __trunc64
786776
#undef __DEFAULT_FN_ATTRS

clang/test/CodeGen/X86/avx2-builtins.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1155,23 +1155,30 @@ __m256i test_mm256_shufflelo_epi16(__m256i a) {
11551155
return _mm256_shufflelo_epi16(a, 83);
11561156
}
11571157
TEST_CONSTEXPR(match_v16hi(_mm256_shufflelo_epi16(((__m256i)(__v16hi){ 0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15}), 83), 3,0,1,1, 4,5,6,7, 11,8,9,9, 12,13,14,15) );
1158+
11581159
__m256i test_mm256_sign_epi8(__m256i a, __m256i b) {
11591160
// CHECK-LABEL: test_mm256_sign_epi8
11601161
// CHECK: call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %{{.*}}, <32 x i8> %{{.*}})
11611162
return _mm256_sign_epi8(a, b);
11621163
}
1164+
TEST_CONSTEXPR(match_v32qi(_mm256_sign_epi8(
1165+
(__m256i)(__v32qs){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1},
1166+
(__m256i)(__v32qs){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-1}),
1167+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-1));
11631168

11641169
__m256i test_mm256_sign_epi16(__m256i a, __m256i b) {
11651170
// CHECK-LABEL: test_mm256_sign_epi16
11661171
// CHECK: call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
11671172
return _mm256_sign_epi16(a, b);
11681173
}
1174+
TEST_CONSTEXPR(match_v16hi(_mm256_sign_epi16((__m256i)(__v16hi){0x77,0x77,0xbe,0xbe, -0x9,-0x9,-0x8,-0x8, 0,0,0,0, 0,0,0,0}, (__m256i)(__v16hi){-1,-256,1,256, -512,-1028,512,1028, -2048,-4096,0,0, 0,0,0,0}), -0x77,-0x77,0xbe,0xbe, 0x9,0x9,-0x8,-0x8, 0,0,0,0, 0,0,0,0));
11691175

11701176
__m256i test_mm256_sign_epi32(__m256i a, __m256i b) {
11711177
// CHECK-LABEL: test_mm256_sign_epi32
11721178
// CHECK: call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %{{.*}}, <8 x i32> %{{.*}})
11731179
return _mm256_sign_epi32(a, b);
11741180
}
1181+
TEST_CONSTEXPR(match_v8si(_mm256_sign_epi32((__m256i)(__v8si){0xbeef,0xfeed,0xbead,0xdeed, -1,2,-3,4}, (__m256i)(__v8si){0,0,0,0,-1,-1,-1,-1}), 0,0,0,0, 1,-2,3,-4));
11751182

11761183
__m256i test_mm256_slli_epi16(__m256i a) {
11771184
// CHECK-LABEL: test_mm256_slli_epi16

clang/test/CodeGen/X86/mmx-builtins.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -602,23 +602,28 @@ __m64 test_mm_shuffle_pi16(__m64 a) {
602602
return _mm_shuffle_pi16(a, 3);
603603
}
604604
TEST_CONSTEXPR(match_v4hi(_mm_shuffle_pi16(((__m64)(__v4hi){0,1,2,3}), 3), 3,0,0,0));
605+
605606
__m64 test_mm_sign_pi8(__m64 a, __m64 b) {
606607
// CHECK-LABEL: test_mm_sign_pi8
607608
// CHECK: call <16 x i8> @llvm.x86.ssse3.psign.b.128(
608609
return _mm_sign_pi8(a, b);
609610
}
611+
TEST_CONSTEXPR(match_v8qi(_mm_sign_pi8((__m64)(__v8qi){0,0,0,0, 0,0,0,0}, (__m64)(__v8qi){0,0,0,0, 0,0,0,0}), 0,0,0,0, 0,0,0,0));
612+
TEST_CONSTEXPR(match_v8qi(_mm_sign_pi8((__m64)(__v8qi){6,7,6,7, 6,7,6,7}, (__m64)(__v8qi){1,1,1,1, 0,0,0,0}), 6,7,6,7, 0,0,0,0));
610613

611614
__m64 test_mm_sign_pi16(__m64 a, __m64 b) {
612615
// CHECK-LABEL: test_mm_sign_pi16
613616
// CHECK: call <8 x i16> @llvm.x86.ssse3.psign.w.128(
614617
return _mm_sign_pi16(a, b);
615618
}
619+
TEST_CONSTEXPR(match_v4hi(_mm_sign_pi16((__m64)(__v4hi){-1,0,1,0}, (__m64)(__v4hi){1,0,-1,0}), -1,0,-1,0));
616620

617621
__m64 test_mm_sign_pi32(__m64 a, __m64 b) {
618622
// CHECK-LABEL: test_mm_sign_pi32
619623
// CHECK: call <4 x i32> @llvm.x86.ssse3.psign.d.128(
620624
return _mm_sign_pi32(a, b);
621625
}
626+
TEST_CONSTEXPR(match_v2si(_mm_sign_pi32((__m64)(__v2si){0x7FFF, -1}, (__m64)(__v2si){-1, 0x7FFF}), -0x7FFF, -1));
622627

623628
__m64 test_mm_sll_pi16(__m64 a, __m64 b) {
624629
// CHECK-LABEL: test_mm_sll_pi16

clang/test/CodeGen/X86/ssse3-builtins.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,15 +125,18 @@ __m128i test_mm_sign_epi8(__m128i a, __m128i b) {
125125
// CHECK: call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
126126
return _mm_sign_epi8(a, b);
127127
}
128+
TEST_CONSTEXPR(match_v16qi(_mm_sign_epi8((__m128i)(__v16qs){11,0,13,14, 0,16,17,18, 19,20,21,22, 23,24,25,26}, (__m128i)(__v16qs){0,1,0,1, -1,1,0,0, 0,0,1,1, -1,0,-1,0}), 0,0,0,14, 0,16,0,0, 0,0,21,22, -23,0,-25,0));
128129

129130
__m128i test_mm_sign_epi16(__m128i a, __m128i b) {
130131
// CHECK-LABEL: test_mm_sign_epi16
131132
// CHECK: call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
132133
return _mm_sign_epi16(a, b);
133134
}
135+
TEST_CONSTEXPR(match_v8hi(_mm_sign_epi16((__m128i)(__v8hi){0,-2,0,-4,0,-6,0,-8}, (__m128i)(__v8hi){-1,-2,-3,-4,-5,-6,7,-8}), 0,2,0,4,0,6,0,8));
134136

135137
__m128i test_mm_sign_epi32(__m128i a, __m128i b) {
136138
// CHECK-LABEL: test_mm_sign_epi32
137139
// CHECK: call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
138140
return _mm_sign_epi32(a, b);
139141
}
142+
TEST_CONSTEXPR(match_v4si(_mm_sign_epi32((__m128i)(__v4si){-1,-2,-3,-4}, (__m128i)(__v4si){-4,-3,-2,-1}), 1,2,3,4));

0 commit comments

Comments
 (0)