Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 18 additions & 11 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -123,13 +123,13 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
}
}

let Features = "ssse3" in {
def psignb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
def psignw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
def psignd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
}

let Features = "ssse3", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def psignb128
: X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
def psignw128
: X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
def psignd128
: X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
def pmulhrsw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
def pmaddubsw128 : X86Builtin<"_Vector<8, short>(_Vector<16, char>, _Vector<16, char>)">;
def pshufb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
Expand Down Expand Up @@ -603,10 +603,9 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
def mpsadbw256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant char)">;
def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, "
"_Vector<32, char>, _Constant int)">;
def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
def psignb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
def psignw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
def psignd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
def psadbw256
: X86Builtin<
"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
def psllw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
def pslldqi256_byteshift : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Constant int)">;
def pslld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
Expand Down Expand Up @@ -677,7 +676,15 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
def phsubw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
def phsubd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
def phsubsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;


def psignb256
: X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
def psignw256
: X86Builtin<
"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
def psignd256
: X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;

def pshuflw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">;
def pshufhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">;
def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;
Expand Down
14 changes: 14 additions & 0 deletions clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3802,6 +3802,20 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return interp__builtin_ia32_movmsk_op(S, OpPC, Call);
}

case X86::BI__builtin_ia32_psignb128:
case X86::BI__builtin_ia32_psignb256:
case X86::BI__builtin_ia32_psignw128:
case X86::BI__builtin_ia32_psignw256:
case X86::BI__builtin_ia32_psignd128:
case X86::BI__builtin_ia32_psignd256:
return interp__builtin_elementwise_int_binop(
S, OpPC, Call, [](const APSInt &AElem, const APSInt &BElem) -> APInt {
return BElem[BElem.getBitWidth() - 1]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BElem.isNegative()?

? static_cast<const APInt &>(-AElem)
: BElem.isZero() ? APInt(AElem.getBitWidth(), 0)
: static_cast<const APInt &>(AElem);
});
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You might be able to just do this (we don't use anything from the APSInt at all so we can tweak the callback to take APInt refs):

    return interp__builtin_elementwise_int_binop(
        S, OpPC, Call, [](const APInt &AElem, const APInt &BElem) {
          if (BElem.isZero())
            return APInt::getZero(AElem.getBitWidth());
          if (BElem.isNegative())
            return -AElem;
          return AElem;
        });


case clang::X86::BI__builtin_ia32_pavgb128:
case clang::X86::BI__builtin_ia32_pavgw128:
case clang::X86::BI__builtin_ia32_pavgb256:
Expand Down
13 changes: 13 additions & 0 deletions clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12312,6 +12312,19 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}

case X86::BI__builtin_ia32_psignb128:
case X86::BI__builtin_ia32_psignb256:
case X86::BI__builtin_ia32_psignw128:
case X86::BI__builtin_ia32_psignw256:
case X86::BI__builtin_ia32_psignd128:
case X86::BI__builtin_ia32_psignd256:
  // PSIGN{B,W,D}: each result lane is derived from the matching lanes of the
  // two operands. The control lane (BElem) selects what happens to the data
  // lane (AElem):
  //   control == 0        -> 0
  //   control sign bit set -> -data (two's-complement negate)
  //   otherwise           -> data unchanged
  return EvaluateBinOpExpr(
      [](const APSInt &AElem, const APSInt &BElem) -> APInt {
        if (BElem.isZero())
          return APInt::getZero(AElem.getBitWidth());
        // Test the raw top bit rather than APSInt::isNegative() so the result
        // does not depend on whether the lane type is flagged as unsigned.
        if (BElem.isSignBitSet())
          return -AElem;
        return AElem;
      });

case X86::BI__builtin_ia32_blendvpd:
case X86::BI__builtin_ia32_blendvpd256:
case X86::BI__builtin_ia32_blendvps:
Expand Down
21 changes: 9 additions & 12 deletions clang/lib/Headers/avx2intrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -1975,10 +1975,9 @@ _mm256_shuffle_epi8(__m256i __a, __m256i __b) {
/// \param __b
/// A 256-bit integer vector.
/// \returns A 256-bit integer vector containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_sign_epi8(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b);
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_sign_epi8(__m256i __a, __m256i __b) {
return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b);
}

/// Sets each element of the result to the corresponding element of the
Expand All @@ -1996,10 +1995,9 @@ _mm256_sign_epi8(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [16 x i16].
/// \returns A 256-bit vector of [16 x i16] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_sign_epi16(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b);
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_sign_epi16(__m256i __a, __m256i __b) {
return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b);
}

/// Sets each element of the result to the corresponding element of the
Expand All @@ -2017,10 +2015,9 @@ _mm256_sign_epi16(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [8 x i32].
/// \returns A 256-bit vector of [8 x i32] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_sign_epi32(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b);
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_sign_epi32(__m256i __a, __m256i __b) {
return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b);
}

/// Shifts each 128-bit half of the 256-bit integer vector \a a left by
Expand Down
48 changes: 21 additions & 27 deletions clang/lib/Headers/tmmintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -641,10 +641,9 @@ _mm_shuffle_pi8(__m64 __a, __m64 __b) {
/// A 128-bit integer vector containing control bytes corresponding to
/// positions in the destination.
/// \returns A 128-bit integer vector containing the resultant values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sign_epi8(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_sign_epi8(__m128i __a, __m128i __b) {
return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
}

/// For each 16-bit integer in the first source operand, perform one of
Expand All @@ -667,10 +666,9 @@ _mm_sign_epi8(__m128i __a, __m128i __b)
/// A 128-bit integer vector containing control words corresponding to
/// positions in the destination.
/// \returns A 128-bit integer vector containing the resultant values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sign_epi16(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_sign_epi16(__m128i __a, __m128i __b) {
return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
}

/// For each 32-bit integer in the first source operand, perform one of
Expand All @@ -693,10 +691,9 @@ _mm_sign_epi16(__m128i __a, __m128i __b)
/// A 128-bit integer vector containing control doublewords corresponding to
/// positions in the destination.
/// \returns A 128-bit integer vector containing the resultant values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sign_epi32(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_sign_epi32(__m128i __a, __m128i __b) {
return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
}

/// For each 8-bit integer in the first source operand, perform one of
Expand All @@ -719,11 +716,10 @@ _mm_sign_epi32(__m128i __a, __m128i __b)
/// A 64-bit integer vector containing control bytes corresponding to
/// positions in the destination.
/// \returns A 64-bit integer vector containing the resultant values.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_sign_pi8(__m64 __a, __m64 __b)
{
return __trunc64(__builtin_ia32_psignb128((__v16qi)__anyext128(__a),
(__v16qi)__anyext128(__b)));
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_pi8(__m64 __a,
__m64 __b) {
return __trunc64(__builtin_ia32_psignb128((__v16qi)__zext128(__a),
(__v16qi)__zext128(__b)));
}

/// For each 16-bit integer in the first source operand, perform one of
Expand All @@ -746,11 +742,10 @@ _mm_sign_pi8(__m64 __a, __m64 __b)
/// A 64-bit integer vector containing control words corresponding to
/// positions in the destination.
/// \returns A 64-bit integer vector containing the resultant values.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_sign_pi16(__m64 __a, __m64 __b)
{
return __trunc64(__builtin_ia32_psignw128((__v8hi)__anyext128(__a),
(__v8hi)__anyext128(__b)));
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_pi16(__m64 __a,
__m64 __b) {
return __trunc64(
__builtin_ia32_psignw128((__v8hi)__zext128(__a), (__v8hi)__zext128(__b)));
}

/// For each 32-bit integer in the first source operand, perform one of
Expand All @@ -773,11 +768,10 @@ _mm_sign_pi16(__m64 __a, __m64 __b)
/// A 64-bit integer vector containing two control doublewords corresponding
/// to positions in the destination.
/// \returns A 64-bit integer vector containing the resultant values.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_sign_pi32(__m64 __a, __m64 __b)
{
return __trunc64(__builtin_ia32_psignd128((__v4si)__anyext128(__a),
(__v4si)__anyext128(__b)));
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_pi32(__m64 __a,
__m64 __b) {
return __trunc64(
__builtin_ia32_psignd128((__v4si)__zext128(__a), (__v4si)__zext128(__b)));
}

#undef __anyext128
Expand Down
7 changes: 7 additions & 0 deletions clang/test/CodeGen/X86/avx2-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -1155,23 +1155,30 @@ __m256i test_mm256_shufflelo_epi16(__m256i a) {
return _mm256_shufflelo_epi16(a, 83);
}
TEST_CONSTEXPR(match_v16hi(_mm256_shufflelo_epi16(((__m256i)(__v16hi){ 0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15}), 83), 3,0,1,1, 4,5,6,7, 11,8,9,9, 12,13,14,15) );

__m256i test_mm256_sign_epi8(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_sign_epi8
// CHECK: call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %{{.*}}, <32 x i8> %{{.*}})
return _mm256_sign_epi8(a, b);
}
TEST_CONSTEXPR(match_v32qi(_mm256_sign_epi8(
(__m256i)(__v32qs){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1},
(__m256i)(__v32qs){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-1}),
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-1));

__m256i test_mm256_sign_epi16(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_sign_epi16
// CHECK: call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_sign_epi16(a, b);
}
TEST_CONSTEXPR(match_v16hi(_mm256_sign_epi16((__m256i)(__v16hi){0x77,0x77,0xbe,0xbe, -0x9,-0x9,-0x8,-0x8, 0,0,0,0, 0,0,0,0}, (__m256i)(__v16hi){-1,-256,1,256, -512,-1028,512,1028, -2048,-4096,0,0, 0,0,0,0}), -0x77,-0x77,0xbe,0xbe, 0x9,0x9,-0x8,-0x8, 0,0,0,0, 0,0,0,0));

__m256i test_mm256_sign_epi32(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_sign_epi32
// CHECK: call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %{{.*}}, <8 x i32> %{{.*}})
return _mm256_sign_epi32(a, b);
}
TEST_CONSTEXPR(match_v8si(_mm256_sign_epi32((__m256i)(__v8si){0xbeef,0xfeed,0xbead,0xdeed, -1,2,-3,4}, (__m256i)(__v8si){0,0,0,0,-1,-1,-1,-1}), 0,0,0,0, 1,-2,3,-4));

__m256i test_mm256_slli_epi16(__m256i a) {
// CHECK-LABEL: test_mm256_slli_epi16
Expand Down
5 changes: 5 additions & 0 deletions clang/test/CodeGen/X86/mmx-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -602,23 +602,28 @@ __m64 test_mm_shuffle_pi16(__m64 a) {
return _mm_shuffle_pi16(a, 3);
}
TEST_CONSTEXPR(match_v4hi(_mm_shuffle_pi16(((__m64)(__v4hi){0,1,2,3}), 3), 3,0,0,0));

__m64 test_mm_sign_pi8(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_sign_pi8
// CHECK: call <16 x i8> @llvm.x86.ssse3.psign.b.128(
return _mm_sign_pi8(a, b);
}
TEST_CONSTEXPR(match_v8qi(_mm_sign_pi8((__m64)(__v8qi){0,0,0,0, 0,0,0,0}, (__m64)(__v8qi){0,0,0,0, 0,0,0,0}), 0,0,0,0, 0,0,0,0));
TEST_CONSTEXPR(match_v8qi(_mm_sign_pi8((__m64)(__v8qi){6,7,6,7, 6,7,6,7}, (__m64)(__v8qi){1,1,1,1, 0,0,0,0}), 6,7,6,7, 0,0,0,0));

__m64 test_mm_sign_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_sign_pi16
// CHECK: call <8 x i16> @llvm.x86.ssse3.psign.w.128(
return _mm_sign_pi16(a, b);
}
TEST_CONSTEXPR(match_v4hi(_mm_sign_pi16((__m64)(__v4hi){-1,0,1,0}, (__m64)(__v4hi){1,0,-1,0}), -1,0,-1,0));

__m64 test_mm_sign_pi32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_sign_pi32
// CHECK: call <4 x i32> @llvm.x86.ssse3.psign.d.128(
return _mm_sign_pi32(a, b);
}
TEST_CONSTEXPR(match_v2si(_mm_sign_pi32((__m64)(__v2si){0x7FFF, -1}, (__m64)(__v2si){-1, 0x7FFF}), -0x7FFF, -1));

__m64 test_mm_sll_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_sll_pi16
Expand Down
3 changes: 3 additions & 0 deletions clang/test/CodeGen/X86/ssse3-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -125,15 +125,18 @@ __m128i test_mm_sign_epi8(__m128i a, __m128i b) {
// CHECK: call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
return _mm_sign_epi8(a, b);
}
TEST_CONSTEXPR(match_v16qi(_mm_sign_epi8((__m128i)(__v16qs){11,0,13,14, 0,16,17,18, 19,20,21,22, 23,24,25,26}, (__m128i)(__v16qs){0,1,0,1, -1,1,0,0, 0,0,1,1, -1,0,-1,0}), 0,0,0,14, 0,16,0,0, 0,0,21,22, -23,0,-25,0));

__m128i test_mm_sign_epi16(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_sign_epi16
// CHECK: call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_sign_epi16(a, b);
}
TEST_CONSTEXPR(match_v8hi(_mm_sign_epi16((__m128i)(__v8hi){0,-2,0,-4,0,-6,0,-8}, (__m128i)(__v8hi){-1,-2,-3,-4,-5,-6,7,-8}), 0,2,0,4,0,6,0,8));

__m128i test_mm_sign_epi32(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_sign_epi32
// CHECK: call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
return _mm_sign_epi32(a, b);
}
TEST_CONSTEXPR(match_v4si(_mm_sign_epi32((__m128i)(__v4si){-1,-2,-3,-4}, (__m128i)(__v4si){-4,-3,-2,-1}), 1,2,3,4));