Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 16 additions & 4 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,6 @@ let Header = "emmintrin.h", Attributes = [NoThrow, RequireDeclaration] in {
}

let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def pmuludq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;
def psraw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
def psrad128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
def psrlw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
Expand All @@ -290,6 +289,10 @@ let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] i
def psrldqi128_byteshift : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant int)">;
}

let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def pmuludq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;
}

let Features = "sse3", Attributes = [NoThrow] in {
def monitor : X86Builtin<"void(void const *, unsigned int, unsigned int)">;
def mwait : X86Builtin<"void(unsigned int, unsigned int)">;
Expand All @@ -312,7 +315,6 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
def blendvpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>)">;
def blendvps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>)">;
def packusdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">;
def pmuldq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;
def roundps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Constant int)">;
def roundss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">;
def roundsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
Expand All @@ -329,6 +331,10 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
def vec_set_v4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int, _Constant int)">;
}

let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def pmuldq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;
}

let Features = "sse4.2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def pcmpistrm128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
def pcmpistri128 : X86Builtin<"int(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
Expand Down Expand Up @@ -580,9 +586,7 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
def pmaddubsw256 : X86Builtin<"_Vector<16, short>(_Vector<32, char>, _Vector<32, char>)">;
def pmaddwd256 : X86Builtin<"_Vector<8, int>(_Vector<16, short>, _Vector<16, short>)">;
def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">;
def pmuldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
def pmulhrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
def pmuludq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
def pshufb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;
Expand Down Expand Up @@ -620,6 +624,11 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
def insert128i256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>, _Constant int)">;
}

let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def pmuldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
def pmuludq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
}

let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def pmulhuw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, unsigned short>, _Vector<16, unsigned short>)">;
def pmulhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
Expand Down Expand Up @@ -1078,6 +1087,9 @@ let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWi
def cvtpd2ps512_mask : X86Builtin<"_Vector<8, float>(_Vector<8, double>, _Vector<8, float>, unsigned char, _Constant int)">;
def vcvtps2ph512_mask : X86Builtin<"_Vector<16, short>(_Vector<16, float>, _Constant int, _Vector<16, short>, unsigned short)">;
def vcvtph2ps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, short>, _Vector<16, float>, unsigned short, _Constant int)">;
}

let Features = "avx512f,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def pmuldq512 : X86Builtin<"_Vector<8, long long int>(_Vector<16, int>, _Vector<16, int>)">;
def pmuludq512 : X86Builtin<"_Vector<8, long long int>(_Vector<16, int>, _Vector<16, int>)">;
}
Expand Down
37 changes: 37 additions & 0 deletions clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11711,6 +11711,43 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {

return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
case clang::X86::BI__builtin_ia32_pmuldq128:
case clang::X86::BI__builtin_ia32_pmuldq256:
case clang::X86::BI__builtin_ia32_pmuldq512:
case clang::X86::BI__builtin_ia32_pmuludq128:
case clang::X86::BI__builtin_ia32_pmuludq256:
case clang::X86::BI__builtin_ia32_pmuludq512: {
APValue SourceLHS, SourceRHS;
if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
!EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
return false;

unsigned SourceLen = SourceLHS.getVectorLength();
SmallVector<APValue, 4> ResultElements;
ResultElements.reserve(SourceLen / 2);

for (unsigned EltNum = 0; EltNum < SourceLen; EltNum += 2) {
APSInt LHS = SourceLHS.getVectorElt(EltNum).getInt();
APSInt RHS = SourceRHS.getVectorElt(EltNum).getInt();

switch (E->getBuiltinCallee()) {
case clang::X86::BI__builtin_ia32_pmuludq128:
case clang::X86::BI__builtin_ia32_pmuludq256:
case clang::X86::BI__builtin_ia32_pmuludq512:
ResultElements.push_back(
APValue(APSInt(llvm::APIntOps::muluExtended(LHS, RHS), true)));
break;
case clang::X86::BI__builtin_ia32_pmuldq128:
case clang::X86::BI__builtin_ia32_pmuldq256:
case clang::X86::BI__builtin_ia32_pmuldq512:
ResultElements.push_back(
APValue(APSInt(llvm::APIntOps::mulsExtended(LHS, RHS), false)));
break;
}
}

return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
case Builtin::BI__builtin_elementwise_max:
case Builtin::BI__builtin_elementwise_min: {
APValue SourceLHS, SourceRHS;
Expand Down
10 changes: 4 additions & 6 deletions clang/lib/Headers/avx2intrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -1667,9 +1667,8 @@ _mm256_cvtepu32_epi64(__m128i __V) {
/// \param __b
/// A 256-bit vector of [8 x i32] containing one of the source operands.
/// \returns A 256-bit vector of [4 x i64] containing the products.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mul_epi32(__m256i __a, __m256i __b)
{
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mul_epi32(__m256i __a, __m256i __b) {
return (__m256i)__builtin_ia32_pmuldq256((__v8si)__a, (__v8si)__b);
}

Expand Down Expand Up @@ -1796,9 +1795,8 @@ _mm256_mullo_epi32 (__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [8 x i32] containing one of the source operands.
/// \returns A 256-bit vector of [4 x i64] containing the products.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mul_epu32(__m256i __a, __m256i __b)
{
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mul_epu32(__m256i __a, __m256i __b) {
return __builtin_ia32_pmuludq256((__v8si)__a, (__v8si)__b);
}

Expand Down
10 changes: 4 additions & 6 deletions clang/lib/Headers/avx512fintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -1413,9 +1413,8 @@ _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
(__v8di)_mm512_setzero_si512());
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_mul_epi32(__m512i __X, __m512i __Y)
{
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mul_epi32(__m512i __X, __m512i __Y) {
return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
}

Expand All @@ -1435,9 +1434,8 @@ _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
(__v8di)_mm512_setzero_si512 ());
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_mul_epu32(__m512i __X, __m512i __Y)
{
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mul_epu32(__m512i __X, __m512i __Y) {
return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
}

Expand Down
14 changes: 9 additions & 5 deletions clang/lib/Headers/emmintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ typedef __bf16 __m128bh __attribute__((__vector_size__(16), __aligned__(16)));

#define __trunc64(x) \
(__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0)
#define __zext128(x) \
(__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \
1, 2, 3)
#define __anyext128(x) \
(__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \
1, -1, -1)
Expand Down Expand Up @@ -2445,9 +2448,10 @@ _mm_mullo_epi16(__m128i __a, __m128i __b) {
/// \param __b
/// A 64-bit integer containing one of the source operands.
/// \returns A 64-bit integer vector containing the product of both operands.
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mul_su32(__m64 __a, __m64 __b) {
return __trunc64(__builtin_ia32_pmuludq128((__v4si)__anyext128(__a),
(__v4si)__anyext128(__b)));
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_su32(__m64 __a,
__m64 __b) {
return __trunc64(__builtin_ia32_pmuludq128((__v4si)__zext128(__a),
(__v4si)__zext128(__b)));
}

/// Multiplies 32-bit unsigned integer values contained in the lower
Expand All @@ -2463,8 +2467,8 @@ static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mul_su32(__m64 __a, __m64 __b) {
/// \param __b
/// A [2 x i64] vector containing one of the source operands.
/// \returns A [2 x i64] vector containing the product of both operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epu32(__m128i __a,
__m128i __b) {
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_mul_epu32(__m128i __a, __m128i __b) {
return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b);
}

Expand Down
4 changes: 2 additions & 2 deletions clang/lib/Headers/smmintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -567,8 +567,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi32(__m128i __V1,
/// A 128-bit vector of [4 x i32].
/// \returns A 128-bit vector of [2 x i64] containing the products of both
/// operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epi32(__m128i __V1,
__m128i __V2) {
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_mul_epi32(__m128i __V1, __m128i __V2) {
return (__m128i)__builtin_ia32_pmuldq128((__v4si)__V1, (__v4si)__V2);
}

Expand Down
2 changes: 2 additions & 0 deletions clang/test/CodeGen/X86/avx2-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -920,6 +920,7 @@ __m256i test_mm256_mul_epi32(__m256i a, __m256i b) {
// CHECK: mul <4 x i64> %{{.*}}, %{{.*}}
return _mm256_mul_epi32(a, b);
}
TEST_CONSTEXPR(match_m256i(_mm256_mul_epi32((__m256i)(__v8si){+1, -2, +3, -4, +5, -6, +7, -8}, (__m256i)(__v8si){-16, -14, +12, +10, -8, +6, -4, +2}), -16, 36, -40, -28));

__m256i test_mm256_mul_epu32(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_mul_epu32
Expand All @@ -928,6 +929,7 @@ __m256i test_mm256_mul_epu32(__m256i a, __m256i b) {
// CHECK: mul <4 x i64> %{{.*}}, %{{.*}}
return _mm256_mul_epu32(a, b);
}
TEST_CONSTEXPR(match_m256i(_mm256_mul_epu32((__m256i)(__v8si){+1, -2, +3, -4, +5, -6, +7, -8}, (__m256i)(__v8si){-16, -14, +12, +10, -8, +6, -4, +2}), 4294967280, 36, 21474836440, 30064771044));

__m256i test_mm256_mulhi_epu16(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_mulhi_epu16
Expand Down
2 changes: 2 additions & 0 deletions clang/test/CodeGen/X86/avx512f-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -3027,6 +3027,7 @@ __m512i test_mm512_mul_epi32(__m512i __A, __m512i __B) {
//CHECK: mul <8 x i64> %{{.*}}, %{{.*}}
return _mm512_mul_epi32(__A,__B);
}
TEST_CONSTEXPR(match_v8di(_mm512_mul_epi32((__m512i)(__v16si){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m512i)(__v16si){-32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), -32, 84, -120, 140, -144, 132, -104, -60));

__m512i test_mm512_maskz_mul_epi32 (__mmask8 __k,__m512i __A, __m512i __B) {
//CHECK-LABEL: test_mm512_maskz_mul_epi32
Expand Down Expand Up @@ -3057,6 +3058,7 @@ __m512i test_mm512_mul_epu32 (__m512i __A, __m512i __B) {
//CHECK: mul <8 x i64> %{{.*}}, %{{.*}}
return _mm512_mul_epu32(__A,__B);
}
TEST_CONSTEXPR(match_m512i(_mm512_mul_epu32((__m512i)(__v16si){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m512i)(__v16si){-32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), 4294967264, 84, 21474836360, 140, 38654705520, 132, 55834574744, 64424509380));

__m512i test_mm512_maskz_mul_epu32 (__mmask8 __k,__m512i __A, __m512i __B) {
//CHECK-LABEL: test_mm512_maskz_mul_epu32
Expand Down
1 change: 1 addition & 0 deletions clang/test/CodeGen/X86/mmx-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,7 @@ __m64 test_mm_mul_su32(__m64 a, __m64 b) {
// CHECK: mul <2 x i64> %{{.*}}, %{{.*}}
return _mm_mul_su32(a, b);
}
TEST_CONSTEXPR(match_m64(_mm_mul_su32((__m64)(__v2si){+1, -2}, (__m64)(__v2si){-10, +8}), 4294967286));

__m64 test_mm_mulhi_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_mulhi_pi16
Expand Down
1 change: 1 addition & 0 deletions clang/test/CodeGen/X86/sse2-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -925,6 +925,7 @@ __m128i test_mm_mul_epu32(__m128i A, __m128i B) {
// CHECK: mul <2 x i64> %{{.*}}, %{{.*}}
return _mm_mul_epu32(A, B);
}
TEST_CONSTEXPR(match_m128i(_mm_mul_epu32((__m128i)(__v4si){+1, -2, +3, -4}, (__m128i)(__v4si){-16, -14, +12, +10}), 4294967280, 36));

__m128d test_mm_mul_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_mul_pd
Expand Down
1 change: 1 addition & 0 deletions clang/test/CodeGen/X86/sse41-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,7 @@ __m128i test_mm_mul_epi32(__m128i x, __m128i y) {
// CHECK: mul <2 x i64> %{{.*}}, %{{.*}}
return _mm_mul_epi32(x, y);
}
TEST_CONSTEXPR(match_m128i(_mm_mul_epi32((__m128i)(__v4si){+1, -2, +3, -4}, (__m128i)(__v4si){-16, -14, +12, +10}), -16, 36));

__m128i test_mm_mullo_epi32(__m128i x, __m128i y) {
// CHECK-LABEL: test_mm_mullo_epi32
Expand Down