-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[Headers][X86] Allow pmuludq/pmuldq to be used in constexpr #153293
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-adt Author: Pedro Lobo (pedroclobo) ChangesCloses #153002. Adds Full diff: https://github.com/llvm/llvm-project/pull/153293.diff 13 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 3efc0be8fa698..ca2cc081b787a 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -268,7 +268,6 @@ let Header = "emmintrin.h", Attributes = [NoThrow, RequireDeclaration] in {
}
let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
- def pmuludq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;
def psraw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
def psrad128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
def psrlw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
@@ -290,6 +289,10 @@ let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] i
def psrldqi128_byteshift : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant int)">;
}
+let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+ def pmuludq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;
+}
+
let Features = "sse3", Attributes = [NoThrow] in {
def monitor : X86Builtin<"void(void const *, unsigned int, unsigned int)">;
def mwait : X86Builtin<"void(unsigned int, unsigned int)">;
@@ -312,7 +315,6 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
def blendvpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>)">;
def blendvps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>)">;
def packusdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">;
- def pmuldq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;
def roundps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Constant int)">;
def roundss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">;
def roundsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
@@ -329,6 +331,10 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
def vec_set_v4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int, _Constant int)">;
}
+let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+ def pmuldq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;
+}
+
let Features = "sse4.2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def pcmpistrm128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
def pcmpistri128 : X86Builtin<"int(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
@@ -580,9 +586,7 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
def pmaddubsw256 : X86Builtin<"_Vector<16, short>(_Vector<32, char>, _Vector<32, char>)">;
def pmaddwd256 : X86Builtin<"_Vector<8, int>(_Vector<16, short>, _Vector<16, short>)">;
def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">;
- def pmuldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
def pmulhrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
- def pmuludq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
def pshufb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;
@@ -620,6 +624,11 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
def insert128i256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>, _Constant int)">;
}
+let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+ def pmuldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
+ def pmuludq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
+}
+
let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def pmulhuw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, unsigned short>, _Vector<16, unsigned short>)">;
def pmulhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
@@ -1090,6 +1099,9 @@ let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWi
def cvtpd2ps512_mask : X86Builtin<"_Vector<8, float>(_Vector<8, double>, _Vector<8, float>, unsigned char, _Constant int)">;
def vcvtps2ph512_mask : X86Builtin<"_Vector<16, short>(_Vector<16, float>, _Constant int, _Vector<16, short>, unsigned short)">;
def vcvtph2ps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, short>, _Vector<16, float>, unsigned short, _Constant int)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def pmuldq512 : X86Builtin<"_Vector<8, long long int>(_Vector<16, int>, _Vector<16, int>)">;
def pmuludq512 : X86Builtin<"_Vector<8, long long int>(_Vector<16, int>, _Vector<16, int>)">;
}
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 36dd0f5d7a065..234f257122d8f 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11688,6 +11688,43 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
+ case clang::X86::BI__builtin_ia32_pmuldq128:
+ case clang::X86::BI__builtin_ia32_pmuldq256:
+ case clang::X86::BI__builtin_ia32_pmuldq512:
+ case clang::X86::BI__builtin_ia32_pmuludq128:
+ case clang::X86::BI__builtin_ia32_pmuludq256:
+ case clang::X86::BI__builtin_ia32_pmuludq512: {
+ APValue SourceLHS, SourceRHS;
+ if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
+ !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
+ return false;
+
+ unsigned SourceLen = SourceLHS.getVectorLength();
+ SmallVector<APValue, 4> ResultElements;
+ ResultElements.reserve(SourceLen / 2);
+
+ for (unsigned EltNum = 0; EltNum < SourceLen; EltNum += 2) {
+ APSInt LHS = SourceLHS.getVectorElt(EltNum).getInt();
+ APSInt RHS = SourceRHS.getVectorElt(EltNum).getInt();
+
+ switch (E->getBuiltinCallee()) {
+ case clang::X86::BI__builtin_ia32_pmuludq128:
+ case clang::X86::BI__builtin_ia32_pmuludq256:
+ case clang::X86::BI__builtin_ia32_pmuludq512:
+ ResultElements.push_back(
+ APValue(APSInt(llvm::APIntOps::muludq(LHS, RHS), true)));
+ break;
+ case clang::X86::BI__builtin_ia32_pmuldq128:
+ case clang::X86::BI__builtin_ia32_pmuldq256:
+ case clang::X86::BI__builtin_ia32_pmuldq512:
+ ResultElements.push_back(
+ APValue(APSInt(llvm::APIntOps::muldq(LHS, RHS), false)));
+ break;
+ }
+ }
+
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
case Builtin::BI__builtin_elementwise_max:
case Builtin::BI__builtin_elementwise_min: {
APValue SourceLHS, SourceRHS;
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index f00a8a516ecfe..b098417173ba3 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -1671,9 +1671,8 @@ _mm256_cvtepu32_epi64(__m128i __V) {
/// \param __b
/// A 256-bit vector of [8 x i32] containing one of the source operands.
/// \returns A 256-bit vector of [4 x i64] containing the products.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mul_epi32(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mul_epi32(__m256i __a, __m256i __b) {
return (__m256i)__builtin_ia32_pmuldq256((__v8si)__a, (__v8si)__b);
}
@@ -1800,9 +1799,8 @@ _mm256_mullo_epi32 (__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [8 x i32] containing one of the source operands.
/// \returns A 256-bit vector of [4 x i64] containing the products.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mul_epu32(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mul_epu32(__m256i __a, __m256i __b) {
return __builtin_ia32_pmuludq256((__v8si)__a, (__v8si)__b);
}
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 9fc1df3acd3d0..0ba5aa338a6b8 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -1417,9 +1417,8 @@ _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
(__v8di)_mm512_setzero_si512());
}
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_mul_epi32(__m512i __X, __m512i __Y)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mul_epi32(__m512i __X, __m512i __Y) {
return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
}
@@ -1439,9 +1438,8 @@ _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
(__v8di)_mm512_setzero_si512 ());
}
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_mul_epu32(__m512i __X, __m512i __Y)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mul_epu32(__m512i __X, __m512i __Y) {
return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
}
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index e632191113c4a..2ca25149db280 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -67,6 +67,9 @@ typedef __bf16 __m128bh __attribute__((__vector_size__(16), __aligned__(16)));
#define __trunc64(x) \
(__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0)
+#define __zext128(x) \
+ (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \
+ 1, 2, 3)
#define __anyext128(x) \
(__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \
1, -1, -1)
@@ -2450,9 +2453,10 @@ _mm_mullo_epi16(__m128i __a, __m128i __b) {
/// \param __b
/// A 64-bit integer containing one of the source operands.
/// \returns A 64-bit integer vector containing the product of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mul_su32(__m64 __a, __m64 __b) {
- return __trunc64(__builtin_ia32_pmuludq128((__v4si)__anyext128(__a),
- (__v4si)__anyext128(__b)));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_su32(__m64 __a,
+ __m64 __b) {
+ return __trunc64(__builtin_ia32_pmuludq128((__v4si)__zext128(__a),
+ (__v4si)__zext128(__b)));
}
/// Multiplies 32-bit unsigned integer values contained in the lower
@@ -2468,8 +2472,8 @@ static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mul_su32(__m64 __a, __m64 __b) {
/// \param __b
/// A [2 x i64] vector containing one of the source operands.
/// \returns A [2 x i64] vector containing the product of both operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epu32(__m128i __a,
- __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_mul_epu32(__m128i __a, __m128i __b) {
return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b);
}
diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h
index c5075c419b70b..57d0d329312af 100644
--- a/clang/lib/Headers/smmintrin.h
+++ b/clang/lib/Headers/smmintrin.h
@@ -567,8 +567,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi32(__m128i __V1,
/// A 128-bit vector of [4 x i32].
/// \returns A 128-bit vector of [2 x i64] containing the products of both
/// operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epi32(__m128i __V1,
- __m128i __V2) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_mul_epi32(__m128i __V1, __m128i __V2) {
return (__m128i)__builtin_ia32_pmuldq128((__v4si)__V1, (__v4si)__V2);
}
diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c
index 8790485f00a8c..bd7390d58a74c 100644
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -903,6 +903,7 @@ __m256i test_mm256_mul_epi32(__m256i a, __m256i b) {
// CHECK: mul <4 x i64> %{{.*}}, %{{.*}}
return _mm256_mul_epi32(a, b);
}
+TEST_CONSTEXPR(match_v8si(_mm256_mul_epi32((__m256i)(__v8si){+1, -2, +3, -4, +5, -6, +7, -8}, (__m256i)(__v8si){-16, -14, +12, +10, -8, +6, -4, +2}), -16, -1, 36, 0, -40, -1, -28, -1));
__m256i test_mm256_mul_epu32(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_mul_epu32
@@ -911,6 +912,7 @@ __m256i test_mm256_mul_epu32(__m256i a, __m256i b) {
// CHECK: mul <4 x i64> %{{.*}}, %{{.*}}
return _mm256_mul_epu32(a, b);
}
+TEST_CONSTEXPR(match_v8si(_mm256_mul_epu32((__m256i)(__v8si){+1, -2, +3, -4, +5, -6, +7, -8}, (__m256i)(__v8si){-16, -14, +12, +10, -8, +6, -4, +2}), -16, 0, 36, 0, -40, 4, -28, 6));
__m256i test_mm256_mulhi_epu16(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_mulhi_epu16
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 048bc3057b5f4..68da485213941 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -3027,6 +3027,7 @@ __m512i test_mm512_mul_epi32(__m512i __A, __m512i __B) {
//CHECK: mul <8 x i64> %{{.*}}, %{{.*}}
return _mm512_mul_epi32(__A,__B);
}
+TEST_CONSTEXPR(match_m512i(_mm512_mul_epi32((__m512i){1, 2, 3, 4, 5, 6, 7, 8}, (__m512i){16, 14, 12, 10, 8, 6, 4, 2}), 16, 28, 36, 40, 40, 36, 28, 16));
__m512i test_mm512_maskz_mul_epi32 (__mmask8 __k,__m512i __A, __m512i __B) {
//CHECK-LABEL: test_mm512_maskz_mul_epi32
@@ -3057,6 +3058,7 @@ __m512i test_mm512_mul_epu32 (__m512i __A, __m512i __B) {
//CHECK: mul <8 x i64> %{{.*}}, %{{.*}}
return _mm512_mul_epu32(__A,__B);
}
+TEST_CONSTEXPR(match_m512i(_mm512_mul_epu32((__m512i){1, 2, 3, 4, 5, 6, 7, 8}, (__m512i){16, 14, 12, 10, 8, 6, 4, 2}), 16, 28, 36, 40, 40, 36, 28, 16));
__m512i test_mm512_maskz_mul_epu32 (__mmask8 __k,__m512i __A, __m512i __B) {
//CHECK-LABEL: test_mm512_maskz_mul_epu32
diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c
index 07fab2d39054c..5c700107c58e9 100644
--- a/clang/test/CodeGen/X86/mmx-builtins.c
+++ b/clang/test/CodeGen/X86/mmx-builtins.c
@@ -385,6 +385,7 @@ __m64 test_mm_mul_su32(__m64 a, __m64 b) {
// CHECK: mul <2 x i64> %{{.*}}, %{{.*}}
return _mm_mul_su32(a, b);
}
+TEST_CONSTEXPR(match_v4hi(_mm_mul_su32((__m64)(__v4hi){1, 2, 3, 4}, (__m64)(__v4hi){10, 8, 6, 4}), 10, 28, 16, 0));
__m64 test_mm_mulhi_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_mulhi_pi16
diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c
index 4ff14adcea0e1..0e00b2763db35 100644
--- a/clang/test/CodeGen/X86/sse2-builtins.c
+++ b/clang/test/CodeGen/X86/sse2-builtins.c
@@ -921,6 +921,7 @@ __m128i test_mm_mul_epu32(__m128i A, __m128i B) {
// CHECK: mul <2 x i64> %{{.*}}, %{{.*}}
return _mm_mul_epu32(A, B);
}
+TEST_CONSTEXPR(match_v8hi(_mm_mul_epu32((__m128i)(__v8hi){+1, -2, +3, -4, +5, -6, +7, -8}, (__m128i)(__v8hi){-16, -14, +12, +10, -8, +6, -4, +2}), -16, 18, 11, -15, -40, 82, -50, 6));
__m128d test_mm_mul_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_mul_pd
diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c
index 10deb386d82aa..b7fc582788c8c 100644
--- a/clang/test/CodeGen/X86/sse41-builtins.c
+++ b/clang/test/CodeGen/X86/sse41-builtins.c
@@ -344,6 +344,7 @@ __m128i test_mm_mul_epi32(__m128i x, __m128i y) {
// CHECK: mul <2 x i64> %{{.*}}, %{{.*}}
return _mm_mul_epi32(x, y);
}
+TEST_CONSTEXPR(match_v8hi(_mm_mul_epi32((__m128i)(__v8hi){+1, -2, +3, -4, +5, -6, +7, -8}, (__m128i)(__v8hi){-16, -14, +12, +10, -8, +6, -4, +2}), -16, 18, 26, 0, -40, 82, -42, -1));
__m128i test_mm_mullo_epi32(__m128i x, __m128i y) {
// CHECK-LABEL: test_mm_mullo_epi32
diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h
index 295506393a1c4..cf7ddf3a9719f 100644
--- a/llvm/include/llvm/ADT/APInt.h
+++ b/llvm/include/llvm/ADT/APInt.h
@@ -2294,6 +2294,12 @@ LLVM_ABI APInt mulhs(const APInt &C1, const APInt &C2);
/// Returns the high N bits of the multiplication result.
LLVM_ABI APInt mulhu(const APInt &C1, const APInt &C2);
+/// Performs (2*N)-bit multiplication on sign-extended operands.
+LLVM_ABI APInt muldq(const APInt &C1, const APInt &C2);
+
+/// Performs (2*N)-bit multiplication on zero-extended operands.
+LLVM_ABI APInt muludq(const APInt &C1, const APInt &C2);
+
/// Compute X^N for N>=0.
/// 0^0 is supported and returns 1.
LLVM_ABI APInt pow(const APInt &X, int64_t N);
diff --git a/llvm/lib/Support/APInt.cpp b/llvm/lib/Support/APInt.cpp
index 954af7fff92a8..2dd83e8bbe2fd 100644
--- a/llvm/lib/Support/APInt.cpp
+++ b/llvm/lib/Support/APInt.cpp
@@ -3136,6 +3136,22 @@ APInt APIntOps::mulhu(const APInt &C1, const APInt &C2) {
return (C1Ext * C2Ext).extractBits(C1.getBitWidth(), C1.getBitWidth());
}
+APInt APIntOps::muludq(const APInt &C1, const APInt &C2) {
+ assert(C1.getBitWidth() == C2.getBitWidth() && "Unequal bitwidths");
+ unsigned FullWidth = C1.getBitWidth() * 2;
+ APInt C1Ext = C1.zext(FullWidth);
+ APInt C2Ext = C2.zext(FullWidth);
+ return C1Ext * C2Ext;
+}
+
+APInt APIntOps::muldq(const APInt &C1, const APInt &C2) {
+ assert(C1.getBitWidth() == C2.getBitWidth() && "Unequal bitwidths");
+ unsigned FullWidth = C1.getBitWidth() * 2;
+ APInt C1Ext = C1.sext(FullWidth);
+ APInt C2Ext = C2.sext(FullWidth);
+ return C1Ext * C2Ext;
+}
+
APInt APIntOps::pow(const APInt &X, int64_t N) {
assert(N >= 0 && "negative exponents not supported.");
APInt Acc = APInt(X.getBitWidth(), 1);
|
@llvm/pr-subscribers-llvm-support Author: Pedro Lobo (pedroclobo) ChangesCloses #153002. Adds Full diff: https://github.com/llvm/llvm-project/pull/153293.diff 13 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 3efc0be8fa698..ca2cc081b787a 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -268,7 +268,6 @@ let Header = "emmintrin.h", Attributes = [NoThrow, RequireDeclaration] in {
}
let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
- def pmuludq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;
def psraw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
def psrad128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
def psrlw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
@@ -290,6 +289,10 @@ let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] i
def psrldqi128_byteshift : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant int)">;
}
+let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+ def pmuludq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;
+}
+
let Features = "sse3", Attributes = [NoThrow] in {
def monitor : X86Builtin<"void(void const *, unsigned int, unsigned int)">;
def mwait : X86Builtin<"void(unsigned int, unsigned int)">;
@@ -312,7 +315,6 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
def blendvpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>)">;
def blendvps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>)">;
def packusdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">;
- def pmuldq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;
def roundps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Constant int)">;
def roundss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">;
def roundsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
@@ -329,6 +331,10 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
def vec_set_v4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int, _Constant int)">;
}
+let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+ def pmuldq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;
+}
+
let Features = "sse4.2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def pcmpistrm128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
def pcmpistri128 : X86Builtin<"int(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
@@ -580,9 +586,7 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
def pmaddubsw256 : X86Builtin<"_Vector<16, short>(_Vector<32, char>, _Vector<32, char>)">;
def pmaddwd256 : X86Builtin<"_Vector<8, int>(_Vector<16, short>, _Vector<16, short>)">;
def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">;
- def pmuldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
def pmulhrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
- def pmuludq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
def pshufb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;
@@ -620,6 +624,11 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
def insert128i256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>, _Constant int)">;
}
+let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+ def pmuldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
+ def pmuludq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
+}
+
let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def pmulhuw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, unsigned short>, _Vector<16, unsigned short>)">;
def pmulhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
@@ -1090,6 +1099,9 @@ let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWi
def cvtpd2ps512_mask : X86Builtin<"_Vector<8, float>(_Vector<8, double>, _Vector<8, float>, unsigned char, _Constant int)">;
def vcvtps2ph512_mask : X86Builtin<"_Vector<16, short>(_Vector<16, float>, _Constant int, _Vector<16, short>, unsigned short)">;
def vcvtph2ps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, short>, _Vector<16, float>, unsigned short, _Constant int)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def pmuldq512 : X86Builtin<"_Vector<8, long long int>(_Vector<16, int>, _Vector<16, int>)">;
def pmuludq512 : X86Builtin<"_Vector<8, long long int>(_Vector<16, int>, _Vector<16, int>)">;
}
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 36dd0f5d7a065..234f257122d8f 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11688,6 +11688,43 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
+ case clang::X86::BI__builtin_ia32_pmuldq128:
+ case clang::X86::BI__builtin_ia32_pmuldq256:
+ case clang::X86::BI__builtin_ia32_pmuldq512:
+ case clang::X86::BI__builtin_ia32_pmuludq128:
+ case clang::X86::BI__builtin_ia32_pmuludq256:
+ case clang::X86::BI__builtin_ia32_pmuludq512: {
+ APValue SourceLHS, SourceRHS;
+ if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
+ !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
+ return false;
+
+ unsigned SourceLen = SourceLHS.getVectorLength();
+ SmallVector<APValue, 4> ResultElements;
+ ResultElements.reserve(SourceLen / 2);
+
+ for (unsigned EltNum = 0; EltNum < SourceLen; EltNum += 2) {
+ APSInt LHS = SourceLHS.getVectorElt(EltNum).getInt();
+ APSInt RHS = SourceRHS.getVectorElt(EltNum).getInt();
+
+ switch (E->getBuiltinCallee()) {
+ case clang::X86::BI__builtin_ia32_pmuludq128:
+ case clang::X86::BI__builtin_ia32_pmuludq256:
+ case clang::X86::BI__builtin_ia32_pmuludq512:
+ ResultElements.push_back(
+ APValue(APSInt(llvm::APIntOps::muludq(LHS, RHS), true)));
+ break;
+ case clang::X86::BI__builtin_ia32_pmuldq128:
+ case clang::X86::BI__builtin_ia32_pmuldq256:
+ case clang::X86::BI__builtin_ia32_pmuldq512:
+ ResultElements.push_back(
+ APValue(APSInt(llvm::APIntOps::muldq(LHS, RHS), false)));
+ break;
+ }
+ }
+
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
case Builtin::BI__builtin_elementwise_max:
case Builtin::BI__builtin_elementwise_min: {
APValue SourceLHS, SourceRHS;
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index f00a8a516ecfe..b098417173ba3 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -1671,9 +1671,8 @@ _mm256_cvtepu32_epi64(__m128i __V) {
/// \param __b
/// A 256-bit vector of [8 x i32] containing one of the source operands.
/// \returns A 256-bit vector of [4 x i64] containing the products.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mul_epi32(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mul_epi32(__m256i __a, __m256i __b) {
return (__m256i)__builtin_ia32_pmuldq256((__v8si)__a, (__v8si)__b);
}
@@ -1800,9 +1799,8 @@ _mm256_mullo_epi32 (__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [8 x i32] containing one of the source operands.
/// \returns A 256-bit vector of [4 x i64] containing the products.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mul_epu32(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mul_epu32(__m256i __a, __m256i __b) {
return __builtin_ia32_pmuludq256((__v8si)__a, (__v8si)__b);
}
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 9fc1df3acd3d0..0ba5aa338a6b8 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -1417,9 +1417,8 @@ _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
(__v8di)_mm512_setzero_si512());
}
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_mul_epi32(__m512i __X, __m512i __Y)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mul_epi32(__m512i __X, __m512i __Y) {
return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
}
@@ -1439,9 +1438,8 @@ _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
(__v8di)_mm512_setzero_si512 ());
}
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_mul_epu32(__m512i __X, __m512i __Y)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mul_epu32(__m512i __X, __m512i __Y) {
return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
}
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index e632191113c4a..2ca25149db280 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -67,6 +67,9 @@ typedef __bf16 __m128bh __attribute__((__vector_size__(16), __aligned__(16)));
#define __trunc64(x) \
(__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0)
+#define __zext128(x) \
+ (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \
+ 1, 2, 3)
#define __anyext128(x) \
(__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \
1, -1, -1)
@@ -2450,9 +2453,10 @@ _mm_mullo_epi16(__m128i __a, __m128i __b) {
/// \param __b
/// A 64-bit integer containing one of the source operands.
/// \returns A 64-bit integer vector containing the product of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mul_su32(__m64 __a, __m64 __b) {
- return __trunc64(__builtin_ia32_pmuludq128((__v4si)__anyext128(__a),
- (__v4si)__anyext128(__b)));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_su32(__m64 __a,
+ __m64 __b) {
+ return __trunc64(__builtin_ia32_pmuludq128((__v4si)__zext128(__a),
+ (__v4si)__zext128(__b)));
}
/// Multiplies 32-bit unsigned integer values contained in the lower
@@ -2468,8 +2472,8 @@ static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mul_su32(__m64 __a, __m64 __b) {
/// \param __b
/// A [2 x i64] vector containing one of the source operands.
/// \returns A [2 x i64] vector containing the product of both operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epu32(__m128i __a,
- __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_mul_epu32(__m128i __a, __m128i __b) {
return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b);
}
diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h
index c5075c419b70b..57d0d329312af 100644
--- a/clang/lib/Headers/smmintrin.h
+++ b/clang/lib/Headers/smmintrin.h
@@ -567,8 +567,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi32(__m128i __V1,
/// A 128-bit vector of [4 x i32].
/// \returns A 128-bit vector of [2 x i64] containing the products of both
/// operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epi32(__m128i __V1,
- __m128i __V2) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_mul_epi32(__m128i __V1, __m128i __V2) {
return (__m128i)__builtin_ia32_pmuldq128((__v4si)__V1, (__v4si)__V2);
}
diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c
index 8790485f00a8c..bd7390d58a74c 100644
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -903,6 +903,7 @@ __m256i test_mm256_mul_epi32(__m256i a, __m256i b) {
// CHECK: mul <4 x i64> %{{.*}}, %{{.*}}
return _mm256_mul_epi32(a, b);
}
+TEST_CONSTEXPR(match_v8si(_mm256_mul_epi32((__m256i)(__v8si){+1, -2, +3, -4, +5, -6, +7, -8}, (__m256i)(__v8si){-16, -14, +12, +10, -8, +6, -4, +2}), -16, -1, 36, 0, -40, -1, -28, -1));
__m256i test_mm256_mul_epu32(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_mul_epu32
@@ -911,6 +912,7 @@ __m256i test_mm256_mul_epu32(__m256i a, __m256i b) {
// CHECK: mul <4 x i64> %{{.*}}, %{{.*}}
return _mm256_mul_epu32(a, b);
}
+TEST_CONSTEXPR(match_v8si(_mm256_mul_epu32((__m256i)(__v8si){+1, -2, +3, -4, +5, -6, +7, -8}, (__m256i)(__v8si){-16, -14, +12, +10, -8, +6, -4, +2}), -16, 0, 36, 0, -40, 4, -28, 6));
__m256i test_mm256_mulhi_epu16(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_mulhi_epu16
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 048bc3057b5f4..68da485213941 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -3027,6 +3027,7 @@ __m512i test_mm512_mul_epi32(__m512i __A, __m512i __B) {
//CHECK: mul <8 x i64> %{{.*}}, %{{.*}}
return _mm512_mul_epi32(__A,__B);
}
+TEST_CONSTEXPR(match_m512i(_mm512_mul_epi32((__m512i){1, 2, 3, 4, 5, 6, 7, 8}, (__m512i){16, 14, 12, 10, 8, 6, 4, 2}), 16, 28, 36, 40, 40, 36, 28, 16));
__m512i test_mm512_maskz_mul_epi32 (__mmask8 __k,__m512i __A, __m512i __B) {
//CHECK-LABEL: test_mm512_maskz_mul_epi32
@@ -3057,6 +3058,7 @@ __m512i test_mm512_mul_epu32 (__m512i __A, __m512i __B) {
//CHECK: mul <8 x i64> %{{.*}}, %{{.*}}
return _mm512_mul_epu32(__A,__B);
}
+TEST_CONSTEXPR(match_m512i(_mm512_mul_epu32((__m512i){1, 2, 3, 4, 5, 6, 7, 8}, (__m512i){16, 14, 12, 10, 8, 6, 4, 2}), 16, 28, 36, 40, 40, 36, 28, 16));
__m512i test_mm512_maskz_mul_epu32 (__mmask8 __k,__m512i __A, __m512i __B) {
//CHECK-LABEL: test_mm512_maskz_mul_epu32
diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c
index 07fab2d39054c..5c700107c58e9 100644
--- a/clang/test/CodeGen/X86/mmx-builtins.c
+++ b/clang/test/CodeGen/X86/mmx-builtins.c
@@ -385,6 +385,7 @@ __m64 test_mm_mul_su32(__m64 a, __m64 b) {
// CHECK: mul <2 x i64> %{{.*}}, %{{.*}}
return _mm_mul_su32(a, b);
}
+TEST_CONSTEXPR(match_v4hi(_mm_mul_su32((__m64)(__v4hi){1, 2, 3, 4}, (__m64)(__v4hi){10, 8, 6, 4}), 10, 28, 16, 0));
__m64 test_mm_mulhi_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_mulhi_pi16
diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c
index 4ff14adcea0e1..0e00b2763db35 100644
--- a/clang/test/CodeGen/X86/sse2-builtins.c
+++ b/clang/test/CodeGen/X86/sse2-builtins.c
@@ -921,6 +921,7 @@ __m128i test_mm_mul_epu32(__m128i A, __m128i B) {
// CHECK: mul <2 x i64> %{{.*}}, %{{.*}}
return _mm_mul_epu32(A, B);
}
+TEST_CONSTEXPR(match_v8hi(_mm_mul_epu32((__m128i)(__v8hi){+1, -2, +3, -4, +5, -6, +7, -8}, (__m128i)(__v8hi){-16, -14, +12, +10, -8, +6, -4, +2}), -16, 18, 11, -15, -40, 82, -50, 6));
__m128d test_mm_mul_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_mul_pd
diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c
index 10deb386d82aa..b7fc582788c8c 100644
--- a/clang/test/CodeGen/X86/sse41-builtins.c
+++ b/clang/test/CodeGen/X86/sse41-builtins.c
@@ -344,6 +344,7 @@ __m128i test_mm_mul_epi32(__m128i x, __m128i y) {
// CHECK: mul <2 x i64> %{{.*}}, %{{.*}}
return _mm_mul_epi32(x, y);
}
+TEST_CONSTEXPR(match_v8hi(_mm_mul_epi32((__m128i)(__v8hi){+1, -2, +3, -4, +5, -6, +7, -8}, (__m128i)(__v8hi){-16, -14, +12, +10, -8, +6, -4, +2}), -16, 18, 26, 0, -40, 82, -42, -1));
__m128i test_mm_mullo_epi32(__m128i x, __m128i y) {
// CHECK-LABEL: test_mm_mullo_epi32
diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h
index 295506393a1c4..cf7ddf3a9719f 100644
--- a/llvm/include/llvm/ADT/APInt.h
+++ b/llvm/include/llvm/ADT/APInt.h
@@ -2294,6 +2294,12 @@ LLVM_ABI APInt mulhs(const APInt &C1, const APInt &C2);
/// Returns the high N bits of the multiplication result.
LLVM_ABI APInt mulhu(const APInt &C1, const APInt &C2);
+/// Performs (2*N)-bit multiplication on sign-extended operands.
+LLVM_ABI APInt muldq(const APInt &C1, const APInt &C2);
+
+/// Performs (2*N)-bit multiplication on zero-extended operands.
+LLVM_ABI APInt muludq(const APInt &C1, const APInt &C2);
+
/// Compute X^N for N>=0.
/// 0^0 is supported and returns 1.
LLVM_ABI APInt pow(const APInt &X, int64_t N);
diff --git a/llvm/lib/Support/APInt.cpp b/llvm/lib/Support/APInt.cpp
index 954af7fff92a8..2dd83e8bbe2fd 100644
--- a/llvm/lib/Support/APInt.cpp
+++ b/llvm/lib/Support/APInt.cpp
@@ -3136,6 +3136,22 @@ APInt APIntOps::mulhu(const APInt &C1, const APInt &C2) {
return (C1Ext * C2Ext).extractBits(C1.getBitWidth(), C1.getBitWidth());
}
+APInt APIntOps::muludq(const APInt &C1, const APInt &C2) {
+ assert(C1.getBitWidth() == C2.getBitWidth() && "Unequal bitwidths");
+ unsigned FullWidth = C1.getBitWidth() * 2;
+ APInt C1Ext = C1.zext(FullWidth);
+ APInt C2Ext = C2.zext(FullWidth);
+ return C1Ext * C2Ext;
+}
+
+APInt APIntOps::muldq(const APInt &C1, const APInt &C2) {
+ assert(C1.getBitWidth() == C2.getBitWidth() && "Unequal bitwidths");
+ unsigned FullWidth = C1.getBitWidth() * 2;
+ APInt C1Ext = C1.sext(FullWidth);
+ APInt C2Ext = C2.sext(FullWidth);
+ return C1Ext * C2Ext;
+}
+
APInt APIntOps::pow(const APInt &X, int64_t N) {
assert(N >= 0 && "negative exponents not supported.");
APInt Acc = APInt(X.getBitWidth(), 1);
|
@llvm/pr-subscribers-clang Author: Pedro Lobo (pedroclobo) ChangesCloses #153002. Adds Full diff: https://github.com/llvm/llvm-project/pull/153293.diff 13 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 3efc0be8fa698..ca2cc081b787a 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -268,7 +268,6 @@ let Header = "emmintrin.h", Attributes = [NoThrow, RequireDeclaration] in {
}
let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
- def pmuludq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;
def psraw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
def psrad128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
def psrlw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
@@ -290,6 +289,10 @@ let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] i
def psrldqi128_byteshift : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant int)">;
}
+let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+ def pmuludq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;
+}
+
let Features = "sse3", Attributes = [NoThrow] in {
def monitor : X86Builtin<"void(void const *, unsigned int, unsigned int)">;
def mwait : X86Builtin<"void(unsigned int, unsigned int)">;
@@ -312,7 +315,6 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
def blendvpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>)">;
def blendvps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>)">;
def packusdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">;
- def pmuldq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;
def roundps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Constant int)">;
def roundss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">;
def roundsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
@@ -329,6 +331,10 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
def vec_set_v4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int, _Constant int)">;
}
+let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+ def pmuldq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;
+}
+
let Features = "sse4.2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def pcmpistrm128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
def pcmpistri128 : X86Builtin<"int(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
@@ -580,9 +586,7 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
def pmaddubsw256 : X86Builtin<"_Vector<16, short>(_Vector<32, char>, _Vector<32, char>)">;
def pmaddwd256 : X86Builtin<"_Vector<8, int>(_Vector<16, short>, _Vector<16, short>)">;
def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">;
- def pmuldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
def pmulhrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
- def pmuludq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
def pshufb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;
@@ -620,6 +624,11 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
def insert128i256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>, _Constant int)">;
}
+let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+ def pmuldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
+ def pmuludq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
+}
+
let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def pmulhuw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, unsigned short>, _Vector<16, unsigned short>)">;
def pmulhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
@@ -1090,6 +1099,9 @@ let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWi
def cvtpd2ps512_mask : X86Builtin<"_Vector<8, float>(_Vector<8, double>, _Vector<8, float>, unsigned char, _Constant int)">;
def vcvtps2ph512_mask : X86Builtin<"_Vector<16, short>(_Vector<16, float>, _Constant int, _Vector<16, short>, unsigned short)">;
def vcvtph2ps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, short>, _Vector<16, float>, unsigned short, _Constant int)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def pmuldq512 : X86Builtin<"_Vector<8, long long int>(_Vector<16, int>, _Vector<16, int>)">;
def pmuludq512 : X86Builtin<"_Vector<8, long long int>(_Vector<16, int>, _Vector<16, int>)">;
}
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 36dd0f5d7a065..234f257122d8f 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11688,6 +11688,43 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
+ case clang::X86::BI__builtin_ia32_pmuldq128:
+ case clang::X86::BI__builtin_ia32_pmuldq256:
+ case clang::X86::BI__builtin_ia32_pmuldq512:
+ case clang::X86::BI__builtin_ia32_pmuludq128:
+ case clang::X86::BI__builtin_ia32_pmuludq256:
+ case clang::X86::BI__builtin_ia32_pmuludq512: {
+ APValue SourceLHS, SourceRHS;
+ if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
+ !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
+ return false;
+
+ unsigned SourceLen = SourceLHS.getVectorLength();
+ SmallVector<APValue, 4> ResultElements;
+ ResultElements.reserve(SourceLen / 2);
+
+ for (unsigned EltNum = 0; EltNum < SourceLen; EltNum += 2) {
+ APSInt LHS = SourceLHS.getVectorElt(EltNum).getInt();
+ APSInt RHS = SourceRHS.getVectorElt(EltNum).getInt();
+
+ switch (E->getBuiltinCallee()) {
+ case clang::X86::BI__builtin_ia32_pmuludq128:
+ case clang::X86::BI__builtin_ia32_pmuludq256:
+ case clang::X86::BI__builtin_ia32_pmuludq512:
+ ResultElements.push_back(
+ APValue(APSInt(llvm::APIntOps::muludq(LHS, RHS), true)));
+ break;
+ case clang::X86::BI__builtin_ia32_pmuldq128:
+ case clang::X86::BI__builtin_ia32_pmuldq256:
+ case clang::X86::BI__builtin_ia32_pmuldq512:
+ ResultElements.push_back(
+ APValue(APSInt(llvm::APIntOps::muldq(LHS, RHS), false)));
+ break;
+ }
+ }
+
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
case Builtin::BI__builtin_elementwise_max:
case Builtin::BI__builtin_elementwise_min: {
APValue SourceLHS, SourceRHS;
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index f00a8a516ecfe..b098417173ba3 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -1671,9 +1671,8 @@ _mm256_cvtepu32_epi64(__m128i __V) {
/// \param __b
/// A 256-bit vector of [8 x i32] containing one of the source operands.
/// \returns A 256-bit vector of [4 x i64] containing the products.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mul_epi32(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mul_epi32(__m256i __a, __m256i __b) {
return (__m256i)__builtin_ia32_pmuldq256((__v8si)__a, (__v8si)__b);
}
@@ -1800,9 +1799,8 @@ _mm256_mullo_epi32 (__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [8 x i32] containing one of the source operands.
/// \returns A 256-bit vector of [4 x i64] containing the products.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mul_epu32(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mul_epu32(__m256i __a, __m256i __b) {
return __builtin_ia32_pmuludq256((__v8si)__a, (__v8si)__b);
}
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 9fc1df3acd3d0..0ba5aa338a6b8 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -1417,9 +1417,8 @@ _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
(__v8di)_mm512_setzero_si512());
}
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_mul_epi32(__m512i __X, __m512i __Y)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mul_epi32(__m512i __X, __m512i __Y) {
return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
}
@@ -1439,9 +1438,8 @@ _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
(__v8di)_mm512_setzero_si512 ());
}
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_mul_epu32(__m512i __X, __m512i __Y)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mul_epu32(__m512i __X, __m512i __Y) {
return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
}
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index e632191113c4a..2ca25149db280 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -67,6 +67,9 @@ typedef __bf16 __m128bh __attribute__((__vector_size__(16), __aligned__(16)));
#define __trunc64(x) \
(__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0)
+#define __zext128(x) \
+ (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \
+ 1, 2, 3)
#define __anyext128(x) \
(__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \
1, -1, -1)
@@ -2450,9 +2453,10 @@ _mm_mullo_epi16(__m128i __a, __m128i __b) {
/// \param __b
/// A 64-bit integer containing one of the source operands.
/// \returns A 64-bit integer vector containing the product of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mul_su32(__m64 __a, __m64 __b) {
- return __trunc64(__builtin_ia32_pmuludq128((__v4si)__anyext128(__a),
- (__v4si)__anyext128(__b)));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_su32(__m64 __a,
+ __m64 __b) {
+ return __trunc64(__builtin_ia32_pmuludq128((__v4si)__zext128(__a),
+ (__v4si)__zext128(__b)));
}
/// Multiplies 32-bit unsigned integer values contained in the lower
@@ -2468,8 +2472,8 @@ static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mul_su32(__m64 __a, __m64 __b) {
/// \param __b
/// A [2 x i64] vector containing one of the source operands.
/// \returns A [2 x i64] vector containing the product of both operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epu32(__m128i __a,
- __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_mul_epu32(__m128i __a, __m128i __b) {
return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b);
}
diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h
index c5075c419b70b..57d0d329312af 100644
--- a/clang/lib/Headers/smmintrin.h
+++ b/clang/lib/Headers/smmintrin.h
@@ -567,8 +567,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi32(__m128i __V1,
/// A 128-bit vector of [4 x i32].
/// \returns A 128-bit vector of [2 x i64] containing the products of both
/// operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epi32(__m128i __V1,
- __m128i __V2) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_mul_epi32(__m128i __V1, __m128i __V2) {
return (__m128i)__builtin_ia32_pmuldq128((__v4si)__V1, (__v4si)__V2);
}
diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c
index 8790485f00a8c..bd7390d58a74c 100644
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -903,6 +903,7 @@ __m256i test_mm256_mul_epi32(__m256i a, __m256i b) {
// CHECK: mul <4 x i64> %{{.*}}, %{{.*}}
return _mm256_mul_epi32(a, b);
}
+TEST_CONSTEXPR(match_v8si(_mm256_mul_epi32((__m256i)(__v8si){+1, -2, +3, -4, +5, -6, +7, -8}, (__m256i)(__v8si){-16, -14, +12, +10, -8, +6, -4, +2}), -16, -1, 36, 0, -40, -1, -28, -1));
__m256i test_mm256_mul_epu32(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_mul_epu32
@@ -911,6 +912,7 @@ __m256i test_mm256_mul_epu32(__m256i a, __m256i b) {
// CHECK: mul <4 x i64> %{{.*}}, %{{.*}}
return _mm256_mul_epu32(a, b);
}
+TEST_CONSTEXPR(match_v8si(_mm256_mul_epu32((__m256i)(__v8si){+1, -2, +3, -4, +5, -6, +7, -8}, (__m256i)(__v8si){-16, -14, +12, +10, -8, +6, -4, +2}), -16, 0, 36, 0, -40, 4, -28, 6));
__m256i test_mm256_mulhi_epu16(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_mulhi_epu16
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 048bc3057b5f4..68da485213941 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -3027,6 +3027,7 @@ __m512i test_mm512_mul_epi32(__m512i __A, __m512i __B) {
//CHECK: mul <8 x i64> %{{.*}}, %{{.*}}
return _mm512_mul_epi32(__A,__B);
}
+TEST_CONSTEXPR(match_m512i(_mm512_mul_epi32((__m512i){1, 2, 3, 4, 5, 6, 7, 8}, (__m512i){16, 14, 12, 10, 8, 6, 4, 2}), 16, 28, 36, 40, 40, 36, 28, 16));
__m512i test_mm512_maskz_mul_epi32 (__mmask8 __k,__m512i __A, __m512i __B) {
//CHECK-LABEL: test_mm512_maskz_mul_epi32
@@ -3057,6 +3058,7 @@ __m512i test_mm512_mul_epu32 (__m512i __A, __m512i __B) {
//CHECK: mul <8 x i64> %{{.*}}, %{{.*}}
return _mm512_mul_epu32(__A,__B);
}
+TEST_CONSTEXPR(match_m512i(_mm512_mul_epu32((__m512i){1, 2, 3, 4, 5, 6, 7, 8}, (__m512i){16, 14, 12, 10, 8, 6, 4, 2}), 16, 28, 36, 40, 40, 36, 28, 16));
__m512i test_mm512_maskz_mul_epu32 (__mmask8 __k,__m512i __A, __m512i __B) {
//CHECK-LABEL: test_mm512_maskz_mul_epu32
diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c
index 07fab2d39054c..5c700107c58e9 100644
--- a/clang/test/CodeGen/X86/mmx-builtins.c
+++ b/clang/test/CodeGen/X86/mmx-builtins.c
@@ -385,6 +385,7 @@ __m64 test_mm_mul_su32(__m64 a, __m64 b) {
// CHECK: mul <2 x i64> %{{.*}}, %{{.*}}
return _mm_mul_su32(a, b);
}
+TEST_CONSTEXPR(match_v4hi(_mm_mul_su32((__m64)(__v4hi){1, 2, 3, 4}, (__m64)(__v4hi){10, 8, 6, 4}), 10, 28, 16, 0));
__m64 test_mm_mulhi_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_mulhi_pi16
diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c
index 4ff14adcea0e1..0e00b2763db35 100644
--- a/clang/test/CodeGen/X86/sse2-builtins.c
+++ b/clang/test/CodeGen/X86/sse2-builtins.c
@@ -921,6 +921,7 @@ __m128i test_mm_mul_epu32(__m128i A, __m128i B) {
// CHECK: mul <2 x i64> %{{.*}}, %{{.*}}
return _mm_mul_epu32(A, B);
}
+TEST_CONSTEXPR(match_v8hi(_mm_mul_epu32((__m128i)(__v8hi){+1, -2, +3, -4, +5, -6, +7, -8}, (__m128i)(__v8hi){-16, -14, +12, +10, -8, +6, -4, +2}), -16, 18, 11, -15, -40, 82, -50, 6));
__m128d test_mm_mul_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_mul_pd
diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c
index 10deb386d82aa..b7fc582788c8c 100644
--- a/clang/test/CodeGen/X86/sse41-builtins.c
+++ b/clang/test/CodeGen/X86/sse41-builtins.c
@@ -344,6 +344,7 @@ __m128i test_mm_mul_epi32(__m128i x, __m128i y) {
// CHECK: mul <2 x i64> %{{.*}}, %{{.*}}
return _mm_mul_epi32(x, y);
}
+TEST_CONSTEXPR(match_v8hi(_mm_mul_epi32((__m128i)(__v8hi){+1, -2, +3, -4, +5, -6, +7, -8}, (__m128i)(__v8hi){-16, -14, +12, +10, -8, +6, -4, +2}), -16, 18, 26, 0, -40, 82, -42, -1));
__m128i test_mm_mullo_epi32(__m128i x, __m128i y) {
// CHECK-LABEL: test_mm_mullo_epi32
diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h
index 295506393a1c4..cf7ddf3a9719f 100644
--- a/llvm/include/llvm/ADT/APInt.h
+++ b/llvm/include/llvm/ADT/APInt.h
@@ -2294,6 +2294,12 @@ LLVM_ABI APInt mulhs(const APInt &C1, const APInt &C2);
/// Returns the high N bits of the multiplication result.
LLVM_ABI APInt mulhu(const APInt &C1, const APInt &C2);
+/// Performs (2*N)-bit multiplication on sign-extended operands.
+LLVM_ABI APInt muldq(const APInt &C1, const APInt &C2);
+
+/// Performs (2*N)-bit multiplication on zero-extended operands.
+LLVM_ABI APInt muludq(const APInt &C1, const APInt &C2);
+
/// Compute X^N for N>=0.
/// 0^0 is supported and returns 1.
LLVM_ABI APInt pow(const APInt &X, int64_t N);
diff --git a/llvm/lib/Support/APInt.cpp b/llvm/lib/Support/APInt.cpp
index 954af7fff92a8..2dd83e8bbe2fd 100644
--- a/llvm/lib/Support/APInt.cpp
+++ b/llvm/lib/Support/APInt.cpp
@@ -3136,6 +3136,22 @@ APInt APIntOps::mulhu(const APInt &C1, const APInt &C2) {
return (C1Ext * C2Ext).extractBits(C1.getBitWidth(), C1.getBitWidth());
}
+APInt APIntOps::muludq(const APInt &C1, const APInt &C2) {
+ assert(C1.getBitWidth() == C2.getBitWidth() && "Unequal bitwidths");
+ unsigned FullWidth = C1.getBitWidth() * 2;
+ APInt C1Ext = C1.zext(FullWidth);
+ APInt C2Ext = C2.zext(FullWidth);
+ return C1Ext * C2Ext;
+}
+
+APInt APIntOps::muldq(const APInt &C1, const APInt &C2) {
+ assert(C1.getBitWidth() == C2.getBitWidth() && "Unequal bitwidths");
+ unsigned FullWidth = C1.getBitWidth() * 2;
+ APInt C1Ext = C1.sext(FullWidth);
+ APInt C2Ext = C2.sext(FullWidth);
+ return C1Ext * C2Ext;
+}
+
APInt APIntOps::pow(const APInt &X, int64_t N) {
assert(N >= 0 && "negative exponents not supported.");
APInt Acc = APInt(X.getBitWidth(), 1);
|
@llvm/pr-subscribers-backend-x86 Author: Pedro Lobo (pedroclobo) ChangesCloses #153002. Adds Full diff: https://github.com/llvm/llvm-project/pull/153293.diff 13 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 3efc0be8fa698..ca2cc081b787a 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -268,7 +268,6 @@ let Header = "emmintrin.h", Attributes = [NoThrow, RequireDeclaration] in {
}
let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
- def pmuludq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;
def psraw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
def psrad128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
def psrlw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
@@ -290,6 +289,10 @@ let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] i
def psrldqi128_byteshift : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant int)">;
}
+let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+ def pmuludq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;
+}
+
let Features = "sse3", Attributes = [NoThrow] in {
def monitor : X86Builtin<"void(void const *, unsigned int, unsigned int)">;
def mwait : X86Builtin<"void(unsigned int, unsigned int)">;
@@ -312,7 +315,6 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
def blendvpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>)">;
def blendvps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>)">;
def packusdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">;
- def pmuldq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;
def roundps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Constant int)">;
def roundss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">;
def roundsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
@@ -329,6 +331,10 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
def vec_set_v4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int, _Constant int)">;
}
+let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+ def pmuldq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;
+}
+
let Features = "sse4.2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def pcmpistrm128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
def pcmpistri128 : X86Builtin<"int(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
@@ -580,9 +586,7 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
def pmaddubsw256 : X86Builtin<"_Vector<16, short>(_Vector<32, char>, _Vector<32, char>)">;
def pmaddwd256 : X86Builtin<"_Vector<8, int>(_Vector<16, short>, _Vector<16, short>)">;
def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">;
- def pmuldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
def pmulhrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
- def pmuludq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
def pshufb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;
@@ -620,6 +624,11 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
def insert128i256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>, _Constant int)">;
}
+let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+ def pmuldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
+ def pmuludq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
+}
+
let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def pmulhuw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, unsigned short>, _Vector<16, unsigned short>)">;
def pmulhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
@@ -1090,6 +1099,9 @@ let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWi
def cvtpd2ps512_mask : X86Builtin<"_Vector<8, float>(_Vector<8, double>, _Vector<8, float>, unsigned char, _Constant int)">;
def vcvtps2ph512_mask : X86Builtin<"_Vector<16, short>(_Vector<16, float>, _Constant int, _Vector<16, short>, unsigned short)">;
def vcvtph2ps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, short>, _Vector<16, float>, unsigned short, _Constant int)">;
+}
+
+let Features = "avx512f,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def pmuldq512 : X86Builtin<"_Vector<8, long long int>(_Vector<16, int>, _Vector<16, int>)">;
def pmuludq512 : X86Builtin<"_Vector<8, long long int>(_Vector<16, int>, _Vector<16, int>)">;
}
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 36dd0f5d7a065..234f257122d8f 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11688,6 +11688,43 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
+ case clang::X86::BI__builtin_ia32_pmuldq128:
+ case clang::X86::BI__builtin_ia32_pmuldq256:
+ case clang::X86::BI__builtin_ia32_pmuldq512:
+ case clang::X86::BI__builtin_ia32_pmuludq128:
+ case clang::X86::BI__builtin_ia32_pmuludq256:
+ case clang::X86::BI__builtin_ia32_pmuludq512: {
+ APValue SourceLHS, SourceRHS;
+ if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
+ !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
+ return false;
+
+ unsigned SourceLen = SourceLHS.getVectorLength();
+ SmallVector<APValue, 4> ResultElements;
+ ResultElements.reserve(SourceLen / 2);
+
+ for (unsigned EltNum = 0; EltNum < SourceLen; EltNum += 2) {
+ APSInt LHS = SourceLHS.getVectorElt(EltNum).getInt();
+ APSInt RHS = SourceRHS.getVectorElt(EltNum).getInt();
+
+ switch (E->getBuiltinCallee()) {
+ case clang::X86::BI__builtin_ia32_pmuludq128:
+ case clang::X86::BI__builtin_ia32_pmuludq256:
+ case clang::X86::BI__builtin_ia32_pmuludq512:
+ ResultElements.push_back(
+ APValue(APSInt(llvm::APIntOps::muludq(LHS, RHS), true)));
+ break;
+ case clang::X86::BI__builtin_ia32_pmuldq128:
+ case clang::X86::BI__builtin_ia32_pmuldq256:
+ case clang::X86::BI__builtin_ia32_pmuldq512:
+ ResultElements.push_back(
+ APValue(APSInt(llvm::APIntOps::muldq(LHS, RHS), false)));
+ break;
+ }
+ }
+
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
case Builtin::BI__builtin_elementwise_max:
case Builtin::BI__builtin_elementwise_min: {
APValue SourceLHS, SourceRHS;
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index f00a8a516ecfe..b098417173ba3 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -1671,9 +1671,8 @@ _mm256_cvtepu32_epi64(__m128i __V) {
/// \param __b
/// A 256-bit vector of [8 x i32] containing one of the source operands.
/// \returns A 256-bit vector of [4 x i64] containing the products.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mul_epi32(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mul_epi32(__m256i __a, __m256i __b) {
return (__m256i)__builtin_ia32_pmuldq256((__v8si)__a, (__v8si)__b);
}
@@ -1800,9 +1799,8 @@ _mm256_mullo_epi32 (__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [8 x i32] containing one of the source operands.
/// \returns A 256-bit vector of [4 x i64] containing the products.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mul_epu32(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mul_epu32(__m256i __a, __m256i __b) {
return __builtin_ia32_pmuludq256((__v8si)__a, (__v8si)__b);
}
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 9fc1df3acd3d0..0ba5aa338a6b8 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -1417,9 +1417,8 @@ _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
(__v8di)_mm512_setzero_si512());
}
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_mul_epi32(__m512i __X, __m512i __Y)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mul_epi32(__m512i __X, __m512i __Y) {
return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
}
@@ -1439,9 +1438,8 @@ _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
(__v8di)_mm512_setzero_si512 ());
}
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_mul_epu32(__m512i __X, __m512i __Y)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mul_epu32(__m512i __X, __m512i __Y) {
return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
}
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index e632191113c4a..2ca25149db280 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -67,6 +67,9 @@ typedef __bf16 __m128bh __attribute__((__vector_size__(16), __aligned__(16)));
#define __trunc64(x) \
(__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0)
+#define __zext128(x) \
+ (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \
+ 1, 2, 3)
#define __anyext128(x) \
(__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \
1, -1, -1)
@@ -2450,9 +2453,10 @@ _mm_mullo_epi16(__m128i __a, __m128i __b) {
/// \param __b
/// A 64-bit integer containing one of the source operands.
/// \returns A 64-bit integer vector containing the product of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mul_su32(__m64 __a, __m64 __b) {
- return __trunc64(__builtin_ia32_pmuludq128((__v4si)__anyext128(__a),
- (__v4si)__anyext128(__b)));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_su32(__m64 __a,
+ __m64 __b) {
+ return __trunc64(__builtin_ia32_pmuludq128((__v4si)__zext128(__a),
+ (__v4si)__zext128(__b)));
}
/// Multiplies 32-bit unsigned integer values contained in the lower
@@ -2468,8 +2472,8 @@ static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mul_su32(__m64 __a, __m64 __b) {
/// \param __b
/// A [2 x i64] vector containing one of the source operands.
/// \returns A [2 x i64] vector containing the product of both operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epu32(__m128i __a,
- __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_mul_epu32(__m128i __a, __m128i __b) {
return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b);
}
diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h
index c5075c419b70b..57d0d329312af 100644
--- a/clang/lib/Headers/smmintrin.h
+++ b/clang/lib/Headers/smmintrin.h
@@ -567,8 +567,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi32(__m128i __V1,
/// A 128-bit vector of [4 x i32].
/// \returns A 128-bit vector of [2 x i64] containing the products of both
/// operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epi32(__m128i __V1,
- __m128i __V2) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_mul_epi32(__m128i __V1, __m128i __V2) {
return (__m128i)__builtin_ia32_pmuldq128((__v4si)__V1, (__v4si)__V2);
}
diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c
index 8790485f00a8c..bd7390d58a74c 100644
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -903,6 +903,7 @@ __m256i test_mm256_mul_epi32(__m256i a, __m256i b) {
// CHECK: mul <4 x i64> %{{.*}}, %{{.*}}
return _mm256_mul_epi32(a, b);
}
+TEST_CONSTEXPR(match_v8si(_mm256_mul_epi32((__m256i)(__v8si){+1, -2, +3, -4, +5, -6, +7, -8}, (__m256i)(__v8si){-16, -14, +12, +10, -8, +6, -4, +2}), -16, -1, 36, 0, -40, -1, -28, -1));
__m256i test_mm256_mul_epu32(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_mul_epu32
@@ -911,6 +912,7 @@ __m256i test_mm256_mul_epu32(__m256i a, __m256i b) {
// CHECK: mul <4 x i64> %{{.*}}, %{{.*}}
return _mm256_mul_epu32(a, b);
}
+TEST_CONSTEXPR(match_v8si(_mm256_mul_epu32((__m256i)(__v8si){+1, -2, +3, -4, +5, -6, +7, -8}, (__m256i)(__v8si){-16, -14, +12, +10, -8, +6, -4, +2}), -16, 0, 36, 0, -40, 4, -28, 6));
__m256i test_mm256_mulhi_epu16(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_mulhi_epu16
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 048bc3057b5f4..68da485213941 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -3027,6 +3027,7 @@ __m512i test_mm512_mul_epi32(__m512i __A, __m512i __B) {
//CHECK: mul <8 x i64> %{{.*}}, %{{.*}}
return _mm512_mul_epi32(__A,__B);
}
+TEST_CONSTEXPR(match_m512i(_mm512_mul_epi32((__m512i){1, 2, 3, 4, 5, 6, 7, 8}, (__m512i){16, 14, 12, 10, 8, 6, 4, 2}), 16, 28, 36, 40, 40, 36, 28, 16));
__m512i test_mm512_maskz_mul_epi32 (__mmask8 __k,__m512i __A, __m512i __B) {
//CHECK-LABEL: test_mm512_maskz_mul_epi32
@@ -3057,6 +3058,7 @@ __m512i test_mm512_mul_epu32 (__m512i __A, __m512i __B) {
//CHECK: mul <8 x i64> %{{.*}}, %{{.*}}
return _mm512_mul_epu32(__A,__B);
}
+TEST_CONSTEXPR(match_m512i(_mm512_mul_epu32((__m512i){1, 2, 3, 4, 5, 6, 7, 8}, (__m512i){16, 14, 12, 10, 8, 6, 4, 2}), 16, 28, 36, 40, 40, 36, 28, 16));
__m512i test_mm512_maskz_mul_epu32 (__mmask8 __k,__m512i __A, __m512i __B) {
//CHECK-LABEL: test_mm512_maskz_mul_epu32
diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c
index 07fab2d39054c..5c700107c58e9 100644
--- a/clang/test/CodeGen/X86/mmx-builtins.c
+++ b/clang/test/CodeGen/X86/mmx-builtins.c
@@ -385,6 +385,7 @@ __m64 test_mm_mul_su32(__m64 a, __m64 b) {
// CHECK: mul <2 x i64> %{{.*}}, %{{.*}}
return _mm_mul_su32(a, b);
}
+TEST_CONSTEXPR(match_v4hi(_mm_mul_su32((__m64)(__v4hi){1, 2, 3, 4}, (__m64)(__v4hi){10, 8, 6, 4}), 10, 28, 16, 0));
__m64 test_mm_mulhi_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_mulhi_pi16
diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c
index 4ff14adcea0e1..0e00b2763db35 100644
--- a/clang/test/CodeGen/X86/sse2-builtins.c
+++ b/clang/test/CodeGen/X86/sse2-builtins.c
@@ -921,6 +921,7 @@ __m128i test_mm_mul_epu32(__m128i A, __m128i B) {
// CHECK: mul <2 x i64> %{{.*}}, %{{.*}}
return _mm_mul_epu32(A, B);
}
+TEST_CONSTEXPR(match_v8hi(_mm_mul_epu32((__m128i)(__v8hi){+1, -2, +3, -4, +5, -6, +7, -8}, (__m128i)(__v8hi){-16, -14, +12, +10, -8, +6, -4, +2}), -16, 18, 11, -15, -40, 82, -50, 6));
__m128d test_mm_mul_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_mul_pd
diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c
index 10deb386d82aa..b7fc582788c8c 100644
--- a/clang/test/CodeGen/X86/sse41-builtins.c
+++ b/clang/test/CodeGen/X86/sse41-builtins.c
@@ -344,6 +344,7 @@ __m128i test_mm_mul_epi32(__m128i x, __m128i y) {
// CHECK: mul <2 x i64> %{{.*}}, %{{.*}}
return _mm_mul_epi32(x, y);
}
+TEST_CONSTEXPR(match_v8hi(_mm_mul_epi32((__m128i)(__v8hi){+1, -2, +3, -4, +5, -6, +7, -8}, (__m128i)(__v8hi){-16, -14, +12, +10, -8, +6, -4, +2}), -16, 18, 26, 0, -40, 82, -42, -1));
__m128i test_mm_mullo_epi32(__m128i x, __m128i y) {
// CHECK-LABEL: test_mm_mullo_epi32
diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h
index 295506393a1c4..cf7ddf3a9719f 100644
--- a/llvm/include/llvm/ADT/APInt.h
+++ b/llvm/include/llvm/ADT/APInt.h
@@ -2294,6 +2294,12 @@ LLVM_ABI APInt mulhs(const APInt &C1, const APInt &C2);
/// Returns the high N bits of the multiplication result.
LLVM_ABI APInt mulhu(const APInt &C1, const APInt &C2);
+/// Performs (2*N)-bit multiplication on sign-extended operands.
+LLVM_ABI APInt muldq(const APInt &C1, const APInt &C2);
+
+/// Performs (2*N)-bit multiplication on zero-extended operands.
+LLVM_ABI APInt muludq(const APInt &C1, const APInt &C2);
+
/// Compute X^N for N>=0.
/// 0^0 is supported and returns 1.
LLVM_ABI APInt pow(const APInt &X, int64_t N);
diff --git a/llvm/lib/Support/APInt.cpp b/llvm/lib/Support/APInt.cpp
index 954af7fff92a8..2dd83e8bbe2fd 100644
--- a/llvm/lib/Support/APInt.cpp
+++ b/llvm/lib/Support/APInt.cpp
@@ -3136,6 +3136,22 @@ APInt APIntOps::mulhu(const APInt &C1, const APInt &C2) {
return (C1Ext * C2Ext).extractBits(C1.getBitWidth(), C1.getBitWidth());
}
+APInt APIntOps::muludq(const APInt &C1, const APInt &C2) {
+ assert(C1.getBitWidth() == C2.getBitWidth() && "Unequal bitwidths");
+ unsigned FullWidth = C1.getBitWidth() * 2;
+ APInt C1Ext = C1.zext(FullWidth);
+ APInt C2Ext = C2.zext(FullWidth);
+ return C1Ext * C2Ext;
+}
+
+APInt APIntOps::muldq(const APInt &C1, const APInt &C2) {
+ assert(C1.getBitWidth() == C2.getBitWidth() && "Unequal bitwidths");
+ unsigned FullWidth = C1.getBitWidth() * 2;
+ APInt C1Ext = C1.sext(FullWidth);
+ APInt C2Ext = C2.sext(FullWidth);
+ return C1Ext * C2Ext;
+}
+
APInt APIntOps::pow(const APInt &X, int64_t N) {
assert(N >= 0 && "negative exponents not supported.");
APInt Acc = APInt(X.getBitWidth(), 1);
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We prefer to land ADT changes separately and rely on C++ unit tests for correctness. This is primarily because reverting anything in ADT is painful because of having to rebuild the whole project.
Just opened a PR upstreaming the ADT changes (#153399). |
Adds `mulsExtended` and `muluExtended` methods to `APInt`, as suggested in #153293. These are based on the `MULDQ` and `MULUDQ` x86 intrinsics.
fb8aa21
to
c38db4c
Compare
Rebased over 08eff57. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
one last minor
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM - cheers
Should I wait for @phoebewang's review before merging? |
If you want a second opinion its fine to wait, but you're under no obligation to wait. |
9ad0839
to
c82f901
Compare
Adds `constexpr` support for `pmuludq` and `pmuldq` intrinsics.
c82f901
to
4163545
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
Closes #153002.
Part of #30794.
Adds
constexpr
support forpmuludq
andpmuldq
intrinsics.