Skip to content

Commit d42a1d4

Browse files
authored
[Headers][X86] Allow pmuludq/pmuldq to be used in constexpr (#153293)
Adds `constexpr` support for `pmuludq` and `pmuldq` intrinsics. Closes #153002. Part of #30794.
1 parent 76bb987 commit d42a1d4

File tree

11 files changed

+79
-23
lines changed

11 files changed

+79
-23
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,6 @@ let Header = "emmintrin.h", Attributes = [NoThrow, RequireDeclaration] in {
268268
}
269269

270270
let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
271-
def pmuludq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;
272271
def psraw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
273272
def psrad128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
274273
def psrlw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
@@ -290,6 +289,10 @@ let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] i
290289
def psrldqi128_byteshift : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant int)">;
291290
}
292291

292+
let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
293+
def pmuludq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;
294+
}
295+
293296
let Features = "sse3", Attributes = [NoThrow] in {
294297
def monitor : X86Builtin<"void(void const *, unsigned int, unsigned int)">;
295298
def mwait : X86Builtin<"void(unsigned int, unsigned int)">;
@@ -312,7 +315,6 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
312315
def blendvpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>)">;
313316
def blendvps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>)">;
314317
def packusdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">;
315-
def pmuldq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;
316318
def roundps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Constant int)">;
317319
def roundss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">;
318320
def roundsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
@@ -329,6 +331,10 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
329331
def vec_set_v4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int, _Constant int)">;
330332
}
331333

334+
let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
335+
def pmuldq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;
336+
}
337+
332338
let Features = "sse4.2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
333339
def pcmpistrm128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
334340
def pcmpistri128 : X86Builtin<"int(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
@@ -580,9 +586,7 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
580586
def pmaddubsw256 : X86Builtin<"_Vector<16, short>(_Vector<32, char>, _Vector<32, char>)">;
581587
def pmaddwd256 : X86Builtin<"_Vector<8, int>(_Vector<16, short>, _Vector<16, short>)">;
582588
def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">;
583-
def pmuldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
584589
def pmulhrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
585-
def pmuludq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
586590
def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
587591
def pshufb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
588592
def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;
@@ -620,6 +624,11 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
620624
def insert128i256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>, _Constant int)">;
621625
}
622626

627+
let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
628+
def pmuldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
629+
def pmuludq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
630+
}
631+
623632
let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
624633
def pmulhuw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, unsigned short>, _Vector<16, unsigned short>)">;
625634
def pmulhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
@@ -1078,6 +1087,9 @@ let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWi
10781087
def cvtpd2ps512_mask : X86Builtin<"_Vector<8, float>(_Vector<8, double>, _Vector<8, float>, unsigned char, _Constant int)">;
10791088
def vcvtps2ph512_mask : X86Builtin<"_Vector<16, short>(_Vector<16, float>, _Constant int, _Vector<16, short>, unsigned short)">;
10801089
def vcvtph2ps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, short>, _Vector<16, float>, unsigned short, _Constant int)">;
1090+
}
1091+
1092+
let Features = "avx512f,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
10811093
def pmuldq512 : X86Builtin<"_Vector<8, long long int>(_Vector<16, int>, _Vector<16, int>)">;
10821094
def pmuludq512 : X86Builtin<"_Vector<8, long long int>(_Vector<16, int>, _Vector<16, int>)">;
10831095
}

clang/lib/AST/ExprConstant.cpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11711,6 +11711,43 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1171111711

1171211712
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
1171311713
}
11714+
case clang::X86::BI__builtin_ia32_pmuldq128:
11715+
case clang::X86::BI__builtin_ia32_pmuldq256:
11716+
case clang::X86::BI__builtin_ia32_pmuldq512:
11717+
case clang::X86::BI__builtin_ia32_pmuludq128:
11718+
case clang::X86::BI__builtin_ia32_pmuludq256:
11719+
case clang::X86::BI__builtin_ia32_pmuludq512: {
11720+
APValue SourceLHS, SourceRHS;
11721+
if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
11722+
!EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
11723+
return false;
11724+
11725+
unsigned SourceLen = SourceLHS.getVectorLength();
11726+
SmallVector<APValue, 4> ResultElements;
11727+
ResultElements.reserve(SourceLen / 2);
11728+
11729+
for (unsigned EltNum = 0; EltNum < SourceLen; EltNum += 2) {
11730+
APSInt LHS = SourceLHS.getVectorElt(EltNum).getInt();
11731+
APSInt RHS = SourceRHS.getVectorElt(EltNum).getInt();
11732+
11733+
switch (E->getBuiltinCallee()) {
11734+
case clang::X86::BI__builtin_ia32_pmuludq128:
11735+
case clang::X86::BI__builtin_ia32_pmuludq256:
11736+
case clang::X86::BI__builtin_ia32_pmuludq512:
11737+
ResultElements.push_back(
11738+
APValue(APSInt(llvm::APIntOps::muluExtended(LHS, RHS), true)));
11739+
break;
11740+
case clang::X86::BI__builtin_ia32_pmuldq128:
11741+
case clang::X86::BI__builtin_ia32_pmuldq256:
11742+
case clang::X86::BI__builtin_ia32_pmuldq512:
11743+
ResultElements.push_back(
11744+
APValue(APSInt(llvm::APIntOps::mulsExtended(LHS, RHS), false)));
11745+
break;
11746+
}
11747+
}
11748+
11749+
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
11750+
}
1171411751
case Builtin::BI__builtin_elementwise_max:
1171511752
case Builtin::BI__builtin_elementwise_min: {
1171611753
APValue SourceLHS, SourceRHS;

clang/lib/Headers/avx2intrin.h

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1667,9 +1667,8 @@ _mm256_cvtepu32_epi64(__m128i __V) {
16671667
/// \param __b
16681668
/// A 256-bit vector of [8 x i32] containing one of the source operands.
16691669
/// \returns A 256-bit vector of [4 x i64] containing the products.
1670-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
1671-
_mm256_mul_epi32(__m256i __a, __m256i __b)
1672-
{
1670+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
1671+
_mm256_mul_epi32(__m256i __a, __m256i __b) {
16731672
return (__m256i)__builtin_ia32_pmuldq256((__v8si)__a, (__v8si)__b);
16741673
}
16751674

@@ -1796,9 +1795,8 @@ _mm256_mullo_epi32 (__m256i __a, __m256i __b)
17961795
/// \param __b
17971796
/// A 256-bit vector of [8 x i32] containing one of the source operands.
17981797
/// \returns A 256-bit vector of [4 x i64] containing the products.
1799-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
1800-
_mm256_mul_epu32(__m256i __a, __m256i __b)
1801-
{
1798+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
1799+
_mm256_mul_epu32(__m256i __a, __m256i __b) {
18021800
return __builtin_ia32_pmuludq256((__v8si)__a, (__v8si)__b);
18031801
}
18041802

clang/lib/Headers/avx512fintrin.h

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1413,9 +1413,8 @@ _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
14131413
(__v8di)_mm512_setzero_si512());
14141414
}
14151415

1416-
static __inline __m512i __DEFAULT_FN_ATTRS512
1417-
_mm512_mul_epi32(__m512i __X, __m512i __Y)
1418-
{
1416+
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1417+
_mm512_mul_epi32(__m512i __X, __m512i __Y) {
14191418
return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
14201419
}
14211420

@@ -1435,9 +1434,8 @@ _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
14351434
(__v8di)_mm512_setzero_si512 ());
14361435
}
14371436

1438-
static __inline __m512i __DEFAULT_FN_ATTRS512
1439-
_mm512_mul_epu32(__m512i __X, __m512i __Y)
1440-
{
1437+
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
1438+
_mm512_mul_epu32(__m512i __X, __m512i __Y) {
14411439
return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
14421440
}
14431441

clang/lib/Headers/emmintrin.h

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,9 @@ typedef __bf16 __m128bh __attribute__((__vector_size__(16), __aligned__(16)));
6262

6363
#define __trunc64(x) \
6464
(__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0)
65+
#define __zext128(x) \
66+
(__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \
67+
1, 2, 3)
6568
#define __anyext128(x) \
6669
(__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \
6770
1, -1, -1)
@@ -2445,9 +2448,10 @@ _mm_mullo_epi16(__m128i __a, __m128i __b) {
24452448
/// \param __b
24462449
/// A 64-bit integer containing one of the source operands.
24472450
/// \returns A 64-bit integer vector containing the product of both operands.
2448-
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mul_su32(__m64 __a, __m64 __b) {
2449-
return __trunc64(__builtin_ia32_pmuludq128((__v4si)__anyext128(__a),
2450-
(__v4si)__anyext128(__b)));
2451+
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_su32(__m64 __a,
2452+
__m64 __b) {
2453+
return __trunc64(__builtin_ia32_pmuludq128((__v4si)__zext128(__a),
2454+
(__v4si)__zext128(__b)));
24512455
}
24522456

24532457
/// Multiplies 32-bit unsigned integer values contained in the lower
@@ -2463,8 +2467,8 @@ static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mul_su32(__m64 __a, __m64 __b) {
24632467
/// \param __b
24642468
/// A [2 x i64] vector containing one of the source operands.
24652469
/// \returns A [2 x i64] vector containing the product of both operands.
2466-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epu32(__m128i __a,
2467-
__m128i __b) {
2470+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
2471+
_mm_mul_epu32(__m128i __a, __m128i __b) {
24682472
return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b);
24692473
}
24702474

clang/lib/Headers/smmintrin.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -567,8 +567,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi32(__m128i __V1,
567567
/// A 128-bit vector of [4 x i32].
568568
/// \returns A 128-bit vector of [2 x i64] containing the products of both
569569
/// operands.
570-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epi32(__m128i __V1,
571-
__m128i __V2) {
570+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
571+
_mm_mul_epi32(__m128i __V1, __m128i __V2) {
572572
return (__m128i)__builtin_ia32_pmuldq128((__v4si)__V1, (__v4si)__V2);
573573
}
574574

clang/test/CodeGen/X86/avx2-builtins.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -920,6 +920,7 @@ __m256i test_mm256_mul_epi32(__m256i a, __m256i b) {
920920
// CHECK: mul <4 x i64> %{{.*}}, %{{.*}}
921921
return _mm256_mul_epi32(a, b);
922922
}
923+
TEST_CONSTEXPR(match_m256i(_mm256_mul_epi32((__m256i)(__v8si){+1, -2, +3, -4, +5, -6, +7, -8}, (__m256i)(__v8si){-16, -14, +12, +10, -8, +6, -4, +2}), -16, 36, -40, -28));
923924

924925
__m256i test_mm256_mul_epu32(__m256i a, __m256i b) {
925926
// CHECK-LABEL: test_mm256_mul_epu32
@@ -928,6 +929,7 @@ __m256i test_mm256_mul_epu32(__m256i a, __m256i b) {
928929
// CHECK: mul <4 x i64> %{{.*}}, %{{.*}}
929930
return _mm256_mul_epu32(a, b);
930931
}
932+
TEST_CONSTEXPR(match_m256i(_mm256_mul_epu32((__m256i)(__v8si){+1, -2, +3, -4, +5, -6, +7, -8}, (__m256i)(__v8si){-16, -14, +12, +10, -8, +6, -4, +2}), 4294967280, 36, 21474836440, 30064771044));
931933

932934
__m256i test_mm256_mulhi_epu16(__m256i a, __m256i b) {
933935
// CHECK-LABEL: test_mm256_mulhi_epu16

clang/test/CodeGen/X86/avx512f-builtins.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3027,6 +3027,7 @@ __m512i test_mm512_mul_epi32(__m512i __A, __m512i __B) {
30273027
//CHECK: mul <8 x i64> %{{.*}}, %{{.*}}
30283028
return _mm512_mul_epi32(__A,__B);
30293029
}
3030+
TEST_CONSTEXPR(match_v8di(_mm512_mul_epi32((__m512i)(__v16si){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m512i)(__v16si){-32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), -32, 84, -120, 140, -144, 132, -104, -60));
30303031

30313032
__m512i test_mm512_maskz_mul_epi32 (__mmask8 __k,__m512i __A, __m512i __B) {
30323033
//CHECK-LABEL: test_mm512_maskz_mul_epi32
@@ -3057,6 +3058,7 @@ __m512i test_mm512_mul_epu32 (__m512i __A, __m512i __B) {
30573058
//CHECK: mul <8 x i64> %{{.*}}, %{{.*}}
30583059
return _mm512_mul_epu32(__A,__B);
30593060
}
3061+
TEST_CONSTEXPR(match_m512i(_mm512_mul_epu32((__m512i)(__v16si){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m512i)(__v16si){-32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), 4294967264, 84, 21474836360, 140, 38654705520, 132, 55834574744, 64424509380));
30603062

30613063
__m512i test_mm512_maskz_mul_epu32 (__mmask8 __k,__m512i __A, __m512i __B) {
30623064
//CHECK-LABEL: test_mm512_maskz_mul_epu32

clang/test/CodeGen/X86/mmx-builtins.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,7 @@ __m64 test_mm_mul_su32(__m64 a, __m64 b) {
389389
// CHECK: mul <2 x i64> %{{.*}}, %{{.*}}
390390
return _mm_mul_su32(a, b);
391391
}
392+
TEST_CONSTEXPR(match_m64(_mm_mul_su32((__m64)(__v2si){+1, -2}, (__m64)(__v2si){-10, +8}), 4294967286));
392393

393394
__m64 test_mm_mulhi_pi16(__m64 a, __m64 b) {
394395
// CHECK-LABEL: test_mm_mulhi_pi16

clang/test/CodeGen/X86/sse2-builtins.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -925,6 +925,7 @@ __m128i test_mm_mul_epu32(__m128i A, __m128i B) {
925925
// CHECK: mul <2 x i64> %{{.*}}, %{{.*}}
926926
return _mm_mul_epu32(A, B);
927927
}
928+
TEST_CONSTEXPR(match_m128i(_mm_mul_epu32((__m128i)(__v4si){+1, -2, +3, -4}, (__m128i)(__v4si){-16, -14, +12, +10}), 4294967280, 36));
928929

929930
__m128d test_mm_mul_pd(__m128d A, __m128d B) {
930931
// CHECK-LABEL: test_mm_mul_pd

0 commit comments

Comments
 (0)