Skip to content

Commit c8312bd

Browse files
authored
[Headers][X86] Enable constexpr handling for pmulhw/pmulhuw intrinsics (#152540)
This patch updates the pmulhw/pmulhuw builtins to support constant expression handling by extending the VectorExprEvaluator::VisitCallExpr code that handles elementwise integer binop builtins. Hopefully this can be used as a reference patch to show how to add future target-specific constexpr handling with minimal code impact. I've also enabled constexpr handling for the pmullw intrinsics (which are tagged on #152490), as they all use very similar tests. I've also had to tweak the MMX -> SSE2 wrapper, as undefs are not permitted in constexpr shuffle masks. Fixes #152524.
1 parent 9ea1d39 commit c8312bd

File tree

12 files changed

+88
-28
lines changed

12 files changed

+88
-28
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -93,20 +93,23 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
9393
}
9494

9595
let Features = "sse2" in {
96-
def pmulhw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
9796
def pavgb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
9897
def pavgw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
9998
def packsswb128 : X86Builtin<"_Vector<16, char>(_Vector<8, short>, _Vector<8, short>)">;
10099
def packssdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">;
101100
def packuswb128 : X86Builtin<"_Vector<16, char>(_Vector<8, short>, _Vector<8, short>)">;
102-
def pmulhuw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
103101
def vec_ext_v2di : X86Builtin<"long long int(_Vector<2, long long int>, _Constant int)">;
104102
def vec_ext_v4si : X86Builtin<"int(_Vector<4, int>, _Constant int)">;
105103
def vec_ext_v4sf : X86Builtin<"float(_Vector<4, float>, _Constant int)">;
106104
def vec_ext_v8hi : X86Builtin<"short(_Vector<8, short>, _Constant int)">;
107105
def vec_set_v8hi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, short, _Constant int)">;
108106
}
109107

108+
let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
109+
def pmulhw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
110+
def pmulhuw128 : X86Builtin<"_Vector<8, unsigned short>(_Vector<8, unsigned short>, _Vector<8, unsigned short>)">;
111+
}
112+
110113
let Features = "sse3" in {
111114
foreach Op = ["addsub", "hadd", "hsub"] in {
112115
def Op#ps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">;
@@ -579,8 +582,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
579582
def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">;
580583
def pmuldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
581584
def pmulhrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
582-
def pmulhuw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
583-
def pmulhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
584585
def pmuludq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
585586
def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
586587
def pshufb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
@@ -619,6 +620,11 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
619620
def insert128i256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>, _Constant int)">;
620621
}
621622

623+
let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
624+
def pmulhuw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, unsigned short>, _Vector<16, unsigned short>)">;
625+
def pmulhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
626+
}
627+
622628
let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
623629
def maskloadd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int const *>, _Vector<8, int>)">;
624630
def maskloadq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int const *>, _Vector<4, long long int>)">;
@@ -1429,7 +1435,10 @@ let Features = "avx512bitalg,evex512", Attributes = [NoThrow, Const, RequiredVec
14291435

14301436
let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
14311437
def pmulhrsw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">;
1432-
def pmulhuw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">;
1438+
}
1439+
1440+
let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
1441+
def pmulhuw512 : X86Builtin<"_Vector<32, unsigned short>(_Vector<32, unsigned short>, _Vector<32, unsigned short>)">;
14331442
def pmulhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">;
14341443
}
14351444

clang/lib/AST/ExprConstant.cpp

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11628,7 +11628,13 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1162811628
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
1162911629
}
1163011630
case Builtin::BI__builtin_elementwise_add_sat:
11631-
case Builtin::BI__builtin_elementwise_sub_sat: {
11631+
case Builtin::BI__builtin_elementwise_sub_sat:
11632+
case clang::X86::BI__builtin_ia32_pmulhuw128:
11633+
case clang::X86::BI__builtin_ia32_pmulhuw256:
11634+
case clang::X86::BI__builtin_ia32_pmulhuw512:
11635+
case clang::X86::BI__builtin_ia32_pmulhw128:
11636+
case clang::X86::BI__builtin_ia32_pmulhw256:
11637+
case clang::X86::BI__builtin_ia32_pmulhw512: {
1163211638
APValue SourceLHS, SourceRHS;
1163311639
if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
1163411640
!EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
@@ -11653,6 +11659,18 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1165311659
APSInt(LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS),
1165411660
DestEltTy->isUnsignedIntegerOrEnumerationType())));
1165511661
break;
11662+
case clang::X86::BI__builtin_ia32_pmulhuw128:
11663+
case clang::X86::BI__builtin_ia32_pmulhuw256:
11664+
case clang::X86::BI__builtin_ia32_pmulhuw512:
11665+
ResultElements.push_back(APValue(APSInt(llvm::APIntOps::mulhu(LHS, RHS),
11666+
/*isUnsigned=*/true)));
11667+
break;
11668+
case clang::X86::BI__builtin_ia32_pmulhw128:
11669+
case clang::X86::BI__builtin_ia32_pmulhw256:
11670+
case clang::X86::BI__builtin_ia32_pmulhw512:
11671+
ResultElements.push_back(APValue(APSInt(llvm::APIntOps::mulhs(LHS, RHS),
11672+
/*isUnsigned=*/false)));
11673+
break;
1165611674
}
1165711675
}
1165811676

clang/lib/Headers/avx2intrin.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1729,10 +1729,10 @@ _mm256_mulhrs_epi16(__m256i __a, __m256i __b)
17291729
/// \param __b
17301730
/// A 256-bit vector of [16 x i16] containing one of the source operands.
17311731
/// \returns A 256-bit vector of [16 x i16] containing the products.
1732-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
1732+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
17331733
_mm256_mulhi_epu16(__m256i __a, __m256i __b)
17341734
{
1735-
return (__m256i)__builtin_ia32_pmulhuw256((__v16hi)__a, (__v16hi)__b);
1735+
return (__m256i)__builtin_ia32_pmulhuw256((__v16hu)__a, (__v16hu)__b);
17361736
}
17371737

17381738
/// Multiplies signed 16-bit integer elements of two 256-bit vectors of
@@ -1748,7 +1748,7 @@ _mm256_mulhi_epu16(__m256i __a, __m256i __b)
17481748
/// \param __b
17491749
/// A 256-bit vector of [16 x i16] containing one of the source operands.
17501750
/// \returns A 256-bit vector of [16 x i16] containing the products.
1751-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
1751+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
17521752
_mm256_mulhi_epi16(__m256i __a, __m256i __b)
17531753
{
17541754
return (__m256i)__builtin_ia32_pmulhw256((__v16hi)__a, (__v16hi)__b);
@@ -1767,7 +1767,7 @@ _mm256_mulhi_epi16(__m256i __a, __m256i __b)
17671767
/// \param __b
17681768
/// A 256-bit vector of [16 x i16] containing one of the source operands.
17691769
/// \returns A 256-bit vector of [16 x i16] containing the products.
1770-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
1770+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
17711771
_mm256_mullo_epi16(__m256i __a, __m256i __b)
17721772
{
17731773
return (__m256i)((__v16hu)__a * (__v16hu)__b);

clang/lib/Headers/avx512bwintrin.h

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,14 @@ typedef unsigned long long __mmask64;
2525
__attribute__((__always_inline__, __nodebug__, \
2626
__target__("avx512bw,no-evex512")))
2727

28+
#if defined(__cplusplus) && (__cplusplus >= 201103L)
29+
#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 constexpr
30+
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
31+
#else
32+
#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512
33+
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
34+
#endif
35+
2836
static __inline __mmask32 __DEFAULT_FN_ATTRS
2937
_knot_mask32(__mmask32 __M)
3038
{
@@ -438,7 +446,7 @@ _mm512_maskz_sub_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
438446
(__v32hi)_mm512_setzero_si512());
439447
}
440448

441-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
449+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
442450
_mm512_mullo_epi16 (__m512i __A, __m512i __B) {
443451
return (__m512i) ((__v32hu) __A * (__v32hu) __B);
444452
}
@@ -1082,7 +1090,7 @@ _mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B)
10821090
(__v32hi)_mm512_setzero_si512());
10831091
}
10841092

1085-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1093+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
10861094
_mm512_mulhi_epi16(__m512i __A, __m512i __B)
10871095
{
10881096
return (__m512i)__builtin_ia32_pmulhw512((__v32hi) __A, (__v32hi) __B);
@@ -1105,10 +1113,10 @@ _mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B)
11051113
(__v32hi)_mm512_setzero_si512());
11061114
}
11071115

1108-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1116+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
11091117
_mm512_mulhi_epu16(__m512i __A, __m512i __B)
11101118
{
1111-
return (__m512i)__builtin_ia32_pmulhuw512((__v32hi) __A, (__v32hi) __B);
1119+
return (__m512i)__builtin_ia32_pmulhuw512((__v32hu) __A, (__v32hu) __B);
11121120
}
11131121

11141122
static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -2010,5 +2018,7 @@ _mm512_sad_epu8 (__m512i __A, __m512i __B)
20102018

20112019
#undef __DEFAULT_FN_ATTRS512
20122020
#undef __DEFAULT_FN_ATTRS
2021+
#undef __DEFAULT_FN_ATTRS512_CONSTEXPR
2022+
#undef __DEFAULT_FN_ATTRS_CONSTEXPR
20132023

20142024
#endif

clang/lib/Headers/emmintrin.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2394,8 +2394,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu8(__m128i __a,
23942394
/// A 128-bit signed [8 x i16] vector.
23952395
/// \returns A 128-bit signed [8 x i16] vector containing the upper 16 bits of
23962396
/// each of the eight 32-bit products.
2397-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epi16(__m128i __a,
2398-
__m128i __b) {
2397+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
2398+
_mm_mulhi_epi16(__m128i __a, __m128i __b) {
23992399
return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b);
24002400
}
24012401

@@ -2413,9 +2413,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epi16(__m128i __a,
24132413
/// A 128-bit unsigned [8 x i16] vector.
24142414
/// \returns A 128-bit unsigned [8 x i16] vector containing the upper 16 bits
24152415
/// of each of the eight 32-bit products.
2416-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epu16(__m128i __a,
2417-
__m128i __b) {
2418-
return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b);
2416+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
2417+
_mm_mulhi_epu16(__m128i __a, __m128i __b) {
2418+
return (__m128i)__builtin_ia32_pmulhuw128((__v8hu)__a, (__v8hu)__b);
24192419
}
24202420

24212421
/// Multiplies the corresponding elements of two signed [8 x i16]
@@ -2432,8 +2432,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epu16(__m128i __a,
24322432
/// A 128-bit signed [8 x i16] vector.
24332433
/// \returns A 128-bit signed [8 x i16] vector containing the lower 16 bits of
24342434
/// each of the eight 32-bit products.
2435-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi16(__m128i __a,
2436-
__m128i __b) {
2435+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
2436+
_mm_mullo_epi16(__m128i __a, __m128i __b) {
24372437
return (__m128i)((__v8hu)__a * (__v8hu)__b);
24382438
}
24392439

clang/lib/Headers/mmintrin.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,9 @@ typedef char __v16qi __attribute__((__vector_size__(16)));
5757

5858
#define __trunc64(x) \
5959
(__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0)
60+
#define __zext128(x) \
61+
(__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \
62+
1, 2, 3)
6063
#define __anyext128(x) \
6164
(__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \
6265
1, -1, -1)
@@ -723,11 +726,11 @@ _mm_madd_pi16(__m64 __m1, __m64 __m2)
723726
/// A 64-bit integer vector of [4 x i16].
724727
/// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits
725728
/// of the products of both parameters.
726-
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
729+
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
727730
_mm_mulhi_pi16(__m64 __m1, __m64 __m2)
728731
{
729-
return __trunc64(__builtin_ia32_pmulhw128((__v8hi)__anyext128(__m1),
730-
(__v8hi)__anyext128(__m2)));
732+
return __trunc64(__builtin_ia32_pmulhw128((__v8hi)__zext128(__m1),
733+
(__v8hi)__zext128(__m2)));
731734
}
732735

733736
/// Multiplies each 16-bit signed integer element of the first 64-bit

clang/lib/Headers/xmmintrin.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ typedef float __m128_u __attribute__((__vector_size__(16), __aligned__(1)));
2424

2525
/* Unsigned types */
2626
typedef unsigned int __v4su __attribute__((__vector_size__(16)));
27+
typedef unsigned short __v8hu __attribute__((__vector_size__(16)));
2728

2829
/* This header should only be included in a hosted environment as it depends on
2930
* a standard library to provide allocation routines. */
@@ -2447,11 +2448,11 @@ _mm_movemask_pi8(__m64 __a)
24472448
/// \param __b
24482449
/// A 64-bit integer vector containing one of the source operands.
24492450
/// \returns A 64-bit integer vector containing the products of both operands.
2450-
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
2451+
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
24512452
_mm_mulhi_pu16(__m64 __a, __m64 __b)
24522453
{
2453-
return __trunc64(__builtin_ia32_pmulhuw128((__v8hi)__anyext128(__a),
2454-
(__v8hi)__anyext128(__b)));
2454+
return __trunc64(__builtin_ia32_pmulhuw128((__v8hu)__zext128(__a),
2455+
(__v8hu)__zext128(__b)));
24552456
}
24562457

24572458
/// Shuffles the 4 16-bit integers from a 64-bit integer vector to the

clang/test/CodeGen/X86/avx2-builtins.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -893,12 +893,14 @@ __m256i test_mm256_mulhi_epu16(__m256i a, __m256i b) {
893893
// CHECK: call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
894894
return _mm256_mulhi_epu16(a, b);
895895
}
896+
TEST_CONSTEXPR(match_v16hi(_mm256_mulhi_epu16((__m256i)(__v16hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m256i)(__v16hi){-32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), 0, -32, 0, 25, 4, -28, 0, 17, 8, -24, 0, 9, 12, 5, 14, 1));
896897

897898
__m256i test_mm256_mulhi_epi16(__m256i a, __m256i b) {
898899
// CHECK-LABEL: test_mm256_mulhi_epi16
899900
// CHECK: call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
900901
return _mm256_mulhi_epi16(a, b);
901902
}
903+
TEST_CONSTEXPR(match_v16hi(_mm256_mulhi_epi16((__m256i)(__v16hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m256i)(__v16hi){-32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), -1, 0, 0, -1, -1, 0, 0, -1, -1, 0, 0, -1, -1, -1, -1, -1));
902904

903905
__m256i test_mm256_mulhrs_epi16(__m256i a, __m256i b) {
904906
// CHECK-LABEL: test_mm256_mulhrs_epi16
@@ -911,6 +913,7 @@ __m256i test_mm256_mullo_epi16(__m256i a, __m256i b) {
911913
// CHECK: mul <16 x i16>
912914
return _mm256_mullo_epi16(a, b);
913915
}
916+
TEST_CONSTEXPR(match_v16hi(_mm256_mullo_epi16((__m256i)(__v16hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m256i)(__v16hi){-32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), -32, 60, 84, -104, -120, 132, 140, -144, -144, 140, 132, -120, -104, -84, -60, -32));
914917

915918
__m256i test_mm256_mullo_epi32(__m256i a, __m256i b) {
916919
// CHECK-LABEL: test_mm256_mullo_epi32

clang/test/CodeGen/X86/avx512bw-builtins.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44

55
#include <immintrin.h>
6+
#include "builtin_test_helpers.h"
67

78
__mmask32 test_knot_mask32(__mmask32 a) {
89
// CHECK-LABEL: @test_knot_mask32
@@ -823,6 +824,7 @@ __m512i test_mm512_mullo_epi16 (__m512i __A, __m512i __B) {
823824
//CHECK: mul <32 x i16>
824825
return _mm512_mullo_epi16(__A, __B);
825826
}
827+
TEST_CONSTEXPR(match_v32hi(_mm512_mullo_epi16((__m512i)(__v32hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32}, (__m512i)(__v32hi){-64, -62, +60, +58, -56, -54, +52, +50, -48, -46, +44, +42, -40, -38, +36, +34, -32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), -64, 124, 180, -232, -280, 324, 364, -400, -432, 460, 484, -504, -520, 532, 540, -544, -544, 540, 532, -520, -504, 484, 460, -432, -400, 364, 324, -280, -232, -180, -124, -64));
826828

827829
__m512i test_mm512_mask_mullo_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
828830
//CHECK-LABEL: @test_mm512_mask_mullo_epi16
@@ -1331,29 +1333,36 @@ __m512i test_mm512_mulhi_epi16(__m512i __A, __m512i __B) {
13311333
// CHECK: @llvm.x86.avx512.pmulh.w.512
13321334
return _mm512_mulhi_epi16(__A,__B);
13331335
}
1336+
TEST_CONSTEXPR(match_v32hi(_mm512_mulhi_epi16((__m512i)(__v32hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32}, (__m512i)(__v32hi){-64, -62, +60, +58, -56, -54, +52, +50, -48, -46, +44, +42, -40, -38, +36, +34, -32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), -1, 0, 0, -1, -1, 0, 0, -1, -1, 0, 0, -1, -1, 0, 0, -1, -1, 0, 0, -1, -1, 0, 0, -1, -1, 0, 0, -1, -1, -1, -1, -1));
1337+
13341338
__m512i test_mm512_mask_mulhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
13351339
// CHECK-LABEL: @test_mm512_mask_mulhi_epi16
13361340
// CHECK: @llvm.x86.avx512.pmulh.w.512
13371341
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
13381342
return _mm512_mask_mulhi_epi16(__W,__U,__A,__B);
13391343
}
1344+
13401345
__m512i test_mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
13411346
// CHECK-LABEL: @test_mm512_maskz_mulhi_epi16
13421347
// CHECK: @llvm.x86.avx512.pmulh.w.512
13431348
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
13441349
return _mm512_maskz_mulhi_epi16(__U,__A,__B);
13451350
}
1351+
13461352
__m512i test_mm512_mulhi_epu16(__m512i __A, __m512i __B) {
13471353
// CHECK-LABEL: @test_mm512_mulhi_epu16
13481354
// CHECK: @llvm.x86.avx512.pmulhu.w.512
13491355
return _mm512_mulhi_epu16(__A,__B);
13501356
}
1357+
TEST_CONSTEXPR(match_v32hi(_mm512_mulhi_epu16((__m512i)(__v32hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32}, (__m512i)(__v32hi){-64, -62, +60, +58, -56, -54, +52, +50, -48, -46, +44, +42, -40, -38, +36, +34, -32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), 0, -64, 0, 57, 4, -60, 0, 49, 8, -56, 0, 41, 12, -52, 0, 33, 16, -48, 0, 25, 20, -44, 0, 17, 24, -40, 0, 9, 28, 5, 30, 1));
1358+
13511359
__m512i test_mm512_mask_mulhi_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
13521360
// CHECK-LABEL: @test_mm512_mask_mulhi_epu16
13531361
// CHECK: @llvm.x86.avx512.pmulhu.w.512
13541362
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
13551363
return _mm512_mask_mulhi_epu16(__W,__U,__A,__B);
13561364
}
1365+
13571366
__m512i test_mm512_maskz_mulhi_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
13581367
// CHECK-LABEL: @test_mm512_maskz_mulhi_epu16
13591368
// CHECK: @llvm.x86.avx512.pmulhu.w.512

clang/test/CodeGen/X86/mmx-builtins.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -365,12 +365,14 @@ __m64 test_mm_mulhi_pi16(__m64 a, __m64 b) {
365365
// CHECK: call <8 x i16> @llvm.x86.sse2.pmulh.w(
366366
return _mm_mulhi_pi16(a, b);
367367
}
368+
TEST_CONSTEXPR(match_v4hi(_mm_mulhi_pi16((__m64)(__v4hi){+1, -2, +3, -4}, (__m64)(__v4hi){-10, +8, +6, -4}), -1, -1, 0, 0));
368369

369370
__m64 test_mm_mulhi_pu16(__m64 a, __m64 b) {
370371
// CHECK-LABEL: test_mm_mulhi_pu16
371372
// CHECK: call <8 x i16> @llvm.x86.sse2.pmulhu.w(
372373
return _mm_mulhi_pu16(a, b);
373374
}
375+
TEST_CONSTEXPR(match_v4hi(_mm_mulhi_pu16((__m64)(__v4hi){+1, -2, +3, -4}, (__m64)(__v4hi){-10, +8, +6, -4}), 0, 7, 0, -8));
374376

375377
__m64 test_mm_mulhrs_pi16(__m64 a, __m64 b) {
376378
// CHECK-LABEL: test_mm_mulhrs_pi16

0 commit comments

Comments
 (0)