Skip to content

Commit 5114dec

Browse files
committed
Add support for constexpr with PMULHRSW
1 parent e90ab31 commit 5114dec

File tree

11 files changed

+58
-15
lines changed

11 files changed

+58
-15
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -125,12 +125,15 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
125125
}
126126

127127
def pmaddubsw128 : X86Builtin<"_Vector<8, short>(_Vector<16, char>, _Vector<16, char>)">;
128-
def pmulhrsw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
129128
def pshufb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
130129
def psignb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
131130
def psignw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
132131
def psignd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
133132
}
133+
134+
let Features = "ssse3", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
135+
def pmulhrsw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
136+
}
134137
}
135138

136139
// AVX
@@ -584,7 +587,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
584587
def pmaddubsw256 : X86Builtin<"_Vector<16, short>(_Vector<32, char>, _Vector<32, char>)">;
585588
def pmaddwd256 : X86Builtin<"_Vector<8, int>(_Vector<16, short>, _Vector<16, short>)">;
586589
def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">;
587-
def pmulhrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
588590
def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
589591
def pshufb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
590592
def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;
@@ -629,6 +631,7 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
629631
def psrawi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, int)">;
630632
def psradi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int)">;
631633

634+
def pmulhrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
632635
def pmulhuw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, unsigned short>, _Vector<16, unsigned short>)">;
633636
def pmulhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
634637

@@ -1340,7 +1343,7 @@ let Features = "avx512bitalg,evex512", Attributes = [NoThrow, Const, RequiredVec
13401343
def vpshufbitqmb512_mask : X86Builtin<"unsigned long long int(_Vector<64, char>, _Vector<64, char>, unsigned long long int)">;
13411344
}
13421345

1343-
let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
1346+
let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
13441347
def pmulhrsw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">;
13451348
}
13461349

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include "../ExprConstShared.h"
99
#include "Boolean.h"
1010
#include "EvalEmitter.h"
11+
#include "FixedPoint.h"
1112
#include "Interp.h"
1213
#include "InterpBuiltinBitCast.h"
1314
#include "PrimType.h"
@@ -3311,6 +3312,20 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
33113312
return LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS);
33123313
});
33133314

3315+
3316+
case clang::X86::BI__builtin_ia32_pmulhrsw128:
3317+
case clang::X86::BI__builtin_ia32_pmulhrsw256:
3318+
case clang::X86::BI__builtin_ia32_pmulhrsw512:
3319+
return interp__builtin_elementwise_int_binop(
3320+
S, OpPC, Call, BuiltinID, [](const APSInt &LHS, const APSInt &RHS) {
3321+
unsigned width = LHS.getBitWidth();
3322+
// PMULHRSW per lane: tmp = ((sext(a) * sext(b)) >> 14) + 1; result = tmp[16:1].
// The product must be formed at double width (mulhs would already discard the
// low half), the shift is arithmetic, and the +1 wraps rather than saturates.
3323+
APInt mul = LHS.sext(2 * width) * RHS.sext(2 * width);
3324+
mul = mul.ashr(width - 2);
3325+
mul += 1;
3326+
return mul.extractBits(width, 1);
3327+
});
3328+
33143329
case clang::X86::BI__builtin_ia32_pmulhuw128:
33153330
case clang::X86::BI__builtin_ia32_pmulhuw256:
33163331
case clang::X86::BI__builtin_ia32_pmulhuw512:

clang/lib/AST/ExprConstant.cpp

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11679,7 +11679,10 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1167911679
}
1168011680

1168111681
case Builtin::BI__builtin_elementwise_add_sat:
11682-
case Builtin::BI__builtin_elementwise_sub_sat:
11682+
case Builtin::BI__builtin_elementwise_sub_sat:
11683+
case clang::X86::BI__builtin_ia32_pmulhrsw128:
11684+
case clang::X86::BI__builtin_ia32_pmulhrsw256:
11685+
case clang::X86::BI__builtin_ia32_pmulhrsw512:
1168311686
case clang::X86::BI__builtin_ia32_pmulhuw128:
1168411687
case clang::X86::BI__builtin_ia32_pmulhuw256:
1168511688
case clang::X86::BI__builtin_ia32_pmulhuw512:
@@ -11813,6 +11816,19 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1181311816
APSInt(LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS),
1181411817
DestUnsigned)));
1181511818
break;
11819+
11820+
case clang::X86::BI__builtin_ia32_pmulhrsw128:
11821+
case clang::X86::BI__builtin_ia32_pmulhrsw256:
11822+
case clang::X86::BI__builtin_ia32_pmulhrsw512: {
11823+
QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
11824+
unsigned width = Info.Ctx.getIntWidth(DestEltTy);
11825+
// PMULHRSW per lane: tmp = ((sext(a) * sext(b)) >> 14) + 1; result = tmp[16:1].
// Work on the full double-width signed product; the shift is arithmetic and
// the rounding increment wraps (no saturation).
11826+
APInt mul = LHS.sext(2 * width) * RHS.sext(2 * width);
11827+
mul = mul.ashr(width - 2);
11828+
mul += 1;
11829+
// Lanes are signed 16-bit values, so the APSInt must be marked signed.
ResultElements.push_back(APValue(APSInt(mul.extractBits(width, 1), /*isUnsigned=*/false)));
11830+
break;
11831+
}
1181611832
case clang::X86::BI__builtin_ia32_pmulhuw128:
1181711833
case clang::X86::BI__builtin_ia32_pmulhuw256:
1181811834
case clang::X86::BI__builtin_ia32_pmulhuw512:
@@ -11825,6 +11841,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1182511841
ResultElements.push_back(APValue(APSInt(llvm::APIntOps::mulhs(LHS, RHS),
1182611842
/*isUnsigned=*/false)));
1182711843
break;
11844+
1182811845
case clang::X86::BI__builtin_ia32_psllv2di:
1182911846
case clang::X86::BI__builtin_ia32_psllv4di:
1183011847
case clang::X86::BI__builtin_ia32_psllv4si:

clang/lib/Headers/avx2intrin.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1678,7 +1678,7 @@ _mm256_mul_epi32(__m256i __a, __m256i __b) {
16781678
/// \param __b
16791679
/// A 256-bit vector of [16 x i16] containing one of the source operands.
16801680
/// \returns A 256-bit vector of [16 x i16] containing the rounded products.
1681-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
1681+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
16821682
_mm256_mulhrs_epi16(__m256i __a, __m256i __b)
16831683
{
16841684
return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)__a, (__v16hi)__b);

clang/lib/Headers/avx512bwintrin.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1046,21 +1046,21 @@ _mm512_maskz_permutex2var_epi16(__mmask32 __U, __m512i __A, __m512i __I,
10461046
(__v32hi)_mm512_setzero_si512());
10471047
}
10481048

1049-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1049+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
10501050
_mm512_mulhrs_epi16(__m512i __A, __m512i __B)
10511051
{
10521052
return (__m512i)__builtin_ia32_pmulhrsw512((__v32hi)__A, (__v32hi)__B);
10531053
}
10541054

1055-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1055+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
10561056
_mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
10571057
{
10581058
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
10591059
(__v32hi)_mm512_mulhrs_epi16(__A, __B),
10601060
(__v32hi)__W);
10611061
}
10621062

1063-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1063+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
10641064
_mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B)
10651065
{
10661066
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,

clang/lib/Headers/avx512vlbwintrin.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1571,21 +1571,21 @@ _mm_mask_mulhrs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) {
15711571
(__v8hi)__W);
15721572
}
15731573

1574-
static __inline__ __m128i __DEFAULT_FN_ATTRS128
1574+
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
15751575
_mm_maskz_mulhrs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) {
15761576
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
15771577
(__v8hi)_mm_mulhrs_epi16(__X, __Y),
15781578
(__v8hi)_mm_setzero_si128());
15791579
}
15801580

1581-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
1581+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
15821582
_mm256_mask_mulhrs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y) {
15831583
return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
15841584
(__v16hi)_mm256_mulhrs_epi16(__X, __Y),
15851585
(__v16hi)__W);
15861586
}
15871587

1588-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
1588+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
15891589
_mm256_maskz_mulhrs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) {
15901590
return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
15911591
(__v16hi)_mm256_mulhrs_epi16(__X, __Y),

clang/lib/Headers/tmmintrin.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@
2929

3030
#define __trunc64(x) \
3131
(__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0)
32+
#define __zext128(x) \
33+
(__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \
34+
1, 2, 3)
3235
#define __anyext128(x) \
3336
(__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \
3437
1, -1, -1)
@@ -560,7 +563,7 @@ _mm_maddubs_pi16(__m64 __a, __m64 __b)
560563
/// A 128-bit vector of [8 x i16] containing one of the source operands.
561564
/// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled
562565
/// products of both operands.
563-
static __inline__ __m128i __DEFAULT_FN_ATTRS
566+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
564567
_mm_mulhrs_epi16(__m128i __a, __m128i __b)
565568
{
566569
return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
@@ -580,11 +583,11 @@ _mm_mulhrs_epi16(__m128i __a, __m128i __b)
580583
/// A 64-bit vector of [4 x i16] containing one of the source operands.
581584
/// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled
582585
/// products of both operands.
583-
static __inline__ __m64 __DEFAULT_FN_ATTRS
586+
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
584587
_mm_mulhrs_pi16(__m64 __a, __m64 __b)
585588
{
586-
return __trunc64(__builtin_ia32_pmulhrsw128((__v8hi)__anyext128(__a),
587-
(__v8hi)__anyext128(__b)));
589+
return __trunc64(__builtin_ia32_pmulhrsw128((__v8hi)__zext128(__a),
590+
(__v8hi)__zext128(__b)));
588591
}
589592

590593
/// Copies the 8-bit integers from a 128-bit integer vector to the

clang/test/CodeGen/X86/avx2-builtins.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -987,6 +987,7 @@ __m256i test_mm256_mulhrs_epi16(__m256i a, __m256i b) {
987987
// CHECK: call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
988988
return _mm256_mulhrs_epi16(a, b);
989989
}
990+
// Expected lanes follow the PMULHRSW definition (((a*b) >> 14) + 1) >> 1, including the 0x8000*0x8000 wrap and the +/-0.5 rounding cases.
TEST_CONSTEXPR(match_v16hi(_mm256_mulhrs_epi16((__m256i)(__v16hi){+100, +200, -300, -400, +32767, -32768, -32768, +1, +100, +200, -300, -400, +32767, -32768, -32768, -1}, (__m256i)(__v16hi){+30000, -20000, +10000, -5000, +32767, -32768, +32767, +16384, +30000, -20000, +10000, -5000, +32767, -32768, +32767, +16384}), +92, -122, -92, +61, +32766, -32768, -32767, +1, +92, -122, -92, +61, +32766, -32768, -32767, 0));
990991

991992
__m256i test_mm256_mullo_epi16(__m256i a, __m256i b) {
992993
// CHECK-LABEL: test_mm256_mullo_epi16

clang/test/CodeGen/X86/avx512bw-builtins.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1375,6 +1375,8 @@ __m512i test_mm512_mulhrs_epi16(__m512i __A, __m512i __B) {
13751375
// CHECK: @llvm.x86.avx512.pmul.hr.sw.512
13761376
return _mm512_mulhrs_epi16(__A,__B);
13771377
}
1378+
// Expected lanes follow the PMULHRSW definition (((a*b) >> 14) + 1) >> 1, including the 0x8000*0x8000 wrap and the +/-0.5 rounding cases.
TEST_CONSTEXPR(match_v32hi(_mm512_mulhrs_epi16((__m512i)(__v32hi){+100, +200, -300, -400, +32767, -32768, -32768, +1, +100, +200, -300, -400, +32767, -32768, -32768, -1, +100, +200, -300, -400, +32767, -32768, -32768, +1, +100, +200, -300, -400, +32767, -32768, -32768, -1}, (__m512i)(__v32hi){+30000, -20000, +10000, -5000, +32767, -32768, +32767, +16384, +30000, -20000, +10000, -5000, +32767, -32768, +32767, +16384, +30000, -20000, +10000, -5000, +32767, -32768, +32767, +16384, +30000, -20000, +10000, -5000, +32767, -32768, +32767, +16384}), +92, -122, -92, +61, +32766, -32768, -32767, +1, +92, -122, -92, +61, +32766, -32768, -32767, 0, +92, -122, -92, +61, +32766, -32768, -32767, +1, +92, -122, -92, +61, +32766, -32768, -32767, 0));
1379+
13781380
__m512i test_mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
13791381
// CHECK-LABEL: test_mm512_mask_mulhrs_epi16
13801382
// CHECK: @llvm.x86.avx512.pmul.hr.sw.512

clang/test/CodeGen/X86/mmx-builtins.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,7 @@ __m64 test_mm_mulhrs_pi16(__m64 a, __m64 b) {
426426
// CHECK: call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(
427427
return _mm_mulhrs_pi16(a, b);
428428
}
429+
// Expected lanes follow the PMULHRSW definition (((a*b) >> 14) + 1) >> 1, including the 0x8000*0x8000 wrap.
TEST_CONSTEXPR(match_v4hi(_mm_mulhrs_pi16((__m64)(__v4hi){+100, -300, +32767, -32768}, (__m64)(__v4hi){+30000, +10000, +32767, -32768}), +92, -92, +32766, -32768));
429430

430431
__m64 test_mm_mullo_pi16(__m64 a, __m64 b) {
431432
// CHECK-LABEL: test_mm_mullo_pi16

0 commit comments

Comments
 (0)