Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -125,12 +125,15 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
}

def pmaddubsw128 : X86Builtin<"_Vector<8, short>(_Vector<16, char>, _Vector<16, char>)">;
def pmulhrsw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
def pshufb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
def psignb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
def psignw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
def psignd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
}

// SSSE3 builtins declared with the Constexpr attribute in addition to
// NoThrow/Const/RequiredVectorWidth<128>.
let Features = "ssse3", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def pmulhrsw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
}
}

// AVX
Expand Down Expand Up @@ -584,7 +587,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
def pmaddubsw256 : X86Builtin<"_Vector<16, short>(_Vector<32, char>, _Vector<32, char>)">;
def pmaddwd256 : X86Builtin<"_Vector<8, int>(_Vector<16, short>, _Vector<16, short>)">;
def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">;
def pmulhrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
def pshufb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;
Expand Down Expand Up @@ -629,6 +631,7 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
def psrawi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, int)">;
def psradi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int)">;

def pmulhrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
def pmulhuw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, unsigned short>, _Vector<16, unsigned short>)">;
def pmulhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;

Expand Down Expand Up @@ -1340,7 +1343,7 @@ let Features = "avx512bitalg,evex512", Attributes = [NoThrow, Const, RequiredVec
def vpshufbitqmb512_mask : X86Builtin<"unsigned long long int(_Vector<64, char>, _Vector<64, char>, unsigned long long int)">;
}

let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def pmulhrsw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Move this down next to pmulhuw512 etc. instead of creating a new block?

}

Expand Down
14 changes: 14 additions & 0 deletions clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3311,6 +3311,20 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS);
});


case clang::X86::BI__builtin_ia32_pmulhrsw128:
case clang::X86::BI__builtin_ia32_pmulhrsw256:
case clang::X86::BI__builtin_ia32_pmulhrsw512:
return interp__builtin_elementwise_int_binop(
S, OpPC, Call, BuiltinID,[](const APSInt &LHS, const APSInt &RHS) {
unsigned width = LHS.getBitWidth();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fix capitalization of local variables.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done!


// PMULHRSW per lane: take the full double-width signed product, discard the
// low 14 bits, add 1 to round, then keep bits [width:1] of that value. Using
// mulhs() here would be wrong because it has already thrown away the low
// `width` bits that the rounding step needs. The final extractBits()
// truncates, so -32768 * -32768 wraps to -32768 as on hardware.
APInt Rounded = llvm::APIntOps::mulsExtended(LHS, RHS).ashr(14) + 1;
return Rounded.extractBits(width, 1);
});

case clang::X86::BI__builtin_ia32_pmulhuw128:
case clang::X86::BI__builtin_ia32_pmulhuw256:
case clang::X86::BI__builtin_ia32_pmulhuw512:
Expand Down
19 changes: 18 additions & 1 deletion clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11679,7 +11679,10 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
}

case Builtin::BI__builtin_elementwise_add_sat:
case Builtin::BI__builtin_elementwise_sub_sat:
case Builtin::BI__builtin_elementwise_sub_sat:
case clang::X86::BI__builtin_ia32_pmulhrsw128:
case clang::X86::BI__builtin_ia32_pmulhrsw256:
case clang::X86::BI__builtin_ia32_pmulhrsw512:
case clang::X86::BI__builtin_ia32_pmulhuw128:
case clang::X86::BI__builtin_ia32_pmulhuw256:
case clang::X86::BI__builtin_ia32_pmulhuw512:
Expand Down Expand Up @@ -11813,6 +11816,19 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
APSInt(LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS),
DestUnsigned)));
break;

case clang::X86::BI__builtin_ia32_pmulhrsw128:
case clang::X86::BI__builtin_ia32_pmulhrsw256:
case clang::X86::BI__builtin_ia32_pmulhrsw512: {
  // PMULHRSW per lane: take the full double-width signed product, discard
  // the low 14 bits, add 1 to round, then keep bits [N:1] where N is the
  // lane width. mulhs() cannot be used because it drops the low product bits
  // that the rounding step depends on. extractBits() truncates, so
  // -32768 * -32768 wraps to -32768 as on hardware.
  APInt Rounded = llvm::APIntOps::mulsExtended(LHS, RHS).ashr(14) + 1;
  // The destination lanes are signed shorts, so build a signed APSInt.
  ResultElements.push_back(
      APValue(APSInt(Rounded.extractBits(LHS.getBitWidth(), 1),
                     /*isUnsigned=*/false)));
  break;
}
case clang::X86::BI__builtin_ia32_pmulhuw128:
case clang::X86::BI__builtin_ia32_pmulhuw256:
case clang::X86::BI__builtin_ia32_pmulhuw512:
Expand All @@ -11825,6 +11841,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
ResultElements.push_back(APValue(APSInt(llvm::APIntOps::mulhs(LHS, RHS),
/*isUnsigned=*/false)));
break;

case clang::X86::BI__builtin_ia32_psllv2di:
case clang::X86::BI__builtin_ia32_psllv4di:
case clang::X86::BI__builtin_ia32_psllv4si:
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/Headers/avx2intrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -1678,7 +1678,7 @@ _mm256_mul_epi32(__m256i __a, __m256i __b) {
/// \param __b
/// A 256-bit vector of [16 x i16] containing one of the source operands.
/// \returns A 256-bit vector of [16 x i16] containing the rounded products.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mulhrs_epi16(__m256i __a, __m256i __b)
{
return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)__a, (__v16hi)__b);
Expand Down
6 changes: 3 additions & 3 deletions clang/lib/Headers/avx512bwintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -1046,21 +1046,21 @@ _mm512_maskz_permutex2var_epi16(__mmask32 __U, __m512i __A, __m512i __I,
(__v32hi)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mulhrs_epi16(__m512i __A, __m512i __B)
{
return (__m512i)__builtin_ia32_pmulhrsw512((__v32hi)__A, (__v32hi)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
{
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
(__v32hi)_mm512_mulhrs_epi16(__A, __B),
(__v32hi)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B)
{
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
Expand Down
6 changes: 3 additions & 3 deletions clang/lib/Headers/avx512vlbwintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -1571,21 +1571,21 @@ _mm_mask_mulhrs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) {
(__v8hi)__W);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_mulhrs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) {
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
(__v8hi)_mm_mulhrs_epi16(__X, __Y),
(__v8hi)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_mulhrs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y) {
return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
(__v16hi)_mm256_mulhrs_epi16(__X, __Y),
(__v16hi)__W);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_mulhrs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) {
return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
(__v16hi)_mm256_mulhrs_epi16(__X, __Y),
Expand Down
11 changes: 7 additions & 4 deletions clang/lib/Headers/tmmintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@

#define __trunc64(x) \
(__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0)
/* Widen a 64-bit vector to 128 bits. Shuffle indices 0-1 select the two
   32-bit lanes of x; indices 2-3 select lanes of the empty-initialized second
   operand, so the upper half of the result is zero. */
#define __zext128(x) \
(__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \
1, 2, 3)
#define __anyext128(x) \
(__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \
1, -1, -1)
Expand Down Expand Up @@ -560,7 +563,7 @@ _mm_maddubs_pi16(__m64 __a, __m64 __b)
/// A 128-bit vector of [8 x i16] containing one of the source operands.
/// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled
/// products of both operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_mulhrs_epi16(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
Expand All @@ -580,11 +583,11 @@ _mm_mulhrs_epi16(__m128i __a, __m128i __b)
/// A 64-bit vector of [4 x i16] containing one of the source operands.
/// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled
/// products of both operands.
static __inline__ __m64 __DEFAULT_FN_ATTRS
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_mulhrs_pi16(__m64 __a, __m64 __b)
{
return __trunc64(__builtin_ia32_pmulhrsw128((__v8hi)__anyext128(__a),
(__v8hi)__anyext128(__b)));
return __trunc64(__builtin_ia32_pmulhrsw128((__v8hi)__zext128(__a),
(__v8hi)__zext128(__b)));
}

/// Copies the 8-bit integers from a 128-bit integer vector to the
Expand Down
1 change: 1 addition & 0 deletions clang/test/CodeGen/X86/avx2-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -987,6 +987,7 @@ __m256i test_mm256_mulhrs_epi16(__m256i a, __m256i b) {
// CHECK: call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_mulhrs_epi16(a, b);
}
// Each expected lane is (((a * b) >> 14) + 1) >> 1 truncated to 16 bits; the inputs cover
// half-way rounding in both signs, the extremes, and the -32768 * -32768 wraparound.
TEST_CONSTEXPR(match_v16hi(_mm256_mulhrs_epi16((__m256i)(__v16hi){+16384, +32767, -32768, -32768, +1, -1, +3, -3, 0, +1, +32767, +16383, -32768, +1000, -1000, +12345}, (__m256i)(__v16hi){+16384, +32767, -32768, +32767, +16384, +16384, +16384, +16384, +32767, +1, +1, +1, +1, +2000, +2000, -6789}), +8192, +32766, -32768, -32767, +1, 0, +2, -1, 0, 0, +1, 0, -1, +61, -61, -2558));

__m256i test_mm256_mullo_epi16(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_mullo_epi16
Expand Down
2 changes: 2 additions & 0 deletions clang/test/CodeGen/X86/avx512bw-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -1375,6 +1375,8 @@ __m512i test_mm512_mulhrs_epi16(__m512i __A, __m512i __B) {
// CHECK: @llvm.x86.avx512.pmul.hr.sw.512
return _mm512_mulhrs_epi16(__A,__B);
}
// Each expected lane is (((a * b) >> 14) + 1) >> 1 truncated to 16 bits. The upper 16 lanes
// repeat the lower 16 with the operands swapped, so the expected values repeat as well.
TEST_CONSTEXPR(match_v32hi(_mm512_mulhrs_epi16((__m512i)(__v32hi){+16384, +32767, -32768, -32768, +1, -1, +3, -3, 0, +1, +32767, +16383, -32768, +1000, -1000, +12345, +16384, +32767, -32768, +32767, +16384, +16384, +16384, +16384, +32767, +1, +1, +1, +1, +2000, +2000, -6789}, (__m512i)(__v32hi){+16384, +32767, -32768, +32767, +16384, +16384, +16384, +16384, +32767, +1, +1, +1, +1, +2000, +2000, -6789, +16384, +32767, -32768, -32768, +1, -1, +3, -3, 0, +1, +32767, +16383, -32768, +1000, -1000, +12345}), +8192, +32766, -32768, -32767, +1, 0, +2, -1, 0, 0, +1, 0, -1, +61, -61, -2558, +8192, +32766, -32768, -32767, +1, 0, +2, -1, 0, 0, +1, 0, -1, +61, -61, -2558));
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing mask/maskz tests here (and the mask/maskz tests in avx512vlbw-builtins.c).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I got them all this time

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

result values of just 0 or 2 don't fill me with confidence :/


__m512i test_mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mask_mulhrs_epi16
// CHECK: @llvm.x86.avx512.pmul.hr.sw.512
Expand Down
1 change: 1 addition & 0 deletions clang/test/CodeGen/X86/mmx-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,7 @@ __m64 test_mm_mulhrs_pi16(__m64 a, __m64 b) {
// CHECK: call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(
return _mm_mulhrs_pi16(a, b);
}
// Each expected lane is (((a * b) >> 14) + 1) >> 1 truncated to 16 bits.
TEST_CONSTEXPR(match_v4hi(_mm_mulhrs_pi16((__m64)(__v4hi){+16384, +32767, -32768, -3}, (__m64)(__v4hi){+16384, +32767, +32767, +16384}), +8192, +32766, -32767, -1));

__m64 test_mm_mullo_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_mullo_pi16
Expand Down
1 change: 1 addition & 0 deletions clang/test/CodeGen/X86/ssse3-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ __m128i test_mm_mulhrs_epi16(__m128i a, __m128i b) {
// CHECK: call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_mulhrs_epi16(a, b);
}
TEST_CONSTEXPR(match_v4si(_mm_mulhrs_epi16((__m128i)(__v4si){+1, -2, +3, -4}, (__m128i)(__v4si){-10, +8, +6, -4}), 2, 2, 0, 0));

__m128i test_mm_shuffle_epi8(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_shuffle_epi8
Expand Down