Skip to content

Commit 2fadf3f

Browse files
author
whyuuwang
committed
Address issue #155395
[Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - add MMX/SSE/AVX PHADD/SUB & HADDPS/D intrinsics to be used in constexpr #155395
1 parent a81c406 commit 2fadf3f

File tree

4 files changed

+39
-52
lines changed

4 files changed

+39
-52
lines changed

clang/lib/Headers/avx2intrin.h

Lines changed: 6 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -886,9 +886,8 @@ _mm256_hadd_epi16(__m256i __a, __m256i __b) {
886886
/// A 256-bit vector of [8 x i32] containing one of the source operands.
887887
/// \returns A 256-bit vector of [8 x i32] containing the sums.
888888
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
889-
_mm256_hadd_epi32(__m256i __a, __m256i __b)
890-
{
891-
return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b);
889+
_mm256_hadd_epi32(__m256i __a, __m256i __b) {
890+
return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b);
892891
}
893892

894893
/// Horizontally adds the adjacent pairs of 16-bit integers from two 256-bit
@@ -987,9 +986,8 @@ _mm256_hsub_epi16(__m256i __a, __m256i __b) {
987986
/// A 256-bit vector of [8 x i32] containing one of the source operands.
988987
/// \returns A 256-bit vector of [8 x i32] containing the differences.
989988
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
990-
_mm256_hsub_epi32(__m256i __a, __m256i __b)
991-
{
992-
return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b);
989+
_mm256_hsub_epi32(__m256i __a, __m256i __b) {
990+
return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b);
993991
}
994992

995993
/// Horizontally subtracts adjacent pairs of 16-bit integers from two 256-bit
@@ -1023,9 +1021,8 @@ _mm256_hsub_epi32(__m256i __a, __m256i __b)
10231021
/// A 256-bit vector of [16 x i16] containing one of the source operands.
10241022
/// \returns A 256-bit vector of [16 x i16] containing the differences.
10251023
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
1026-
_mm256_hsubs_epi16(__m256i __a, __m256i __b)
1027-
{
1028-
return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b);
1024+
_mm256_hsubs_epi16(__m256i __a, __m256i __b) {
1025+
return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b);
10291026
}
10301027

10311028
/// Multiplies each unsigned byte from the 256-bit integer vector in \a __a

clang/lib/Headers/avxintrin.h

Lines changed: 6 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -704,8 +704,7 @@ _mm256_xor_ps(__m256 __a, __m256 __b)
704704
/// \returns A 256-bit vector of [4 x double] containing the horizontal sums of
705705
/// both operands.
706706
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
707-
_mm256_hadd_pd(__m256d __a, __m256d __b)
708-
{
707+
_mm256_hadd_pd(__m256d __a, __m256d __b) {
709708
return (__m256d)__builtin_ia32_haddpd256((__v4df)__a, (__v4df)__b);
710709
}
711710

@@ -726,8 +725,8 @@ _mm256_hadd_pd(__m256d __a, __m256d __b)
726725
/// index 2, 3, 6, 7 of a vector of [8 x float].
727726
/// \returns A 256-bit vector of [8 x float] containing the horizontal sums of
728727
/// both operands.
729-
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
730-
_mm256_hadd_ps(__m256 __a, __m256 __b) {
728+
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_hadd_ps(__m256 __a,
729+
__m256 __b) {
731730
return (__m256)__builtin_ia32_haddps256((__v8sf)__a, (__v8sf)__b);
732731
}
733732

@@ -749,8 +748,7 @@ _mm256_hadd_ps(__m256 __a, __m256 __b) {
749748
/// \returns A 256-bit vector of [4 x double] containing the horizontal
750749
/// differences of both operands.
751750
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
752-
_mm256_hsub_pd(__m256d __a, __m256d __b)
753-
{
751+
_mm256_hsub_pd(__m256d __a, __m256d __b) {
754752
return (__m256d)__builtin_ia32_hsubpd256((__v4df)__a, (__v4df)__b);
755753
}
756754

@@ -771,9 +769,8 @@ _mm256_hsub_pd(__m256d __a, __m256d __b)
771769
/// elements with index 2, 3, 6, 7 of a vector of [8 x float].
772770
/// \returns A 256-bit vector of [8 x float] containing the horizontal
773771
/// differences of both operands.
774-
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
775-
_mm256_hsub_ps(__m256 __a, __m256 __b)
776-
{
772+
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_hsub_ps(__m256 __a,
773+
__m256 __b) {
777774
return (__m256)__builtin_ia32_hsubps256((__v8sf)__a, (__v8sf)__b);
778775
}
779776

clang/lib/Headers/pmmintrin.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -89,8 +89,8 @@ _mm_addsub_ps(__m128 __a, __m128 __b)
8989
/// destination.
9090
/// \returns A 128-bit vector of [4 x float] containing the horizontal sums of
9191
/// both operands.
92-
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
93-
_mm_hadd_ps(__m128 __a, __m128 __b) {
92+
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadd_ps(__m128 __a,
93+
__m128 __b) {
9494
return __builtin_ia32_haddps((__v4sf)__a, (__v4sf)__b);
9595
}
9696

clang/lib/Headers/tmmintrin.h

Lines changed: 25 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -206,8 +206,7 @@ _mm_abs_epi32(__m128i __a) {
206206
/// both operands.
207207
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
208208
_mm_hadd_epi16(__m128i __a, __m128i __b) {
209-
return (__m128i)__builtin_ia32_phaddw128(
210-
(__v8hi)__a, (__v8hi)__b);
209+
return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
211210
}
212211

213212
/// Horizontally adds the adjacent pairs of values contained in 2 packed
@@ -249,8 +248,8 @@ _mm_hadd_epi32(__m128i __a, __m128i __b) {
249248
/// destination.
250249
/// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both
251250
/// operands.
252-
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
253-
_mm_hadd_pi16(__m64 __a, __m64 __b) {
251+
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadd_pi16(__m64 __a,
252+
__m64 __b) {
254253
return __trunc64(__builtin_ia32_phaddw128(
255254
(__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
256255
}
@@ -272,11 +271,10 @@ static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
272271
/// destination.
273272
/// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both
274273
/// operands.
275-
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
276-
_mm_hadd_pi32(__m64 __a, __m64 __b)
277-
{
278-
return __trunc64(__builtin_ia32_phaddd128(
279-
(__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){}));
274+
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadd_pi32(__m64 __a,
275+
__m64 __b) {
276+
return __trunc64(__builtin_ia32_phaddd128(
277+
(__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){}));
280278
}
281279

282280
/// Horizontally adds, with saturation, the adjacent pairs of values contained
@@ -324,11 +322,10 @@ _mm_hadds_epi16(__m128i __a, __m128i __b) {
324322
/// destination.
325323
/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
326324
/// sums of both operands.
327-
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
328-
_mm_hadds_pi16(__m64 __a, __m64 __b)
329-
{
330-
return __trunc64(__builtin_ia32_phaddsw128(
331-
(__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
325+
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadds_pi16(__m64 __a,
326+
__m64 __b) {
327+
return __trunc64(__builtin_ia32_phaddsw128(
328+
(__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
332329
}
333330

334331
/// Horizontally subtracts the adjacent pairs of values contained in 2
@@ -392,11 +389,10 @@ _mm_hsub_epi32(__m128i __a, __m128i __b) {
392389
/// the destination.
393390
/// \returns A 64-bit vector of [4 x i16] containing the horizontal differences
394391
/// of both operands.
395-
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
396-
_mm_hsub_pi16(__m64 __a, __m64 __b)
397-
{
398-
return __trunc64(__builtin_ia32_phsubw128(
399-
(__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
392+
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsub_pi16(__m64 __a,
393+
__m64 __b) {
394+
return __trunc64(__builtin_ia32_phsubw128(
395+
(__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
400396
}
401397

402398
/// Horizontally subtracts the adjacent pairs of values contained in 2
@@ -416,11 +412,10 @@ _mm_hsub_pi16(__m64 __a, __m64 __b)
416412
/// the destination.
417413
/// \returns A 64-bit vector of [2 x i32] containing the horizontal differences
418414
/// of both operands.
419-
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
420-
_mm_hsub_pi32(__m64 __a, __m64 __b)
421-
{
422-
return __trunc64(__builtin_ia32_phsubd128(
423-
(__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){}));
415+
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsub_pi32(__m64 __a,
416+
__m64 __b) {
417+
return __trunc64(__builtin_ia32_phsubd128(
418+
(__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){}));
424419
}
425420

426421
/// Horizontally subtracts, with saturation, the adjacent pairs of values
@@ -468,11 +463,10 @@ _mm_hsubs_epi16(__m128i __a, __m128i __b) {
468463
/// the destination.
469464
/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
470465
/// differences of both operands.
471-
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
472-
_mm_hsubs_pi16(__m64 __a, __m64 __b)
473-
{
474-
return __trunc64(__builtin_ia32_phsubsw128(
475-
(__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
466+
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsubs_pi16(__m64 __a,
467+
__m64 __b) {
468+
return __trunc64(__builtin_ia32_phsubsw128(
469+
(__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
476470
}
477471

478472
/// Multiplies corresponding pairs of packed 8-bit unsigned integer
@@ -553,9 +547,8 @@ _mm_maddubs_pi16(__m64 __a, __m64 __b) {
553547
/// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled
554548
/// products of both operands.
555549
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
556-
_mm_mulhrs_epi16(__m128i __a, __m128i __b)
557-
{
558-
return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
550+
_mm_mulhrs_epi16(__m128i __a, __m128i __b) {
551+
return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
559552
}
560553

561554
/// Multiplies packed 16-bit signed integer values, truncates the 32-bit

0 commit comments

Comments
 (0)