Skip to content

Commit 6d3104f

Browse files
committed
Update MMX/SSE/AVX/AVX512 PMADDWD/PMADDUBSW intrinsics to be used in constexpr
1 parent b9c7000 commit 6d3104f

File tree

6 files changed: 27 additions and 24 deletions.

clang/lib/Headers/avx2intrin.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1035,7 +1035,7 @@ _mm256_hsubs_epi16(__m256i __a, __m256i __b)
10351035
/// \param __b
10361036
/// A 256-bit vector containing one of the source operands.
10371037
/// \returns A 256-bit vector of [16 x i16] containing the result.
1038-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
1038+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
10391039
_mm256_maddubs_epi16(__m256i __a, __m256i __b)
10401040
{
10411041
return (__m256i)__builtin_ia32_pmaddubsw256((__v32qi)__a, (__v32qi)__b);
@@ -1067,7 +1067,7 @@ _mm256_maddubs_epi16(__m256i __a, __m256i __b)
10671067
/// \param __b
10681068
/// A 256-bit vector of [16 x i16] containing one of the source operands.
10691069
/// \returns A 256-bit vector of [8 x i32] containing the result.
1070-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
1070+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
10711071
_mm256_madd_epi16(__m256i __a, __m256i __b)
10721072
{
10731073
return (__m256i)__builtin_ia32_pmaddwd256((__v16hi)__a, (__v16hi)__b);

clang/lib/Headers/avx512bwintrin.h

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1064,39 +1064,39 @@ _mm512_maskz_mulhi_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
10641064
(__v32hi)_mm512_setzero_si512());
10651065
}
10661066

1067-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1067+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
10681068
_mm512_maddubs_epi16(__m512i __X, __m512i __Y) {
10691069
return (__m512i)__builtin_ia32_pmaddubsw512((__v64qi)__X, (__v64qi)__Y);
10701070
}
10711071

1072-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1072+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
10731073
_mm512_mask_maddubs_epi16(__m512i __W, __mmask32 __U, __m512i __X,
10741074
__m512i __Y) {
10751075
return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U,
10761076
(__v32hi)_mm512_maddubs_epi16(__X, __Y),
10771077
(__v32hi)__W);
10781078
}
10791079

1080-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1080+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
10811081
_mm512_maskz_maddubs_epi16(__mmask32 __U, __m512i __X, __m512i __Y) {
10821082
return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U,
10831083
(__v32hi)_mm512_maddubs_epi16(__X, __Y),
10841084
(__v32hi)_mm512_setzero_si512());
10851085
}
10861086

1087-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1087+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
10881088
_mm512_madd_epi16(__m512i __A, __m512i __B) {
10891089
return (__m512i)__builtin_ia32_pmaddwd512((__v32hi)__A, (__v32hi)__B);
10901090
}
10911091

1092-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1092+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
10931093
_mm512_mask_madd_epi16(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
10941094
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
10951095
(__v16si)_mm512_madd_epi16(__A, __B),
10961096
(__v16si)__W);
10971097
}
10981098

1099-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1099+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
11001100
_mm512_maskz_madd_epi16(__mmask16 __U, __m512i __A, __m512i __B) {
11011101
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
11021102
(__v16si)_mm512_madd_epi16(__A, __B),

clang/lib/Headers/avx512vlbwintrin.h

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1295,57 +1295,57 @@ _mm256_maskz_permutex2var_epi16 (__mmask16 __U, __m256i __A, __m256i __I,
12951295
(__v16hi)_mm256_setzero_si256());
12961296
}
12971297

1298-
static __inline__ __m128i __DEFAULT_FN_ATTRS128
1298+
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
12991299
_mm_mask_maddubs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) {
13001300
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
13011301
(__v8hi)_mm_maddubs_epi16(__X, __Y),
13021302
(__v8hi)__W);
13031303
}
13041304

1305-
static __inline__ __m128i __DEFAULT_FN_ATTRS128
1305+
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
13061306
_mm_maskz_maddubs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) {
13071307
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
13081308
(__v8hi)_mm_maddubs_epi16(__X, __Y),
13091309
(__v8hi)_mm_setzero_si128());
13101310
}
13111311

1312-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
1312+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
13131313
_mm256_mask_maddubs_epi16(__m256i __W, __mmask16 __U, __m256i __X,
13141314
__m256i __Y) {
13151315
return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
13161316
(__v16hi)_mm256_maddubs_epi16(__X, __Y),
13171317
(__v16hi)__W);
13181318
}
13191319

1320-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
1320+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
13211321
_mm256_maskz_maddubs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) {
13221322
return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
13231323
(__v16hi)_mm256_maddubs_epi16(__X, __Y),
13241324
(__v16hi)_mm256_setzero_si256());
13251325
}
13261326

1327-
static __inline__ __m128i __DEFAULT_FN_ATTRS128
1327+
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
13281328
_mm_mask_madd_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
13291329
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
13301330
(__v4si)_mm_madd_epi16(__A, __B),
13311331
(__v4si)__W);
13321332
}
13331333

1334-
static __inline__ __m128i __DEFAULT_FN_ATTRS128
1334+
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
13351335
_mm_maskz_madd_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
13361336
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
13371337
(__v4si)_mm_madd_epi16(__A, __B),
13381338
(__v4si)_mm_setzero_si128());
13391339
}
13401340

1341-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
1341+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
13421342
_mm256_mask_madd_epi16(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
13431343
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
13441344
(__v8si)_mm256_madd_epi16(__A, __B),
13451345
(__v8si)__W);
13461346
}
13471347

1348-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
1348+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
13491349
_mm256_maskz_madd_epi16(__mmask8 __U, __m256i __A, __m256i __B) {
13501350
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
13511351
(__v8si)_mm256_madd_epi16(__A, __B),

clang/lib/Headers/emmintrin.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2290,7 +2290,7 @@ _mm_avg_epu16(__m128i __a, __m128i __b) {
22902290
/// A 128-bit signed [8 x i16] vector.
22912291
/// \returns A 128-bit signed [4 x i32] vector containing the sums of products
22922292
/// of both parameters.
2293-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_madd_epi16(__m128i __a,
2293+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_madd_epi16(__m128i __a,
22942294
__m128i __b) {
22952295
return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b);
22962296
}

clang/lib/Headers/mmintrin.h

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -679,11 +679,11 @@ _mm_subs_pu16(__m64 __m1, __m64 __m2) {
679679
/// A 64-bit integer vector of [4 x i16].
680680
/// \returns A 64-bit integer vector of [2 x i32] containing the sums of
681681
/// products of both parameters.
682-
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
682+
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
683683
_mm_madd_pi16(__m64 __m1, __m64 __m2)
684684
{
685-
return __trunc64(__builtin_ia32_pmaddwd128((__v8hi)__anyext128(__m1),
686-
(__v8hi)__anyext128(__m2)));
685+
return __trunc64(__builtin_ia32_pmaddwd128((__v8hi)__zext128(__m1),
686+
(__v8hi)__zext128(__m2)));
687687
}
688688

689689
/// Multiplies each 16-bit signed integer element of the first 64-bit

clang/lib/Headers/tmmintrin.h

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,9 @@
2323

2424
#define __trunc64(x) \
2525
(__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0)
26+
#define __zext128(x) \
27+
(__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \
28+
1, 2, 3)
2629
#define __anyext128(x) \
2730
(__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \
2831
1, -1, -1)
@@ -504,7 +507,7 @@ _mm_hsubs_pi16(__m64 __a, __m64 __b)
504507
/// \a R5 := (\a __a10 * \a __b10) + (\a __a11 * \a __b11) \n
505508
/// \a R6 := (\a __a12 * \a __b12) + (\a __a13 * \a __b13) \n
506509
/// \a R7 := (\a __a14 * \a __b14) + (\a __a15 * \a __b15)
507-
static __inline__ __m128i __DEFAULT_FN_ATTRS
510+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
508511
_mm_maddubs_epi16(__m128i __a, __m128i __b)
509512
{
510513
return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
@@ -534,11 +537,11 @@ _mm_maddubs_epi16(__m128i __a, __m128i __b)
534537
/// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
535538
/// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
536539
/// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7)
537-
static __inline__ __m64 __DEFAULT_FN_ATTRS
540+
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
538541
_mm_maddubs_pi16(__m64 __a, __m64 __b)
539542
{
540-
return __trunc64(__builtin_ia32_pmaddubsw128((__v16qi)__anyext128(__a),
541-
(__v16qi)__anyext128(__b)));
543+
return __trunc64(__builtin_ia32_pmaddubsw128((__v16qi)__zext128(__a),
544+
(__v16qi)__zext128(__b)));
542545
}
543546

544547
/// Multiplies packed 16-bit signed integer values, truncates the 32-bit

0 commit comments

Comments (0)