@@ -204,10 +204,10 @@ _mm_abs_epi32(__m128i __a) {
204204// / destination.
205205// / \returns A 128-bit vector of [8 x i16] containing the horizontal sums of
206206// / both operands.
207- static __inline__ __m128i __DEFAULT_FN_ATTRS
208- _mm_hadd_epi16(__m128i __a, __m128i __b)
209- {
210- return (__m128i) __builtin_ia32_phaddw128 ( (__v8hi)__a, (__v8hi)__b);
207+ static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
208+ _mm_hadd_epi16(__m128i __a, __m128i __b) {
209+ return (__m128i) __builtin_ia32_phaddw128 (
210+ (__v8hi)__a, (__v8hi)__b);
211211}
212212
213213// / Horizontally adds the adjacent pairs of values contained in 2 packed
@@ -227,10 +227,9 @@ _mm_hadd_epi16(__m128i __a, __m128i __b)
227227// / destination.
228228// / \returns A 128-bit vector of [4 x i32] containing the horizontal sums of
229229// / both operands.
230- static __inline__ __m128i __DEFAULT_FN_ATTRS
231- _mm_hadd_epi32 (__m128i __a, __m128i __b)
232- {
233- return (__m128i)__builtin_ia32_phaddd128 ((__v4si)__a, (__v4si)__b);
230+ static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
231+ _mm_hadd_epi32 (__m128i __a, __m128i __b) {
232+ return (__m128i)__builtin_ia32_phaddd128 ((__v4si)__a, (__v4si)__b);
234233}
235234
236235// / Horizontally adds the adjacent pairs of values contained in 2 packed
@@ -250,11 +249,10 @@ _mm_hadd_epi32(__m128i __a, __m128i __b)
250249// / destination.
251250// / \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both
252251// / operands.
253- static __inline__ __m64 __DEFAULT_FN_ATTRS
254- _mm_hadd_pi16 (__m64 __a, __m64 __b)
255- {
256- return __trunc64 (__builtin_ia32_phaddw128 (
257- (__v8hi)__builtin_shufflevector (__a, __b, 0 , 1 ), (__v8hi){}));
252+ static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
253+ _mm_hadd_pi16 (__m64 __a, __m64 __b) {
254+ return __trunc64 (__builtin_ia32_phaddw128 (
255+ (__v8hi)__builtin_shufflevector (__a, __b, 0 , 1 ), (__v8hi){}));
258256}
259257
260258// / Horizontally adds the adjacent pairs of values contained in 2 packed
@@ -274,7 +272,7 @@ _mm_hadd_pi16(__m64 __a, __m64 __b)
274272// / destination.
275273// / \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both
276274// / operands.
277- static __inline__ __m64 __DEFAULT_FN_ATTRS
275+ static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
278276_mm_hadd_pi32 (__m64 __a, __m64 __b)
279277{
280278 return __trunc64 (__builtin_ia32_phaddd128 (
@@ -301,10 +299,9 @@ _mm_hadd_pi32(__m64 __a, __m64 __b)
301299// / destination.
302300// / \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
303301// / sums of both operands.
304- static __inline__ __m128i __DEFAULT_FN_ATTRS
305- _mm_hadds_epi16 (__m128i __a, __m128i __b)
306- {
307- return (__m128i)__builtin_ia32_phaddsw128 ((__v8hi)__a, (__v8hi)__b);
302+ static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
303+ _mm_hadds_epi16 (__m128i __a, __m128i __b) {
304+ return (__m128i)__builtin_ia32_phaddsw128 ((__v8hi)__a, (__v8hi)__b);
308305}
309306
310307// / Horizontally adds, with saturation, the adjacent pairs of values contained
@@ -327,7 +324,7 @@ _mm_hadds_epi16(__m128i __a, __m128i __b)
327324// / destination.
328325// / \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
329326// / sums of both operands.
330- static __inline__ __m64 __DEFAULT_FN_ATTRS
327+ static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
331328_mm_hadds_pi16 (__m64 __a, __m64 __b)
332329{
333330 return __trunc64 (__builtin_ia32_phaddsw128 (
@@ -351,10 +348,9 @@ _mm_hadds_pi16(__m64 __a, __m64 __b)
351348// / the destination.
352349// / \returns A 128-bit vector of [8 x i16] containing the horizontal differences
353350// / of both operands.
354- static __inline__ __m128i __DEFAULT_FN_ATTRS
355- _mm_hsub_epi16 (__m128i __a, __m128i __b)
356- {
357- return (__m128i)__builtin_ia32_phsubw128 ((__v8hi)__a, (__v8hi)__b);
351+ static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
352+ _mm_hsub_epi16 (__m128i __a, __m128i __b) {
353+ return (__m128i)__builtin_ia32_phsubw128 ((__v8hi)__a, (__v8hi)__b);
358354}
359355
360356// / Horizontally subtracts the adjacent pairs of values contained in 2
@@ -374,10 +370,9 @@ _mm_hsub_epi16(__m128i __a, __m128i __b)
374370// / the destination.
375371// / \returns A 128-bit vector of [4 x i32] containing the horizontal differences
376372// / of both operands.
377- static __inline__ __m128i __DEFAULT_FN_ATTRS
378- _mm_hsub_epi32 (__m128i __a, __m128i __b)
379- {
380- return (__m128i)__builtin_ia32_phsubd128 ((__v4si)__a, (__v4si)__b);
373+ static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
374+ _mm_hsub_epi32 (__m128i __a, __m128i __b) {
375+ return (__m128i)__builtin_ia32_phsubd128 ((__v4si)__a, (__v4si)__b);
381376}
382377
383378// / Horizontally subtracts the adjacent pairs of values contained in 2
@@ -397,7 +392,7 @@ _mm_hsub_epi32(__m128i __a, __m128i __b)
397392// / the destination.
398393// / \returns A 64-bit vector of [4 x i16] containing the horizontal differences
399394// / of both operands.
400- static __inline__ __m64 __DEFAULT_FN_ATTRS
395+ static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
401396_mm_hsub_pi16 (__m64 __a, __m64 __b)
402397{
403398 return __trunc64 (__builtin_ia32_phsubw128 (
@@ -421,7 +416,7 @@ _mm_hsub_pi16(__m64 __a, __m64 __b)
421416// / the destination.
422417// / \returns A 64-bit vector of [2 x i32] containing the horizontal differences
423418// / of both operands.
424- static __inline__ __m64 __DEFAULT_FN_ATTRS
419+ static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
425420_mm_hsub_pi32 (__m64 __a, __m64 __b)
426421{
427422 return __trunc64 (__builtin_ia32_phsubd128 (
@@ -448,10 +443,9 @@ _mm_hsub_pi32(__m64 __a, __m64 __b)
448443// / the destination.
449444// / \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
450445// / differences of both operands.
451- static __inline__ __m128i __DEFAULT_FN_ATTRS
452- _mm_hsubs_epi16 (__m128i __a, __m128i __b)
453- {
454- return (__m128i)__builtin_ia32_phsubsw128 ((__v8hi)__a, (__v8hi)__b);
446+ static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
447+ _mm_hsubs_epi16 (__m128i __a, __m128i __b) {
448+ return (__m128i)__builtin_ia32_phsubsw128 ((__v8hi)__a, (__v8hi)__b);
455449}
456450
457451// / Horizontally subtracts, with saturation, the adjacent pairs of values
@@ -474,7 +468,7 @@ _mm_hsubs_epi16(__m128i __a, __m128i __b)
474468// / the destination.
475469// / \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
476470// / differences of both operands.
477- static __inline__ __m64 __DEFAULT_FN_ATTRS
471+ static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
478472_mm_hsubs_pi16 (__m64 __a, __m64 __b)
479473{
480474 return __trunc64 (__builtin_ia32_phsubsw128 (
@@ -509,10 +503,9 @@ _mm_hsubs_pi16(__m64 __a, __m64 __b)
509503// / \a R5 := (\a __a10 * \a __b10) + (\a __a11 * \a __b11) \n
510504// / \a R6 := (\a __a12 * \a __b12) + (\a __a13 * \a __b13) \n
511505// / \a R7 := (\a __a14 * \a __b14) + (\a __a15 * \a __b15)
512- static __inline__ __m128i __DEFAULT_FN_ATTRS
513- _mm_maddubs_epi16 (__m128i __a, __m128i __b)
514- {
515- return (__m128i)__builtin_ia32_pmaddubsw128 ((__v16qi)__a, (__v16qi)__b);
506+ static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
507+ _mm_maddubs_epi16 (__m128i __a, __m128i __b) {
508+ return (__m128i)__builtin_ia32_pmaddubsw128 ((__v16qi)__a, (__v16qi)__b);
516509}
517510
518511// / Multiplies corresponding pairs of packed 8-bit unsigned integer
@@ -539,11 +532,10 @@ _mm_maddubs_epi16(__m128i __a, __m128i __b)
539532// / \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
540533// / \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
541534// / \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7)
542- static __inline__ __m64 __DEFAULT_FN_ATTRS
543- _mm_maddubs_pi16 (__m64 __a, __m64 __b)
544- {
545- return __trunc64 (__builtin_ia32_pmaddubsw128 ((__v16qi)__anyext128 (__a),
546- (__v16qi)__anyext128 (__b)));
535+ static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
536+ _mm_maddubs_pi16 (__m64 __a, __m64 __b) {
537+ return __trunc64 (__builtin_ia32_pmaddubsw128 ((__v16qi)__anyext128 (__a),
538+ (__v16qi)__anyext128 (__b)));
547539}
548540
549541// / Multiplies packed 16-bit signed integer values, truncates the 32-bit
@@ -560,7 +552,7 @@ _mm_maddubs_pi16(__m64 __a, __m64 __b)
560552// / A 128-bit vector of [8 x i16] containing one of the source operands.
561553// / \returns A 128-bit vector of [8 x i16] containing the rounded and scaled
562554// / products of both operands.
563- static __inline__ __m128i __DEFAULT_FN_ATTRS
555+ static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
564556_mm_mulhrs_epi16 (__m128i __a, __m128i __b)
565557{
566558 return (__m128i)__builtin_ia32_pmulhrsw128 ((__v8hi)__a, (__v8hi)__b);
0 commit comments