@@ -48,6 +48,14 @@ typedef unsigned int __v4su __attribute__((__vector_size__(16)));
4848 __min_vector_width__(128 )))
4949#endif
5050
// Attribute macros for intrinsics that may be evaluated in constant
// expressions. When the header is compiled as C++11 or later, each macro
// expands to its non-CONSTEXPR counterpart followed by `constexpr`; in C and
// in pre-C++11 C++ it expands to the plain attribute macro, so declarations
// using it are unchanged there.
51+ #if defined(__cplusplus) && (__cplusplus >= 201103L)
52+ #define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
53+ #define __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR __DEFAULT_FN_ATTRS_SSE2 constexpr
54+ #else
// No constexpr available: fall back to the plain attribute sets.
55+ #define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
56+ #define __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR __DEFAULT_FN_ATTRS_SSE2
57+ #endif
58+
// __trunc64(x): reinterprets x as __v2di, selects element 0 via
// __builtin_shufflevector (the second operand is an empty __v2di that is never
// indexed), and casts the one-element result to __m64.
5159#define __trunc64 (x ) \
5260 (__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0 )
5361#define __zext128 (x ) \
@@ -75,9 +83,8 @@ typedef unsigned int __v4su __attribute__((__vector_size__(16)));
7583// / \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the sum
7684// / of the lower 32 bits of both operands. The upper 96 bits are copied from
7785// / the upper 96 bits of the first source operand.
// _mm_add_ss: adds element 0 of __b into element 0 of the by-value copy of
// __a and returns that copy; the other elements of __a are not modified.
// Change in this hunk: __DEFAULT_FN_ATTRS -> __DEFAULT_FN_ATTRS_CONSTEXPR
// (adds `constexpr` when compiled as C++11 or later) and the opening brace is
// joined to the signature line.
// NOTE(review): ISO C++11 limits a constexpr function body to a single return
// statement; this body has a compound assignment plus a return, so under
// -std=c++11 it presumably relies on the compiler accepting relaxed
// (C++14-style) constexpr bodies as an extension -- confirm.
78- static __inline__ __m128 __DEFAULT_FN_ATTRS
79- _mm_add_ss (__m128 __a , __m128 __b )
80- {
86+ static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
87+ _mm_add_ss (__m128 __a, __m128 __b) {
8188 __a[0 ] += __b[0 ];
8289 return __a;
8390}
@@ -95,9 +102,8 @@ _mm_add_ss(__m128 __a, __m128 __b)
95102// / A 128-bit vector of [4 x float] containing one of the source operands.
96103// / \returns A 128-bit vector of [4 x float] containing the sums of both
97104// / operands.
// _mm_add_ps: element-wise sum of __a and __b, computed on the __v4sf view of
// both operands and cast back to __m128.
// Change in this hunk: __DEFAULT_FN_ATTRS -> __DEFAULT_FN_ATTRS_CONSTEXPR
// (adds `constexpr` when compiled as C++11 or later); brace style updated.
98- static __inline__ __m128 __DEFAULT_FN_ATTRS
99- _mm_add_ps (__m128 __a , __m128 __b )
100- {
105+ static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
106+ _mm_add_ps (__m128 __a, __m128 __b) {
101107 return (__m128)((__v4sf)__a + (__v4sf)__b);
102108}
103109
@@ -117,9 +123,8 @@ _mm_add_ps(__m128 __a, __m128 __b)
117123// / \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the
118124// / difference of the lower 32 bits of both operands. The upper 96 bits are
119125// / copied from the upper 96 bits of the first source operand.
// _mm_sub_ss: subtracts element 0 of __b from element 0 of the by-value copy
// of __a and returns that copy; the other elements of __a are not modified.
// Change in this hunk: __DEFAULT_FN_ATTRS -> __DEFAULT_FN_ATTRS_CONSTEXPR
// (adds `constexpr` when compiled as C++11 or later); brace style updated.
// NOTE(review): two-statement body that mutates a parameter; strict C++11
// constexpr allows only a single return statement, so this presumably depends
// on the compiler's relaxed-constexpr extension in C++11 mode -- confirm.
120- static __inline__ __m128 __DEFAULT_FN_ATTRS
121- _mm_sub_ss (__m128 __a , __m128 __b )
122- {
126+ static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
127+ _mm_sub_ss (__m128 __a, __m128 __b) {
123128 __a[0 ] -= __b[0 ];
124129 return __a;
125130}
@@ -138,9 +143,8 @@ _mm_sub_ss(__m128 __a, __m128 __b)
138143// / A 128-bit vector of [4 x float] containing the subtrahend.
139144// / \returns A 128-bit vector of [4 x float] containing the differences between
140145// / both operands.
// _mm_sub_ps: element-wise difference __a - __b, computed on the __v4sf view
// of both operands and cast back to __m128.
// Change in this hunk: __DEFAULT_FN_ATTRS -> __DEFAULT_FN_ATTRS_CONSTEXPR
// (adds `constexpr` when compiled as C++11 or later); brace style updated.
141- static __inline__ __m128 __DEFAULT_FN_ATTRS
142- _mm_sub_ps (__m128 __a , __m128 __b )
143- {
146+ static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
147+ _mm_sub_ps (__m128 __a, __m128 __b) {
144148 return (__m128)((__v4sf)__a - (__v4sf)__b);
145149}
146150
@@ -160,9 +164,8 @@ _mm_sub_ps(__m128 __a, __m128 __b)
160164// / \returns A 128-bit vector of [4 x float] containing the product of the lower
161165// / 32 bits of both operands. The upper 96 bits are copied from the upper 96
162166// / bits of the first source operand.
// _mm_mul_ss: multiplies element 0 of the by-value copy of __a by element 0
// of __b and returns that copy; the other elements of __a are not modified.
// Change in this hunk: __DEFAULT_FN_ATTRS -> __DEFAULT_FN_ATTRS_CONSTEXPR
// (adds `constexpr` when compiled as C++11 or later); brace style updated.
// NOTE(review): two-statement body that mutates a parameter; strict C++11
// constexpr allows only a single return statement, so this presumably depends
// on the compiler's relaxed-constexpr extension in C++11 mode -- confirm.
163- static __inline__ __m128 __DEFAULT_FN_ATTRS
164- _mm_mul_ss (__m128 __a , __m128 __b )
165- {
167+ static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
168+ _mm_mul_ss (__m128 __a, __m128 __b) {
166169 __a[0 ] *= __b[0 ];
167170 return __a;
168171}
@@ -180,9 +183,8 @@ _mm_mul_ss(__m128 __a, __m128 __b)
180183// / A 128-bit vector of [4 x float] containing one of the source operands.
181184// / \returns A 128-bit vector of [4 x float] containing the products of both
182185// / operands.
// _mm_mul_ps: element-wise product of __a and __b, computed on the __v4sf
// view of both operands and cast back to __m128.
// Change in this hunk: __DEFAULT_FN_ATTRS -> __DEFAULT_FN_ATTRS_CONSTEXPR
// (adds `constexpr` when compiled as C++11 or later); brace style updated.
183- static __inline__ __m128 __DEFAULT_FN_ATTRS
184- _mm_mul_ps (__m128 __a , __m128 __b )
185- {
186+ static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
187+ _mm_mul_ps (__m128 __a, __m128 __b) {
186188 return (__m128)((__v4sf)__a * (__v4sf)__b);
187189}
188190
@@ -202,9 +204,8 @@ _mm_mul_ps(__m128 __a, __m128 __b)
202204// / \returns A 128-bit vector of [4 x float] containing the quotients of the
203205// / lower 32 bits of both operands. The upper 96 bits are copied from the
204206// / upper 96 bits of the first source operand.
// _mm_div_ss: divides element 0 of the by-value copy of __a by element 0 of
// __b and returns that copy; the other elements of __a are not modified.
// Change in this hunk: __DEFAULT_FN_ATTRS -> __DEFAULT_FN_ATTRS_CONSTEXPR
// (adds `constexpr` when compiled as C++11 or later); brace style updated.
// NOTE(review): two-statement body that mutates a parameter; strict C++11
// constexpr allows only a single return statement, so this presumably depends
// on the compiler's relaxed-constexpr extension in C++11 mode -- confirm.
205- static __inline__ __m128 __DEFAULT_FN_ATTRS
206- _mm_div_ss (__m128 __a , __m128 __b )
207- {
207+ static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
208+ _mm_div_ss (__m128 __a, __m128 __b) {
208209 __a[0 ] /= __b[0 ];
209210 return __a;
210211}
@@ -221,9 +222,8 @@ _mm_div_ss(__m128 __a, __m128 __b)
221222// / A 128-bit vector of [4 x float] containing the divisor.
222223// / \returns A 128-bit vector of [4 x float] containing the quotients of both
223224// / operands.
// _mm_div_ps: element-wise quotient __a / __b, computed on the __v4sf view of
// both operands and cast back to __m128.
// Change in this hunk: __DEFAULT_FN_ATTRS -> __DEFAULT_FN_ATTRS_CONSTEXPR
// (adds `constexpr` when compiled as C++11 or later); brace style updated.
224- static __inline__ __m128 __DEFAULT_FN_ATTRS
225- _mm_div_ps (__m128 __a , __m128 __b )
226- {
225+ static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
226+ _mm_div_ps (__m128 __a, __m128 __b) {
227227 return (__m128)((__v4sf)__a / (__v4sf)__b);
228228}
229229
@@ -437,9 +437,8 @@ _mm_max_ps(__m128 __a, __m128 __b)
437437// / A 128-bit vector containing one of the source operands.
438438// / \returns A 128-bit vector of [4 x float] containing the bitwise AND of the
439439// / values between both operands.
// _mm_and_ps: bitwise AND of the two operands. Both are reinterpreted as
// __v4su (vector of unsigned int) so the integer `&` operator applies, and
// the result is cast back to __m128.
// Change in this hunk: __DEFAULT_FN_ATTRS -> __DEFAULT_FN_ATTRS_CONSTEXPR
// (adds `constexpr` when compiled as C++11 or later); brace style updated.
440- static __inline__ __m128 __DEFAULT_FN_ATTRS
441- _mm_and_ps (__m128 __a , __m128 __b )
442- {
440+ static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
441+ _mm_and_ps (__m128 __a, __m128 __b) {
443442 return (__m128)((__v4su)__a & (__v4su)__b);
444443}
445444
@@ -459,9 +458,8 @@ _mm_and_ps(__m128 __a, __m128 __b)
459458// / \returns A 128-bit vector of [4 x float] containing the bitwise AND of the
460459// / one's complement of the first operand and the values in the second
461460// / operand.
// _mm_andnot_ps: computes (~__a) & __b on the __v4su (unsigned int vector)
// view of the operands and casts the result back to __m128. Note that the
// complement is applied to the FIRST operand only.
// Change in this hunk: __DEFAULT_FN_ATTRS -> __DEFAULT_FN_ATTRS_CONSTEXPR
// (adds `constexpr` when compiled as C++11 or later); brace style updated.
462- static __inline__ __m128 __DEFAULT_FN_ATTRS
463- _mm_andnot_ps (__m128 __a , __m128 __b )
464- {
461+ static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
462+ _mm_andnot_ps (__m128 __a, __m128 __b) {
465463 return (__m128)(~(__v4su)__a & (__v4su)__b);
466464}
467465
@@ -477,9 +475,8 @@ _mm_andnot_ps(__m128 __a, __m128 __b)
477475// / A 128-bit vector of [4 x float] containing one of the source operands.
478476// / \returns A 128-bit vector of [4 x float] containing the bitwise OR of the
479477// / values between both operands.
// _mm_or_ps: bitwise OR of the two operands, computed on their __v4su
// (unsigned int vector) view and cast back to __m128.
// Change in this hunk: __DEFAULT_FN_ATTRS -> __DEFAULT_FN_ATTRS_CONSTEXPR
// (adds `constexpr` when compiled as C++11 or later); brace style updated.
480- static __inline__ __m128 __DEFAULT_FN_ATTRS
481- _mm_or_ps (__m128 __a , __m128 __b )
482- {
478+ static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
479+ _mm_or_ps (__m128 __a, __m128 __b) {
483480 return (__m128)((__v4su)__a | (__v4su)__b);
484481}
485482
@@ -496,9 +493,8 @@ _mm_or_ps(__m128 __a, __m128 __b)
496493// / A 128-bit vector of [4 x float] containing one of the source operands.
497494// / \returns A 128-bit vector of [4 x float] containing the bitwise exclusive OR
498495// / of the values between both operands.
// _mm_xor_ps: bitwise exclusive OR of the two operands, computed on their
// __v4su (unsigned int vector) view and cast back to __m128.
// Change in this hunk: __DEFAULT_FN_ATTRS -> __DEFAULT_FN_ATTRS_CONSTEXPR
// (adds `constexpr` when compiled as C++11 or later); brace style updated.
499- static __inline__ __m128 __DEFAULT_FN_ATTRS
500- _mm_xor_ps (__m128 __a , __m128 __b )
501- {
496+ static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
497+ _mm_xor_ps (__m128 __a, __m128 __b) {
502498 return (__m128)((__v4su)__a ^ (__v4su)__b);
503499}
504500
@@ -1738,9 +1734,8 @@ _mm_cvt_pi2ps(__m128 __a, __m64 __b)
17381734// / A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
17391735// / used in the extraction.
17401736// / \returns A 32-bit float containing the extracted value.
// _mm_cvtss_f32: returns element 0 of __a as a scalar float.
// Change in this hunk: __DEFAULT_FN_ATTRS -> __DEFAULT_FN_ATTRS_CONSTEXPR
// (adds `constexpr` when compiled as C++11 or later); brace style updated.
1741- static __inline__ float __DEFAULT_FN_ATTRS
1742- _mm_cvtss_f32 (__m128 __a )
1743- {
1737+ static __inline__ float __DEFAULT_FN_ATTRS_CONSTEXPR
1738+ _mm_cvtss_f32 (__m128 __a) {
17441739 return __a[0 ];
17451740}
17461741
@@ -1931,9 +1926,8 @@ _mm_undefined_ps(void)
19311926// / \returns An initialized 128-bit floating-point vector of [4 x float]. The
19321927// / lower 32 bits contain the value provided in the source operand. The
19331928// / upper 96 bits are set to zero.
// _mm_set_ss: builds a vector whose element 0 is __w and whose elements 1..3
// are 0.0f.
// Change in this hunk: __DEFAULT_FN_ATTRS -> __DEFAULT_FN_ATTRS_CONSTEXPR
// (adds `constexpr` when compiled as C++11 or later); brace style updated.
1934- static __inline__ __m128 __DEFAULT_FN_ATTRS
1935- _mm_set_ss (float __w )
1936- {
1929+ static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
1930+ _mm_set_ss (float __w) {
19371931 return __extension__ (__m128){ __w, 0 .0f , 0 .0f , 0 .0f };
19381932}
19391933
@@ -1949,9 +1943,8 @@ _mm_set_ss(float __w)
19491943// / A single-precision floating-point value used to initialize each vector
19501944// / element of the result.
19511945// / \returns An initialized 128-bit floating-point vector of [4 x float].
// _mm_set1_ps: builds a vector with all four elements equal to __w.
// Change in this hunk: __DEFAULT_FN_ATTRS -> __DEFAULT_FN_ATTRS_CONSTEXPR
// (adds `constexpr` when compiled as C++11 or later); brace style updated.
1952- static __inline__ __m128 __DEFAULT_FN_ATTRS
1953- _mm_set1_ps (float __w )
1954- {
1946+ static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
1947+ _mm_set1_ps (float __w) {
19551948 return __extension__ (__m128){ __w, __w, __w, __w };
19561949}
19571950
@@ -1968,9 +1961,8 @@ _mm_set1_ps(float __w)
19681961// / A single-precision floating-point value used to initialize each vector
19691962// / element of the result.
19701963// / \returns An initialized 128-bit floating-point vector of [4 x float].
// _mm_set_ps1: alternate spelling of _mm_set1_ps; forwards __w to it and
// returns its result unchanged.
// Change in this hunk: __DEFAULT_FN_ATTRS -> __DEFAULT_FN_ATTRS_CONSTEXPR
// (adds `constexpr` when compiled as C++11 or later); brace style updated.
// The callee _mm_set1_ps receives the same attribute change in this patch,
// which a constexpr caller requires.
1971- static __inline__ __m128 __DEFAULT_FN_ATTRS
1972- _mm_set_ps1 (float __w )
1973- {
1964+ static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
1965+ _mm_set_ps1 (float __w) {
19741966 return _mm_set1_ps (__w);
19751967}
19761968
@@ -1995,9 +1987,8 @@ _mm_set_ps1(float __w)
19951987// / A single-precision floating-point value used to initialize bits [31:0]
19961988// / of the result.
19971989// / \returns An initialized 128-bit floating-point vector of [4 x float].
// _mm_set_ps: builds a vector from four floats given highest element first.
// The initializer lists them in reverse parameter order, so __w becomes
// element 0 and __z becomes element 3.
// Change in this hunk: __DEFAULT_FN_ATTRS -> __DEFAULT_FN_ATTRS_CONSTEXPR
// (adds `constexpr` when compiled as C++11 or later); brace style updated.
1998- static __inline__ __m128 __DEFAULT_FN_ATTRS
1999- _mm_set_ps (float __z , float __y , float __x , float __w )
2000- {
1990+ static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
1991+ _mm_set_ps (float __z, float __y, float __x, float __w) {
20011992 return __extension__ (__m128){ __w, __x, __y, __z };
20021993}
20031994
@@ -2023,9 +2014,8 @@ _mm_set_ps(float __z, float __y, float __x, float __w)
20232014// / A single-precision floating-point value used to initialize bits [127:96]
20242015// / of the result.
20252016// / \returns An initialized 128-bit floating-point vector of [4 x float].
// _mm_setr_ps: builds a vector from four floats in parameter order: __z
// becomes element 0, __y element 1, __x element 2 and __w element 3 (the
// reverse element placement of _mm_set_ps).
// Change in this hunk: __DEFAULT_FN_ATTRS -> __DEFAULT_FN_ATTRS_CONSTEXPR
// (adds `constexpr` when compiled as C++11 or later); brace style updated.
2026- static __inline__ __m128 __DEFAULT_FN_ATTRS
2027- _mm_setr_ps (float __z , float __y , float __x , float __w )
2028- {
2017+ static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
2018+ _mm_setr_ps (float __z, float __y, float __x, float __w) {
20292019 return __extension__ (__m128){ __z, __y, __x, __w };
20302020}
20312021
@@ -2038,9 +2028,8 @@ _mm_setr_ps(float __z, float __y, float __x, float __w)
20382028// /
20392029// / \returns An initialized 128-bit floating-point vector of [4 x float] with
20402030// / all elements set to zero.
// _mm_setzero_ps: returns a vector with all four elements set to 0.0f.
// Change in this hunk: __DEFAULT_FN_ATTRS -> __DEFAULT_FN_ATTRS_CONSTEXPR
// (adds `constexpr` when compiled as C++11 or later); brace style updated.
2041- static __inline__ __m128 __DEFAULT_FN_ATTRS
2042- _mm_setzero_ps (void )
2043- {
2031+ static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
2032+ _mm_setzero_ps (void ) {
20442033 return __extension__ (__m128){ 0 .0f , 0 .0f , 0 .0f , 0 .0f };
20452034}
20462035
@@ -2786,9 +2775,8 @@ void _mm_setcsr(unsigned int __i);
27862775// / Bits [95:64] are written to bits [63:32] of the destination. \n
27872776// / Bits [127:96] are written to bits [127:96] of the destination.
27882777// / \returns A 128-bit vector of [4 x float] containing the interleaved values.
// _mm_unpackhi_ps: interleaves the upper halves of the operands. In
// __builtin_shufflevector, indices 0..3 select from the first vector and 4..7
// from the second, so (2, 6, 3, 7) yields { __a[2], __b[2], __a[3], __b[3] }.
// Change in this hunk: __DEFAULT_FN_ATTRS -> __DEFAULT_FN_ATTRS_CONSTEXPR
// (adds `constexpr` when compiled as C++11 or later); brace style updated.
2789- static __inline__ __m128 __DEFAULT_FN_ATTRS
2790- _mm_unpackhi_ps (__m128 __a , __m128 __b )
2791- {
2778+ static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
2779+ _mm_unpackhi_ps (__m128 __a, __m128 __b) {
27922780 return __builtin_shufflevector ((__v4sf)__a, (__v4sf)__b, 2 , 6 , 3 , 7 );
27932781}
27942782
@@ -2808,9 +2796,8 @@ _mm_unpackhi_ps(__m128 __a, __m128 __b)
28082796// / Bits [31:0] are written to bits [63:32] of the destination. \n
28092797// / Bits [63:32] are written to bits [127:96] of the destination.
28102798// / \returns A 128-bit vector of [4 x float] containing the interleaved values.
// _mm_unpacklo_ps: interleaves the lower halves of the operands. In
// __builtin_shufflevector, indices 0..3 select from the first vector and 4..7
// from the second, so (0, 4, 1, 5) yields { __a[0], __b[0], __a[1], __b[1] }.
// Change in this hunk: __DEFAULT_FN_ATTRS -> __DEFAULT_FN_ATTRS_CONSTEXPR
// (adds `constexpr` when compiled as C++11 or later); brace style updated.
2811- static __inline__ __m128 __DEFAULT_FN_ATTRS
2812- _mm_unpacklo_ps (__m128 __a , __m128 __b )
2813- {
2799+ static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
2800+ _mm_unpacklo_ps (__m128 __a, __m128 __b) {
28142801 return __builtin_shufflevector ((__v4sf)__a, (__v4sf)__b, 0 , 4 , 1 , 5 );
28152802}
28162803
@@ -2830,9 +2817,8 @@ _mm_unpacklo_ps(__m128 __a, __m128 __b)
28302817// / A 128-bit floating-point vector of [4 x float]. The lower 32 bits are
28312818// / written to the lower 32 bits of the result.
28322819// / \returns A 128-bit floating-point vector of [4 x float].
// _mm_move_ss: overwrites element 0 of the by-value copy of __a with element
// 0 of __b and returns that copy; elements 1..3 still come from __a.
// Change in this hunk: __DEFAULT_FN_ATTRS -> __DEFAULT_FN_ATTRS_CONSTEXPR
// (adds `constexpr` when compiled as C++11 or later); brace style updated.
// NOTE(review): two-statement body that mutates a parameter; strict C++11
// constexpr allows only a single return statement, so this presumably depends
// on the compiler's relaxed-constexpr extension in C++11 mode -- confirm.
2833- static __inline__ __m128 __DEFAULT_FN_ATTRS
2834- _mm_move_ss (__m128 __a , __m128 __b )
2835- {
2820+ static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
2821+ _mm_move_ss (__m128 __a, __m128 __b) {
28362822 __a[0 ] = __b[0 ];
28372823 return __a;
28382824}
@@ -2852,9 +2838,8 @@ _mm_move_ss(__m128 __a, __m128 __b)
28522838// / A 128-bit floating-point vector of [4 x float]. The upper 64 bits are
28532839// / written to the lower 64 bits of the result.
28542840// / \returns A 128-bit floating-point vector of [4 x float].
// _mm_movehl_ps: places the upper half of __b in the lower half of the result
// and keeps the upper half of __a. In __builtin_shufflevector, indices 0..3
// select from the first vector and 4..7 from the second, so (6, 7, 2, 3)
// yields { __b[2], __b[3], __a[2], __a[3] }.
// Change in this hunk: __DEFAULT_FN_ATTRS -> __DEFAULT_FN_ATTRS_CONSTEXPR
// (adds `constexpr` when compiled as C++11 or later); brace style updated.
2855- static __inline__ __m128 __DEFAULT_FN_ATTRS
2856- _mm_movehl_ps (__m128 __a , __m128 __b )
2857- {
2841+ static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
2842+ _mm_movehl_ps (__m128 __a, __m128 __b) {
28582843 return __builtin_shufflevector ((__v4sf)__a, (__v4sf)__b, 6 , 7 , 2 , 3 );
28592844}
28602845
@@ -2873,9 +2858,8 @@ _mm_movehl_ps(__m128 __a, __m128 __b)
28732858// / A 128-bit floating-point vector of [4 x float]. The lower 64 bits are
28742859// / written to the upper 64 bits of the result.
28752860// / \returns A 128-bit floating-point vector of [4 x float].
// _mm_movelh_ps: keeps the lower half of __a and places the lower half of __b
// in the upper half of the result. In __builtin_shufflevector, indices 0..3
// select from the first vector and 4..7 from the second, so (0, 1, 4, 5)
// yields { __a[0], __a[1], __b[0], __b[1] }.
// Change in this hunk: __DEFAULT_FN_ATTRS -> __DEFAULT_FN_ATTRS_CONSTEXPR
// (adds `constexpr` when compiled as C++11 or later); brace style updated.
2876- static __inline__ __m128 __DEFAULT_FN_ATTRS
2877- _mm_movelh_ps (__m128 __a , __m128 __b )
2878- {
2861+ static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
2862+ _mm_movelh_ps (__m128 __a, __m128 __b) {
28792863 return __builtin_shufflevector ((__v4sf)__a, (__v4sf)__b, 0 , 1 , 4 , 5 );
28802864}
28812865
@@ -3207,7 +3191,9 @@ do { \
// End-of-header cleanup: remove the header's private helper macros so they do
// not leak to code that includes it. The two *_CONSTEXPR attribute macros
// defined near the top of the header are undefined here alongside the
// attribute macros they wrap.
32073191#undef __anyext128
32083192#undef __zeroupper64
32093193#undef __DEFAULT_FN_ATTRS
3194+ #undef __DEFAULT_FN_ATTRS_CONSTEXPR
32103195#undef __DEFAULT_FN_ATTRS_SSE2
3196+ #undef __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
32113197
32123198/* Ugly hack for backwards-compatibility (compatible with gcc) */
32133199#if defined(__SSE2__) && !__building_module(_Builtin_intrinsics)
0 commit comments