Skip to content

Commit ab819f4

Browse files
committed
[Headers][X86] Allow FMA3/FMA4 vector intrinsics to be used in constexpr
Now that #152455 is done, we can make all the vector fma intrinsics that wrap __builtin_elementwise_fma to be constexpr
1 parent 0499d3a commit ab819f4

File tree

4 files changed

+86
-32
lines changed

4 files changed

+86
-32
lines changed

clang/lib/Headers/fma4intrin.h

Lines changed: 26 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,22 @@
2020
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("fma4"), __min_vector_width__(128)))
2121
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("fma4"), __min_vector_width__(256)))
2222

23-
static __inline__ __m128 __DEFAULT_FN_ATTRS128
23+
#if defined(__cplusplus) && (__cplusplus >= 201103L)
24+
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
25+
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
26+
#else
27+
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
28+
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
29+
#endif
30+
31+
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
2432
_mm_macc_ps(__m128 __A, __m128 __B, __m128 __C)
2533
{
2634
return (__m128)__builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B,
2735
(__v4sf)__C);
2836
}
2937

30-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
38+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
3139
_mm_macc_pd(__m128d __A, __m128d __B, __m128d __C)
3240
{
3341
return (__m128d)__builtin_elementwise_fma((__v2df)__A, (__v2df)__B,
@@ -46,14 +54,14 @@ _mm_macc_sd(__m128d __A, __m128d __B, __m128d __C)
4654
return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
4755
}
4856

49-
static __inline__ __m128 __DEFAULT_FN_ATTRS128
57+
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
5058
_mm_msub_ps(__m128 __A, __m128 __B, __m128 __C)
5159
{
5260
return (__m128)__builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B,
5361
-(__v4sf)__C);
5462
}
5563

56-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
64+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
5765
_mm_msub_pd(__m128d __A, __m128d __B, __m128d __C)
5866
{
5967
return (__m128d)__builtin_elementwise_fma((__v2df)__A, (__v2df)__B,
@@ -72,14 +80,14 @@ _mm_msub_sd(__m128d __A, __m128d __B, __m128d __C)
7280
return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
7381
}
7482

75-
static __inline__ __m128 __DEFAULT_FN_ATTRS128
83+
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
7684
_mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C)
7785
{
7886
return (__m128)__builtin_elementwise_fma(-(__v4sf)__A, (__v4sf)__B,
7987
(__v4sf)__C);
8088
}
8189

82-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
90+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
8391
_mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C)
8492
{
8593
return (__m128d)__builtin_elementwise_fma(-(__v2df)__A, (__v2df)__B,
@@ -98,14 +106,14 @@ _mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C)
98106
return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
99107
}
100108

101-
static __inline__ __m128 __DEFAULT_FN_ATTRS128
109+
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
102110
_mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C)
103111
{
104112
return (__m128)__builtin_elementwise_fma(-(__v4sf)__A, (__v4sf)__B,
105113
-(__v4sf)__C);
106114
}
107115

108-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
116+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
109117
_mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C)
110118
{
111119
return (__m128d)__builtin_elementwise_fma(-(__v2df)__A, (__v2df)__B,
@@ -148,56 +156,56 @@ _mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C)
148156
return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
149157
}
150158

151-
static __inline__ __m256 __DEFAULT_FN_ATTRS256
159+
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
152160
_mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C)
153161
{
154162
return (__m256)__builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B,
155163
(__v8sf)__C);
156164
}
157165

158-
static __inline__ __m256d __DEFAULT_FN_ATTRS256
166+
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
159167
_mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C)
160168
{
161169
return (__m256d)__builtin_elementwise_fma((__v4df)__A, (__v4df)__B,
162170
(__v4df)__C);
163171
}
164172

165-
static __inline__ __m256 __DEFAULT_FN_ATTRS256
173+
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
166174
_mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C)
167175
{
168176
return (__m256)__builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B,
169177
-(__v8sf)__C);
170178
}
171179

172-
static __inline__ __m256d __DEFAULT_FN_ATTRS256
180+
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
173181
_mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C)
174182
{
175183
return (__m256d)__builtin_elementwise_fma((__v4df)__A, (__v4df)__B,
176184
-(__v4df)__C);
177185
}
178186

179-
static __inline__ __m256 __DEFAULT_FN_ATTRS256
187+
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
180188
_mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C)
181189
{
182190
return (__m256)__builtin_elementwise_fma(-(__v8sf)__A, (__v8sf)__B,
183191
(__v8sf)__C);
184192
}
185193

186-
static __inline__ __m256d __DEFAULT_FN_ATTRS256
194+
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
187195
_mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C)
188196
{
189197
return (__m256d)__builtin_elementwise_fma(-(__v4df)__A, (__v4df)__B,
190198
(__v4df)__C);
191199
}
192200

193-
static __inline__ __m256 __DEFAULT_FN_ATTRS256
201+
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
194202
_mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C)
195203
{
196204
return (__m256)__builtin_elementwise_fma(-(__v8sf)__A, (__v8sf)__B,
197205
-(__v8sf)__C);
198206
}
199207

200-
static __inline__ __m256d __DEFAULT_FN_ATTRS256
208+
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
201209
_mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C)
202210
{
203211
return (__m256d)__builtin_elementwise_fma(-(__v4df)__A, (__v4df)__B,
@@ -230,5 +238,7 @@ _mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C)
230238

231239
#undef __DEFAULT_FN_ATTRS128
232240
#undef __DEFAULT_FN_ATTRS256
241+
#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
242+
#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
233243

234244
#endif /* __FMA4INTRIN_H */

clang/lib/Headers/fmaintrin.h

Lines changed: 26 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,14 @@
1818
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(128)))
1919
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(256)))
2020

21+
#if defined(__cplusplus) && (__cplusplus >= 201103L)
22+
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
23+
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
24+
#else
25+
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
26+
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
27+
#endif
28+
2129
/// Computes a multiply-add of 128-bit vectors of [4 x float].
2230
/// For each element, computes <c> (__A * __B) + __C </c>.
2331
///
@@ -32,7 +40,7 @@
3240
/// \param __C
3341
/// A 128-bit vector of [4 x float] containing the addend.
3442
/// \returns A 128-bit vector of [4 x float] containing the result.
35-
static __inline__ __m128 __DEFAULT_FN_ATTRS128
43+
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
3644
_mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C)
3745
{
3846
return (__m128)__builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B,
@@ -53,7 +61,7 @@ _mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C)
5361
/// \param __C
5462
/// A 128-bit vector of [2 x double] containing the addend.
5563
/// \returns A 128-bit [2 x double] vector containing the result.
56-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
64+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
5765
_mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C)
5866
{
5967
return (__m128d)__builtin_elementwise_fma((__v2df)__A, (__v2df)__B,
@@ -132,7 +140,7 @@ _mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C)
132140
/// \param __C
133141
/// A 128-bit vector of [4 x float] containing the subtrahend.
134142
/// \returns A 128-bit vector of [4 x float] containing the result.
135-
static __inline__ __m128 __DEFAULT_FN_ATTRS128
143+
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
136144
_mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C)
137145
{
138146
return (__m128)__builtin_elementwise_fma((__v4sf)__A, (__v4sf)__B,
@@ -153,7 +161,7 @@ _mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C)
153161
/// \param __C
154162
/// A 128-bit vector of [2 x double] containing the addend.
155163
/// \returns A 128-bit vector of [2 x double] containing the result.
156-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
164+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
157165
_mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)
158166
{
159167
return (__m128d)__builtin_elementwise_fma((__v2df)__A, (__v2df)__B,
@@ -232,7 +240,7 @@ _mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C)
232240
/// \param __C
233241
/// A 128-bit vector of [4 x float] containing the addend.
234242
/// \returns A 128-bit [4 x float] vector containing the result.
235-
static __inline__ __m128 __DEFAULT_FN_ATTRS128
243+
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
236244
_mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C)
237245
{
238246
return (__m128)__builtin_elementwise_fma(-(__v4sf)__A, (__v4sf)__B,
@@ -253,7 +261,7 @@ _mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C)
253261
/// \param __C
254262
/// A 128-bit vector of [2 x double] containing the addend.
255263
/// \returns A 128-bit vector of [2 x double] containing the result.
256-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
264+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
257265
_mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)
258266
{
259267
return (__m128d)__builtin_elementwise_fma(-(__v2df)__A, (__v2df)__B,
@@ -332,7 +340,7 @@ _mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C)
332340
/// \param __C
333341
/// A 128-bit vector of [4 x float] containing the subtrahend.
334342
/// \returns A 128-bit vector of [4 x float] containing the result.
335-
static __inline__ __m128 __DEFAULT_FN_ATTRS128
343+
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
336344
_mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C)
337345
{
338346
return (__m128)__builtin_elementwise_fma(-(__v4sf)__A, (__v4sf)__B,
@@ -353,7 +361,7 @@ _mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C)
353361
/// \param __C
354362
/// A 128-bit vector of [2 x double] containing the subtrahend.
355363
/// \returns A 128-bit vector of [2 x double] containing the result.
356-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
364+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
357365
_mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)
358366
{
359367
return (__m128d)__builtin_elementwise_fma(-(__v2df)__A, (__v2df)__B,
@@ -536,7 +544,7 @@ _mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C)
536544
/// \param __C
537545
/// A 256-bit vector of [8 x float] containing the addend.
538546
/// \returns A 256-bit vector of [8 x float] containing the result.
539-
static __inline__ __m256 __DEFAULT_FN_ATTRS256
547+
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
540548
_mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C)
541549
{
542550
return (__m256)__builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B,
@@ -557,7 +565,7 @@ _mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C)
557565
/// \param __C
558566
/// A 256-bit vector of [4 x double] containing the addend.
559567
/// \returns A 256-bit vector of [4 x double] containing the result.
560-
static __inline__ __m256d __DEFAULT_FN_ATTRS256
568+
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
561569
_mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C)
562570
{
563571
return (__m256d)__builtin_elementwise_fma((__v4df)__A, (__v4df)__B,
@@ -578,7 +586,7 @@ _mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C)
578586
/// \param __C
579587
/// A 256-bit vector of [8 x float] containing the subtrahend.
580588
/// \returns A 256-bit vector of [8 x float] containing the result.
581-
static __inline__ __m256 __DEFAULT_FN_ATTRS256
589+
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
582590
_mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C)
583591
{
584592
return (__m256)__builtin_elementwise_fma((__v8sf)__A, (__v8sf)__B,
@@ -599,7 +607,7 @@ _mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C)
599607
/// \param __C
600608
/// A 256-bit vector of [4 x double] containing the subtrahend.
601609
/// \returns A 256-bit vector of [4 x double] containing the result.
602-
static __inline__ __m256d __DEFAULT_FN_ATTRS256
610+
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
603611
_mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C)
604612
{
605613
return (__m256d)__builtin_elementwise_fma((__v4df)__A, (__v4df)__B,
@@ -620,7 +628,7 @@ _mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C)
620628
/// \param __C
621629
/// A 256-bit vector of [8 x float] containing the addend.
622630
/// \returns A 256-bit vector of [8 x float] containing the result.
623-
static __inline__ __m256 __DEFAULT_FN_ATTRS256
631+
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
624632
_mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C)
625633
{
626634
return (__m256)__builtin_elementwise_fma(-(__v8sf)__A, (__v8sf)__B,
@@ -641,7 +649,7 @@ _mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C)
641649
/// \param __C
642650
/// A 256-bit vector of [4 x double] containing the addend.
643651
/// \returns A 256-bit vector of [4 x double] containing the result.
644-
static __inline__ __m256d __DEFAULT_FN_ATTRS256
652+
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
645653
_mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C)
646654
{
647655
return (__m256d)__builtin_elementwise_fma(-(__v4df)__A, (__v4df)__B,
@@ -662,7 +670,7 @@ _mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C)
662670
/// \param __C
663671
/// A 256-bit vector of [8 x float] containing the subtrahend.
664672
/// \returns A 256-bit vector of [8 x float] containing the result.
665-
static __inline__ __m256 __DEFAULT_FN_ATTRS256
673+
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
666674
_mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C)
667675
{
668676
return (__m256)__builtin_elementwise_fma(-(__v8sf)__A, (__v8sf)__B,
@@ -683,7 +691,7 @@ _mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C)
683691
/// \param __C
684692
/// A 256-bit vector of [4 x double] containing the subtrahend.
685693
/// \returns A 256-bit vector of [4 x double] containing the result.
686-
static __inline__ __m256d __DEFAULT_FN_ATTRS256
694+
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
687695
_mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C)
688696
{
689697
return (__m256d)__builtin_elementwise_fma(-(__v4df)__A, (__v4df)__B,
@@ -808,5 +816,7 @@ _mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C)
808816

809817
#undef __DEFAULT_FN_ATTRS128
810818
#undef __DEFAULT_FN_ATTRS256
819+
#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
820+
#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
811821

812822
#endif /* __FMAINTRIN_H */

0 commit comments

Comments
 (0)