Skip to content

Commit e16db4c

Browse files
committed
[X86] Allow AVX512 funnel shift intrinsics to be used in constexpr
Now that these intrinsics wrap the __builtin_elementwise_fshl/fshr builtins, this is pretty trivial. One more step towards #153152 - only the VBMI2 funnel shifts by immediate remain to be done.
1 parent 6bcb172 commit e16db4c

File tree

4 files changed

+127
-55
lines changed

4 files changed

+127
-55
lines changed

clang/lib/Headers/avx512vbmi2intrin.h

Lines changed: 25 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,12 @@
1919
__attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2"), \
2020
__min_vector_width__(512)))
2121

22+
#if defined(__cplusplus) && (__cplusplus >= 201103L)
23+
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
24+
#else
25+
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
26+
#endif
27+
2228
static __inline__ __m512i __DEFAULT_FN_ATTRS
2329
_mm512_mask_compress_epi16(__m512i __S, __mmask32 __U, __m512i __D)
2430
{
@@ -213,140 +219,140 @@ _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P)
213219
(__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \
214220
(__v32hi)_mm512_setzero_si512()))
215221

216-
static __inline__ __m512i __DEFAULT_FN_ATTRS
222+
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
217223
_mm512_shldv_epi64(__m512i __A, __m512i __B, __m512i __C)
218224
{
219225
return (__m512i)__builtin_elementwise_fshl((__v8du)__A, (__v8du)__B,
220226
(__v8du)__C);
221227
}
222228

223-
static __inline__ __m512i __DEFAULT_FN_ATTRS
229+
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
224230
_mm512_mask_shldv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C)
225231
{
226232
return (__m512i)__builtin_ia32_selectq_512(__U,
227233
(__v8di)_mm512_shldv_epi64(__A, __B, __C),
228234
(__v8di)__A);
229235
}
230236

231-
static __inline__ __m512i __DEFAULT_FN_ATTRS
237+
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
232238
_mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C)
233239
{
234240
return (__m512i)__builtin_ia32_selectq_512(__U,
235241
(__v8di)_mm512_shldv_epi64(__A, __B, __C),
236242
(__v8di)_mm512_setzero_si512());
237243
}
238244

239-
static __inline__ __m512i __DEFAULT_FN_ATTRS
245+
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
240246
_mm512_shldv_epi32(__m512i __A, __m512i __B, __m512i __C)
241247
{
242248
return (__m512i)__builtin_elementwise_fshl((__v16su)__A, (__v16su)__B,
243249
(__v16su)__C);
244250
}
245251

246-
static __inline__ __m512i __DEFAULT_FN_ATTRS
252+
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
247253
_mm512_mask_shldv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C)
248254
{
249255
return (__m512i)__builtin_ia32_selectd_512(__U,
250256
(__v16si)_mm512_shldv_epi32(__A, __B, __C),
251257
(__v16si)__A);
252258
}
253259

254-
static __inline__ __m512i __DEFAULT_FN_ATTRS
260+
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
255261
_mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C)
256262
{
257263
return (__m512i)__builtin_ia32_selectd_512(__U,
258264
(__v16si)_mm512_shldv_epi32(__A, __B, __C),
259265
(__v16si)_mm512_setzero_si512());
260266
}
261267

262-
static __inline__ __m512i __DEFAULT_FN_ATTRS
268+
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
263269
_mm512_shldv_epi16(__m512i __A, __m512i __B, __m512i __C)
264270
{
265271
return (__m512i)__builtin_elementwise_fshl((__v32hu)__A, (__v32hu)__B,
266272
(__v32hu)__C);
267273
}
268274

269-
static __inline__ __m512i __DEFAULT_FN_ATTRS
275+
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
270276
_mm512_mask_shldv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C)
271277
{
272278
return (__m512i)__builtin_ia32_selectw_512(__U,
273279
(__v32hi)_mm512_shldv_epi16(__A, __B, __C),
274280
(__v32hi)__A);
275281
}
276282

277-
static __inline__ __m512i __DEFAULT_FN_ATTRS
283+
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
278284
_mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C)
279285
{
280286
return (__m512i)__builtin_ia32_selectw_512(__U,
281287
(__v32hi)_mm512_shldv_epi16(__A, __B, __C),
282288
(__v32hi)_mm512_setzero_si512());
283289
}
284290

285-
static __inline__ __m512i __DEFAULT_FN_ATTRS
291+
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
286292
_mm512_shrdv_epi64(__m512i __A, __m512i __B, __m512i __C)
287293
{
288294
// Ops __A and __B are swapped.
289295
return (__m512i)__builtin_elementwise_fshr((__v8du)__B, (__v8du)__A,
290296
(__v8du)__C);
291297
}
292298

293-
static __inline__ __m512i __DEFAULT_FN_ATTRS
299+
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
294300
_mm512_mask_shrdv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C)
295301
{
296302
return (__m512i)__builtin_ia32_selectq_512(__U,
297303
(__v8di)_mm512_shrdv_epi64(__A, __B, __C),
298304
(__v8di)__A);
299305
}
300306

301-
static __inline__ __m512i __DEFAULT_FN_ATTRS
307+
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
302308
_mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C)
303309
{
304310
return (__m512i)__builtin_ia32_selectq_512(__U,
305311
(__v8di)_mm512_shrdv_epi64(__A, __B, __C),
306312
(__v8di)_mm512_setzero_si512());
307313
}
308314

309-
static __inline__ __m512i __DEFAULT_FN_ATTRS
315+
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
310316
_mm512_shrdv_epi32(__m512i __A, __m512i __B, __m512i __C)
311317
{
312318
// Ops __A and __B are swapped.
313319
return (__m512i)__builtin_elementwise_fshr((__v16su)__B, (__v16su)__A,
314320
(__v16su)__C);
315321
}
316322

317-
static __inline__ __m512i __DEFAULT_FN_ATTRS
323+
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
318324
_mm512_mask_shrdv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C)
319325
{
320326
return (__m512i) __builtin_ia32_selectd_512(__U,
321327
(__v16si)_mm512_shrdv_epi32(__A, __B, __C),
322328
(__v16si)__A);
323329
}
324330

325-
static __inline__ __m512i __DEFAULT_FN_ATTRS
331+
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
326332
_mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C)
327333
{
328334
return (__m512i) __builtin_ia32_selectd_512(__U,
329335
(__v16si)_mm512_shrdv_epi32(__A, __B, __C),
330336
(__v16si)_mm512_setzero_si512());
331337
}
332338

333-
static __inline__ __m512i __DEFAULT_FN_ATTRS
339+
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
334340
_mm512_shrdv_epi16(__m512i __A, __m512i __B, __m512i __C)
335341
{
336342
// Ops __A and __B are swapped.
337343
return (__m512i)__builtin_elementwise_fshr((__v32hu)__B, (__v32hu)__A,
338344
(__v32hu)__C);
339345
}
340346

341-
static __inline__ __m512i __DEFAULT_FN_ATTRS
347+
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
342348
_mm512_mask_shrdv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C)
343349
{
344350
return (__m512i)__builtin_ia32_selectw_512(__U,
345351
(__v32hi)_mm512_shrdv_epi16(__A, __B, __C),
346352
(__v32hi)__A);
347353
}
348354

349-
static __inline__ __m512i __DEFAULT_FN_ATTRS
355+
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
350356
_mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C)
351357
{
352358
return (__m512i)__builtin_ia32_selectw_512(__U,
@@ -356,6 +362,7 @@ _mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C)
356362

357363

358364
#undef __DEFAULT_FN_ATTRS
365+
#undef __DEFAULT_FN_ATTRS_CONSTEXPR
359366

360367
#endif
361368

0 commit comments

Comments
 (0)