|
8 | 8 | *===-----------------------------------------------------------------------=== |
9 | 9 | */ |
10 | 10 | #ifndef __IMMINTRIN_H |
11 | | -#error "Never use <avx512ifmavlintrin.h> directly; include <immintrin.h> instead." |
| 11 | +#error \ |
| 12 | + "Never use <avx512ifmavlintrin.h> directly; include <immintrin.h> instead." |
12 | 13 | #endif |
13 | 14 |
|
14 | 15 | #ifndef __IFMAVLINTRIN_H |
15 | 16 | #define __IFMAVLINTRIN_H |
16 | 17 |
|
17 | 18 | /* Define the default attributes for the functions in this file. */ |
| 19 | +#if defined(__cplusplus) && (__cplusplus >= 201103L) |
| 20 | +#define __DEFAULT_FN_ATTRS128 \ |
| 21 | + constexpr __attribute__((__always_inline__, __nodebug__, \ |
| 22 | + __target__("avx512ifma,avx512vl"), \ |
| 23 | + __min_vector_width__(128))) |
| 24 | +#define __DEFAULT_FN_ATTRS256 \ |
| 25 | + constexpr __attribute__((__always_inline__, __nodebug__, \ |
| 26 | + __target__("avx512ifma,avx512vl"), \ |
| 27 | + __min_vector_width__(256))) |
| 28 | +#else |
18 | 29 | #define __DEFAULT_FN_ATTRS128 \ |
19 | 30 | __attribute__((__always_inline__, __nodebug__, \ |
20 | 31 | __target__("avx512ifma,avx512vl"), \ |
|
24 | 35 | __target__("avx512ifma,avx512vl"), \ |
25 | 36 | __min_vector_width__(256))) |
26 | 37 |
|
| 38 | +#endif |
| 39 | + |
27 | 40 | #define _mm_madd52hi_epu64(X, Y, Z) \ |
28 | 41 | ((__m128i)__builtin_ia32_vpmadd52huq128((__v2di)(X), (__v2di)(Y), \ |
29 | 42 | (__v2di)(Z))) |
|
41 | 54 | (__v4di)(Z))) |
42 | 55 |
|
43 | 56 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
44 | | -_mm_mask_madd52hi_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) |
45 | | -{ |
46 | | - return (__m128i)__builtin_ia32_selectq_128(__M, |
47 | | - (__v2di)_mm_madd52hi_epu64(__W, __X, __Y), |
48 | | - (__v2di)__W); |
| 57 | +_mm_mask_madd52hi_epu64(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { |
| 58 | + return (__m128i)__builtin_ia32_selectq_128( |
| 59 | + __M, (__v2di)_mm_madd52hi_epu64(__W, __X, __Y), (__v2di)__W); |
49 | 60 | } |
50 | 61 |
|
51 | 62 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
52 | | -_mm_maskz_madd52hi_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) |
53 | | -{ |
54 | | - return (__m128i)__builtin_ia32_selectq_128(__M, |
55 | | - (__v2di)_mm_madd52hi_epu64(__X, __Y, __Z), |
56 | | - (__v2di)_mm_setzero_si128()); |
| 63 | +_mm_maskz_madd52hi_epu64(__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) { |
| 64 | + return (__m128i)__builtin_ia32_selectq_128( |
| 65 | + __M, (__v2di)_mm_madd52hi_epu64(__X, __Y, __Z), |
| 66 | + (__v2di)_mm_setzero_si128()); |
57 | 67 | } |
58 | 68 |
|
59 | | -static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
60 | | -_mm256_mask_madd52hi_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) |
61 | | -{ |
62 | | - return (__m256i)__builtin_ia32_selectq_256(__M, |
63 | | - (__v4di)_mm256_madd52hi_epu64(__W, __X, __Y), |
64 | | - (__v4di)__W); |
| 69 | +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_madd52hi_epu64( |
| 70 | + __m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { |
| 71 | + return (__m256i)__builtin_ia32_selectq_256( |
| 72 | + __M, (__v4di)_mm256_madd52hi_epu64(__W, __X, __Y), (__v4di)__W); |
65 | 73 | } |
66 | 74 |
|
67 | | -static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
68 | | -_mm256_maskz_madd52hi_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) |
69 | | -{ |
70 | | - return (__m256i)__builtin_ia32_selectq_256(__M, |
71 | | - (__v4di)_mm256_madd52hi_epu64(__X, __Y, __Z), |
72 | | - (__v4di)_mm256_setzero_si256()); |
| 75 | +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_madd52hi_epu64( |
| 76 | + __mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) { |
| 77 | + return (__m256i)__builtin_ia32_selectq_256( |
| 78 | + __M, (__v4di)_mm256_madd52hi_epu64(__X, __Y, __Z), |
| 79 | + (__v4di)_mm256_setzero_si256()); |
73 | 80 | } |
74 | 81 |
|
75 | 82 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
76 | | -_mm_mask_madd52lo_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) |
77 | | -{ |
78 | | - return (__m128i)__builtin_ia32_selectq_128(__M, |
79 | | - (__v2di)_mm_madd52lo_epu64(__W, __X, __Y), |
80 | | - (__v2di)__W); |
| 83 | +_mm_mask_madd52lo_epu64(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { |
| 84 | + return (__m128i)__builtin_ia32_selectq_128( |
| 85 | + __M, (__v2di)_mm_madd52lo_epu64(__W, __X, __Y), (__v2di)__W); |
81 | 86 | } |
82 | 87 |
|
83 | 88 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
84 | | -_mm_maskz_madd52lo_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) |
85 | | -{ |
86 | | - return (__m128i)__builtin_ia32_selectq_128(__M, |
87 | | - (__v2di)_mm_madd52lo_epu64(__X, __Y, __Z), |
88 | | - (__v2di)_mm_setzero_si128()); |
| 89 | +_mm_maskz_madd52lo_epu64(__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) { |
| 90 | + return (__m128i)__builtin_ia32_selectq_128( |
| 91 | + __M, (__v2di)_mm_madd52lo_epu64(__X, __Y, __Z), |
| 92 | + (__v2di)_mm_setzero_si128()); |
89 | 93 | } |
90 | 94 |
|
91 | | -static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
92 | | -_mm256_mask_madd52lo_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) |
93 | | -{ |
94 | | - return (__m256i)__builtin_ia32_selectq_256(__M, |
95 | | - (__v4di)_mm256_madd52lo_epu64(__W, __X, __Y), |
96 | | - (__v4di)__W); |
| 95 | +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_madd52lo_epu64( |
| 96 | + __m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { |
| 97 | + return (__m256i)__builtin_ia32_selectq_256( |
| 98 | + __M, (__v4di)_mm256_madd52lo_epu64(__W, __X, __Y), (__v4di)__W); |
97 | 99 | } |
98 | 100 |
|
99 | | -static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
100 | | -_mm256_maskz_madd52lo_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) |
101 | | -{ |
102 | | - return (__m256i)__builtin_ia32_selectq_256(__M, |
103 | | - (__v4di)_mm256_madd52lo_epu64(__X, __Y, __Z), |
104 | | - (__v4di)_mm256_setzero_si256()); |
| 101 | +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_madd52lo_epu64( |
| 102 | + __mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) { |
| 103 | + return (__m256i)__builtin_ia32_selectq_256( |
| 104 | + __M, (__v4di)_mm256_madd52lo_epu64(__X, __Y, __Z), |
| 105 | + (__v4di)_mm256_setzero_si256()); |
105 | 106 | } |
106 | 107 |
|
107 | | - |
108 | 108 | #undef __DEFAULT_FN_ATTRS128 |
109 | 109 | #undef __DEFAULT_FN_ATTRS256 |
110 | 110 |
|
|
0 commit comments