Skip to content

Commit 20c5316

Browse files
phoebewang authored and dvbuka committed
[NFC] Use macros only when __AVX512IFMA__ and __AVXIFMA__ undefined (llvm#162760)
1 parent f244deb commit 20c5316

File tree

2 files changed

+43
-8
lines changed

2 files changed

+43
-8
lines changed

clang/lib/Headers/avx512ifmavlintrin.h

Lines changed: 36 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,7 @@
3737

3838
#endif
3939

40+
#if !(defined(__AVXIFMA__) || defined(__AVX512IFMA__))
4041
#define _mm_madd52hi_epu64(X, Y, Z) \
4142
((__m128i)__builtin_ia32_vpmadd52huq128((__v2di)(X), (__v2di)(Y), \
4243
(__v2di)(Z)))
@@ -52,56 +53,83 @@
5253
#define _mm256_madd52lo_epu64(X, Y, Z) \
5354
((__m256i)__builtin_ia32_vpmadd52luq256((__v4di)(X), (__v4di)(Y), \
5455
(__v4di)(Z)))
56+
#endif
57+
58+
#if defined(__AVX512IFMA__)
59+
static __inline__ __m128i __DEFAULT_FN_ATTRS128
60+
_mm_madd52hi_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
61+
return (__m128i)__builtin_ia32_vpmadd52huq128((__v2di)__X, (__v2di)__Y,
62+
(__v2di)__Z);
63+
}
64+
65+
static __inline__ __m256i __DEFAULT_FN_ATTRS256
66+
_mm256_madd52hi_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
67+
return (__m256i)__builtin_ia32_vpmadd52huq256((__v4di)__X, (__v4di)__Y,
68+
(__v4di)__Z);
69+
}
70+
71+
static __inline__ __m128i __DEFAULT_FN_ATTRS128
72+
_mm_madd52lo_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
73+
return (__m128i)__builtin_ia32_vpmadd52luq128((__v2di)__X, (__v2di)__Y,
74+
(__v2di)__Z);
75+
}
76+
77+
static __inline__ __m256i __DEFAULT_FN_ATTRS256
78+
_mm256_madd52lo_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
79+
return (__m256i)__builtin_ia32_vpmadd52luq256((__v4di)__X, (__v4di)__Y,
80+
(__v4di)__Z);
81+
}
82+
#endif
5583

5684
static __inline__ __m128i __DEFAULT_FN_ATTRS128
5785
_mm_mask_madd52hi_epu64(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) {
5886
return (__m128i)__builtin_ia32_selectq_128(
59-
__M, (__v2di)_mm_madd52hi_epu64(__W, __X, __Y), (__v2di)__W);
87+
__M, (__v2di)__builtin_ia32_vpmadd52huq128(__W, __X, __Y), (__v2di)__W);
6088
}
6189

6290
static __inline__ __m128i __DEFAULT_FN_ATTRS128
6391
_mm_maskz_madd52hi_epu64(__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) {
6492
return (__m128i)__builtin_ia32_selectq_128(
65-
__M, (__v2di)_mm_madd52hi_epu64(__X, __Y, __Z),
93+
__M, (__v2di)__builtin_ia32_vpmadd52huq128(__X, __Y, __Z),
6694
(__v2di)_mm_setzero_si128());
6795
}
6896

6997
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_madd52hi_epu64(
7098
__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) {
7199
return (__m256i)__builtin_ia32_selectq_256(
72-
__M, (__v4di)_mm256_madd52hi_epu64(__W, __X, __Y), (__v4di)__W);
100+
__M, (__v4di)__builtin_ia32_vpmadd52huq256(__W, __X, __Y), (__v4di)__W);
73101
}
74102

75103
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_madd52hi_epu64(
76104
__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) {
77105
return (__m256i)__builtin_ia32_selectq_256(
78-
__M, (__v4di)_mm256_madd52hi_epu64(__X, __Y, __Z),
106+
__M, (__v4di)__builtin_ia32_vpmadd52huq256(__X, __Y, __Z),
79107
(__v4di)_mm256_setzero_si256());
80108
}
81109

82110
static __inline__ __m128i __DEFAULT_FN_ATTRS128
83111
_mm_mask_madd52lo_epu64(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) {
84112
return (__m128i)__builtin_ia32_selectq_128(
85-
__M, (__v2di)_mm_madd52lo_epu64(__W, __X, __Y), (__v2di)__W);
113+
__M, (__v2di)__builtin_ia32_vpmadd52luq128(__W, __X, __Y), (__v2di)__W);
86114
}
87115

88116
static __inline__ __m128i __DEFAULT_FN_ATTRS128
89117
_mm_maskz_madd52lo_epu64(__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) {
90118
return (__m128i)__builtin_ia32_selectq_128(
91-
__M, (__v2di)_mm_madd52lo_epu64(__X, __Y, __Z),
119+
__M, (__v2di)__builtin_ia32_vpmadd52luq128(__X, __Y, __Z),
92120
(__v2di)_mm_setzero_si128());
93121
}
94122

95123
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_madd52lo_epu64(
96124
__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) {
97125
return (__m256i)__builtin_ia32_selectq_256(
98-
__M, (__v4di)_mm256_madd52lo_epu64(__W, __X, __Y), (__v4di)__W);
126+
__M, (__v4di)__builtin_ia32_vpmadd52luq256(__W, __X, __Y), (__v4di)__W);
99127
}
100128

101129
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_madd52lo_epu64(
102130
__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) {
103131
return (__m256i)__builtin_ia32_selectq_256(
104-
__M, (__v4di)_mm256_madd52lo_epu64(__X, __Y, __Z),
132+
__M, (__v4di)__builtin_ia32_vpmadd52luq256(__X, __Y, __Z),
105133
(__v4di)_mm256_setzero_si256());
106134
}
107135

clang/lib/Headers/avxifmaintrin.h

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,13 @@
3131
__min_vector_width__(256)))
3232
#endif
3333

34+
#if !defined(__AVX512IFMA__) && defined(__AVXIFMA__)
35+
#define _mm_madd52hi_epu64(X, Y, Z) _mm_madd52hi_avx_epu64(X, Y, Z)
36+
#define _mm_madd52lo_epu64(X, Y, Z) _mm_madd52lo_avx_epu64(X, Y, Z)
37+
#define _mm256_madd52hi_epu64(X, Y, Z) _mm256_madd52hi_avx_epu64(X, Y, Z)
38+
#define _mm256_madd52lo_epu64(X, Y, Z) _mm256_madd52lo_avx_epu64(X, Y, Z)
39+
#endif
40+
3441
// must vex-encoding
3542

3643
/// Multiply packed unsigned 52-bit integers in each 64-bit element of \a __Y

0 commit comments

Comments
 (0)