Skip to content

Commit f169893

Browse files
authored
[Headers][X86] Allow BITALG vpopcntw/vpopcntb intrinsics to be used in constexpr (#152701)
Matches VPOPCNTDQ handling
1 parent 478b415 commit f169893

File tree

5 files changed

+61
-7
lines changed

5 files changed

+61
-7
lines changed

clang/lib/Headers/avx512bitalgintrin.h

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,13 @@
2020
__target__("avx512bitalg,evex512"), \
2121
__min_vector_width__(512)))
2222

23-
static __inline__ __m512i __DEFAULT_FN_ATTRS
23+
#if defined(__cplusplus) && (__cplusplus >= 201103L)
24+
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
25+
#else
26+
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
27+
#endif
28+
29+
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
2430
_mm512_popcnt_epi16(__m512i __A)
2531
{
2632
return (__m512i)__builtin_elementwise_popcount((__v32hu)__A);
@@ -42,7 +48,7 @@ _mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B)
4248
__B);
4349
}
4450

45-
static __inline__ __m512i __DEFAULT_FN_ATTRS
51+
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
4652
_mm512_popcnt_epi8(__m512i __A)
4753
{
4854
return (__m512i)__builtin_elementwise_popcount((__v64qu)__A);
@@ -80,7 +86,7 @@ _mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B)
8086
__B);
8187
}
8288

83-
8489
#undef __DEFAULT_FN_ATTRS
90+
#undef __DEFAULT_FN_ATTRS_CONSTEXPR
8591

8692
#endif

clang/lib/Headers/avx512vlbitalgintrin.h

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,15 @@
2424
__target__("avx512vl,avx512bitalg,no-evex512"), \
2525
__min_vector_width__(256)))
2626

27-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
27+
#if defined(__cplusplus) && (__cplusplus >= 201103L)
28+
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
29+
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
30+
#else
31+
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
32+
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
33+
#endif
34+
35+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2836
_mm256_popcnt_epi16(__m256i __A)
2937
{
3038
return (__m256i)__builtin_elementwise_popcount((__v16hu)__A);
@@ -46,7 +54,7 @@ _mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B)
4654
__B);
4755
}
4856

49-
static __inline__ __m128i __DEFAULT_FN_ATTRS128
57+
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
5058
_mm_popcnt_epi16(__m128i __A)
5159
{
5260
return (__m128i)__builtin_elementwise_popcount((__v8hu)__A);
@@ -68,7 +76,7 @@ _mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __B)
6876
__B);
6977
}
7078

71-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
79+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
7280
_mm256_popcnt_epi8(__m256i __A)
7381
{
7482
return (__m256i)__builtin_elementwise_popcount((__v32qu)__A);
@@ -90,7 +98,7 @@ _mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B)
9098
__B);
9199
}
92100

93-
static __inline__ __m128i __DEFAULT_FN_ATTRS128
101+
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
94102
_mm_popcnt_epi8(__m128i __A)
95103
{
96104
return (__m128i)__builtin_elementwise_popcount((__v16qu)__A);
@@ -147,5 +155,7 @@ _mm_bitshuffle_epi64_mask(__m128i __A, __m128i __B)
147155

148156
#undef __DEFAULT_FN_ATTRS128
149157
#undef __DEFAULT_FN_ATTRS256
158+
#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
159+
#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
150160

151161
#endif

clang/test/CodeGen/X86/avx512bitalg-builtins.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,14 @@
44
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512bitalg -emit-llvm -o - -Wall -Werror | FileCheck %s
55

66
#include <immintrin.h>
7+
#include "builtin_test_helpers.h"
78

89
__m512i test_mm512_popcnt_epi16(__m512i __A) {
910
// CHECK-LABEL: test_mm512_popcnt_epi16
1011
// CHECK: @llvm.ctpop.v32i16
1112
return _mm512_popcnt_epi16(__A);
1213
}
14+
TEST_CONSTEXPR(match_v32hi(_mm512_popcnt_epi16((__m512i)(__v32hi){+5, -3, -10, +8, 0, -256, +256, -128, +3, +9, +15, +33, +63, +129, +511, +1025, +5, -3, -10, +8, 0, -256, +256, -128, +3, +9, +15, +33, +63, +129, +511, +1025}), 2, 15, 14, 1, 0, 8, 1, 9, 2, 2, 4, 2, 6, 2, 9, 2, 2, 15, 14, 1, 0, 8, 1, 9, 2, 2, 4, 2, 6, 2, 9, 2));
1315

1416
__m512i test_mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B) {
1517
// CHECK-LABEL: test_mm512_mask_popcnt_epi16
@@ -29,6 +31,7 @@ __m512i test_mm512_popcnt_epi8(__m512i __A) {
2931
// CHECK: @llvm.ctpop.v64i8
3032
return _mm512_popcnt_epi8(__A);
3133
}
34+
TEST_CONSTEXPR(match_v64qi(_mm512_popcnt_epi8((__m512i)(__v64qi){+5, -3, -10, +8, 0, -16, +16, -16, +3, +9, +15, +33, +63, +33, +53, +73, +5, -3, -10, +8, 0, -16, +16, -16, +3, +9, +15, +33, +63, +33, +53, +73, +5, -3, -10, +8, 0, -16, +16, -16, +3, +9, +15, +33, +63, +33, +53, +73, +5, -3, -10, +8, 0, -16, +16, -16, +3, +9, +15, +33, +63, +33, +53, +73}), 2, 7, 6, 1, 0, 4, 1, 4, 2, 2, 4, 2, 6, 2, 4, 3, 2, 7, 6, 1, 0, 4, 1, 4, 2, 2, 4, 2, 6, 2, 4, 3, 2, 7, 6, 1, 0, 4, 1, 4, 2, 2, 4, 2, 6, 2, 4, 3, 2, 7, 6, 1, 0, 4, 1, 4, 2, 2, 4, 2, 6, 2, 4, 3));
3235

3336
__m512i test_mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B) {
3437
// CHECK-LABEL: test_mm512_mask_popcnt_epi8

clang/test/CodeGen/X86/avx512vlbitalg-builtins.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,14 @@
44
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512bitalg -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s
55

66
#include <immintrin.h>
7+
#include "builtin_test_helpers.h"
78

89
__m256i test_mm256_popcnt_epi16(__m256i __A) {
910
// CHECK-LABEL: test_mm256_popcnt_epi16
1011
// CHECK: @llvm.ctpop.v16i16
1112
return _mm256_popcnt_epi16(__A);
1213
}
14+
TEST_CONSTEXPR(match_v16hi(_mm256_popcnt_epi16((__m256i)(__v16hi){+5, -3, -10, +8, 0, -256, +256, -128, +3, +9, +15, +33, +63, +129, +511, +1025}), 2, 15, 14, 1, 0, 8, 1, 9, 2, 2, 4, 2, 6, 2, 9, 2));
1315

1416
__m256i test_mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B) {
1517
// CHECK-LABEL: test_mm256_mask_popcnt_epi16
@@ -29,6 +31,7 @@ __m128i test_mm_popcnt_epi16(__m128i __A) {
2931
// CHECK: @llvm.ctpop.v8i16
3032
return _mm_popcnt_epi16(__A);
3133
}
34+
TEST_CONSTEXPR(match_v8hi(_mm_popcnt_epi16((__m128i)(__v8hi){+5, -3, -10, +8, 0, -256, +256, -128}), 2, 15, 14, 1, 0, 8, 1, 9));
3235

3336
__m128i test_mm_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) {
3437
// CHECK-LABEL: test_mm_mask_popcnt_epi16
@@ -48,6 +51,7 @@ __m256i test_mm256_popcnt_epi8(__m256i __A) {
4851
// CHECK: @llvm.ctpop.v32i8
4952
return _mm256_popcnt_epi8(__A);
5053
}
54+
TEST_CONSTEXPR(match_v32qi(_mm256_popcnt_epi8((__m256i)(__v32qi){+5, -3, -10, +8, 0, -16, +16, -16, +3, +9, +15, +33, +63, +33, +53, +73, +5, -3, -10, +8, 0, -16, +16, -16, +3, +9, +15, +33, +63, +33, +53, +73}), 2, 7, 6, 1, 0, 4, 1, 4, 2, 2, 4, 2, 6, 2, 4, 3, 2, 7, 6, 1, 0, 4, 1, 4, 2, 2, 4, 2, 6, 2, 4, 3));
5155

5256
__m256i test_mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B) {
5357
// CHECK-LABEL: test_mm256_mask_popcnt_epi8
@@ -67,6 +71,7 @@ __m128i test_mm_popcnt_epi8(__m128i __A) {
6771
// CHECK: @llvm.ctpop.v16i8
6872
return _mm_popcnt_epi8(__A);
6973
}
74+
TEST_CONSTEXPR(match_v16qi(_mm_popcnt_epi8((__m128i)(__v16qi){+5, -3, -10, +8, 0, -16, +16, -16, +3, +9, +15, +33, +63, +33, +53, +73}), 2, 7, 6, 1, 0, 4, 1, 4, 2, 2, 4, 2, 6, 2, 4, 3));
7075

7176
__m128i test_mm_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) {
7277
// CHECK-LABEL: test_mm_mask_popcnt_epi8

clang/test/CodeGen/X86/builtin_test_helpers.h

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,36 @@ constexpr bool match_v16si(__m512i _v, int a, int b, int c, int d, int e, int f,
122122
return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h && v[8] == i && v[9] == j && v[10] == k && v[11] == l && v[12] == m && v[13] == n && v[14] == o && v[15] == p;
123123
}
124124

125+
constexpr bool match_v32hi(__m512i _v, short __e00, short __e01, short __e02, short __e03, short __e04, short __e05, short __e06, short __e07,
126+
short __e08, short __e09, short __e10, short __e11, short __e12, short __e13, short __e14, short __e15,
127+
short __e16, short __e17, short __e18, short __e19, short __e20, short __e21, short __e22, short __e23,
128+
short __e24, short __e25, short __e26, short __e27, short __e28, short __e29, short __e30, short __e31) {
129+
__v32hi v = (__v32hi)_v;
130+
return v[ 0] == __e00 && v[ 1] == __e01 && v[ 2] == __e02 && v[ 3] == __e03 && v[ 4] == __e04 && v[ 5] == __e05 && v[ 6] == __e06 && v[ 7] == __e07 &&
131+
v[ 8] == __e08 && v[ 9] == __e09 && v[10] == __e10 && v[11] == __e11 && v[12] == __e12 && v[13] == __e13 && v[14] == __e14 && v[15] == __e15 &&
132+
v[16] == __e16 && v[17] == __e17 && v[18] == __e18 && v[19] == __e19 && v[20] == __e20 && v[21] == __e21 && v[22] == __e22 && v[23] == __e23 &&
133+
v[24] == __e24 && v[25] == __e25 && v[26] == __e26 && v[27] == __e27 && v[28] == __e28 && v[29] == __e29 && v[30] == __e30 && v[31] == __e31;
134+
}
135+
136+
constexpr bool match_v64qi(__m512i _v, char __e00, char __e01, char __e02, char __e03, char __e04, char __e05, char __e06, char __e07,
137+
char __e08, char __e09, char __e10, char __e11, char __e12, char __e13, char __e14, char __e15,
138+
char __e16, char __e17, char __e18, char __e19, char __e20, char __e21, char __e22, char __e23,
139+
char __e24, char __e25, char __e26, char __e27, char __e28, char __e29, char __e30, char __e31,
140+
char __e32, char __e33, char __e34, char __e35, char __e36, char __e37, char __e38, char __e39,
141+
char __e40, char __e41, char __e42, char __e43, char __e44, char __e45, char __e46, char __e47,
142+
char __e48, char __e49, char __e50, char __e51, char __e52, char __e53, char __e54, char __e55,
143+
char __e56, char __e57, char __e58, char __e59, char __e60, char __e61, char __e62, char __e63) {
144+
__v64qi v = (__v64qi)_v;
145+
return v[ 0] == __e00 && v[ 1] == __e01 && v[ 2] == __e02 && v[ 3] == __e03 && v[ 4] == __e04 && v[ 5] == __e05 && v[ 6] == __e06 && v[ 7] == __e07 &&
146+
v[ 8] == __e08 && v[ 9] == __e09 && v[10] == __e10 && v[11] == __e11 && v[12] == __e12 && v[13] == __e13 && v[14] == __e14 && v[15] == __e15 &&
147+
v[16] == __e16 && v[17] == __e17 && v[18] == __e18 && v[19] == __e19 && v[20] == __e20 && v[21] == __e21 && v[22] == __e22 && v[23] == __e23 &&
148+
v[24] == __e24 && v[25] == __e25 && v[26] == __e26 && v[27] == __e27 && v[28] == __e28 && v[29] == __e29 && v[30] == __e30 && v[31] == __e31 &&
149+
v[32] == __e32 && v[33] == __e33 && v[34] == __e34 && v[35] == __e35 && v[36] == __e36 && v[37] == __e37 && v[38] == __e38 && v[39] == __e39 &&
150+
v[40] == __e40 && v[41] == __e41 && v[42] == __e42 && v[43] == __e43 && v[44] == __e44 && v[45] == __e45 && v[46] == __e46 && v[47] == __e47 &&
151+
v[48] == __e48 && v[49] == __e49 && v[50] == __e50 && v[51] == __e51 && v[52] == __e52 && v[53] == __e53 && v[54] == __e54 && v[55] == __e55 &&
152+
v[56] == __e56 && v[57] == __e57 && v[58] == __e58 && v[59] == __e59 && v[60] == __e60 && v[61] == __e61 && v[62] == __e62 && v[63] == __e63;
153+
}
154+
125155
#define TEST_CONSTEXPR(...) static_assert(__VA_ARGS__)
126156

127157
#else

0 commit comments

Comments
 (0)