Skip to content

[Headers][X86] Allow BITALG vpopcntw/vpopcntb intrinsics to be used in constexpr #152701

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 8, 2025

Conversation

RKSimon
Copy link
Collaborator

@RKSimon RKSimon commented Aug 8, 2025

Matches VPOPCNTDQ handling

@RKSimon RKSimon requested a review from phoebewang August 8, 2025 12:36
@llvmbot llvmbot added clang Clang issues not falling into any other category backend:X86 clang:headers Headers provided by Clang, e.g. for intrinsics labels Aug 8, 2025
@llvmbot
Copy link
Member

llvmbot commented Aug 8, 2025

@llvm/pr-subscribers-clang

@llvm/pr-subscribers-backend-x86

Author: Simon Pilgrim (RKSimon)

Changes

Matches VPOPCNTDQ handling


Full diff: https://github.com/llvm/llvm-project/pull/152701.diff

5 Files Affected:

  • (modified) clang/lib/Headers/avx512bitalgintrin.h (+9-3)
  • (modified) clang/lib/Headers/avx512vlbitalgintrin.h (+14-4)
  • (modified) clang/test/CodeGen/X86/avx512bitalg-builtins.c (+3)
  • (modified) clang/test/CodeGen/X86/avx512vlbitalg-builtins.c (+5)
  • (modified) clang/test/CodeGen/X86/builtin_test_helpers.h (+30)
diff --git a/clang/lib/Headers/avx512bitalgintrin.h b/clang/lib/Headers/avx512bitalgintrin.h
index 3c446b34e7885..9a1ff8f39734f 100644
--- a/clang/lib/Headers/avx512bitalgintrin.h
+++ b/clang/lib/Headers/avx512bitalgintrin.h
@@ -20,7 +20,13 @@
                  __target__("avx512bitalg,evex512"),                           \
                  __min_vector_width__(512)))
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
+#else
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
+#endif
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm512_popcnt_epi16(__m512i __A)
 {
   return (__m512i)__builtin_elementwise_popcount((__v32hu)__A);
@@ -42,7 +48,7 @@ _mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B)
               __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm512_popcnt_epi8(__m512i __A)
 {
   return (__m512i)__builtin_elementwise_popcount((__v64qu)__A);
@@ -80,7 +86,7 @@ _mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B)
               __B);
 }
 
-
 #undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS_CONSTEXPR
 
 #endif
diff --git a/clang/lib/Headers/avx512vlbitalgintrin.h b/clang/lib/Headers/avx512vlbitalgintrin.h
index 1b01fe0b9d815..739e78aab753d 100644
--- a/clang/lib/Headers/avx512vlbitalgintrin.h
+++ b/clang/lib/Headers/avx512vlbitalgintrin.h
@@ -24,7 +24,15 @@
                  __target__("avx512vl,avx512bitalg,no-evex512"),               \
                  __min_vector_width__(256)))
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
+#else
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
+#endif
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_popcnt_epi16(__m256i __A)
 {
   return (__m256i)__builtin_elementwise_popcount((__v16hu)__A);
@@ -46,7 +54,7 @@ _mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B)
               __B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_popcnt_epi16(__m128i __A)
 {
   return (__m128i)__builtin_elementwise_popcount((__v8hu)__A);
@@ -68,7 +76,7 @@ _mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __B)
               __B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_popcnt_epi8(__m256i __A)
 {
   return (__m256i)__builtin_elementwise_popcount((__v32qu)__A);
@@ -90,7 +98,7 @@ _mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B)
               __B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_popcnt_epi8(__m128i __A)
 {
   return (__m128i)__builtin_elementwise_popcount((__v16qu)__A);
@@ -147,5 +155,7 @@ _mm_bitshuffle_epi64_mask(__m128i __A, __m128i __B)
 
 #undef __DEFAULT_FN_ATTRS128
 #undef __DEFAULT_FN_ATTRS256
+#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
+#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
 
 #endif
diff --git a/clang/test/CodeGen/X86/avx512bitalg-builtins.c b/clang/test/CodeGen/X86/avx512bitalg-builtins.c
index 0468fba7d534c..30d364a283641 100644
--- a/clang/test/CodeGen/X86/avx512bitalg-builtins.c
+++ b/clang/test/CodeGen/X86/avx512bitalg-builtins.c
@@ -4,12 +4,14 @@
 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512bitalg -emit-llvm -o - -Wall -Werror | FileCheck %s
 
 #include <immintrin.h>
+#include "builtin_test_helpers.h"
 
 __m512i test_mm512_popcnt_epi16(__m512i __A) {
   // CHECK-LABEL: test_mm512_popcnt_epi16
   // CHECK: @llvm.ctpop.v32i16
   return _mm512_popcnt_epi16(__A);
 }
+TEST_CONSTEXPR(match_v32hi(_mm512_popcnt_epi16((__m512i)(__v32hi){+5, -3, -10, +8, 0, -256, +256, -128, +3, +9, +15, +33, +63, +129, +511, +1025, +5, -3, -10, +8, 0, -256, +256, -128, +3, +9, +15, +33, +63, +129, +511, +1025}), 2, 15, 14, 1, 0, 8, 1, 9, 2, 2, 4, 2, 6, 2, 9, 2, 2, 15, 14, 1, 0, 8, 1, 9, 2, 2, 4, 2, 6, 2, 9, 2));
 
 __m512i test_mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B) {
   // CHECK-LABEL: test_mm512_mask_popcnt_epi16
@@ -29,6 +31,7 @@ __m512i test_mm512_popcnt_epi8(__m512i __A) {
   // CHECK: @llvm.ctpop.v64i8
   return _mm512_popcnt_epi8(__A);
 }
+TEST_CONSTEXPR(match_v64qi(_mm512_popcnt_epi8((__m512i)(__v64qi){+5, -3, -10, +8, 0, -16, +16, -16, +3, +9, +15, +33, +63, +33, +53, +73, +5, -3, -10, +8, 0, -16, +16, -16, +3, +9, +15, +33, +63, +33, +53, +73, +5, -3, -10, +8, 0, -16, +16, -16, +3, +9, +15, +33, +63, +33, +53, +73, +5, -3, -10, +8, 0, -16, +16, -16, +3, +9, +15, +33, +63, +33, +53, +73}), 2, 7, 6, 1, 0, 4, 1, 4, 2, 2, 4, 2, 6, 2, 4, 3, 2, 7, 6, 1, 0, 4, 1, 4, 2, 2, 4, 2, 6, 2, 4, 3, 2, 7, 6, 1, 0, 4, 1, 4, 2, 2, 4, 2, 6, 2, 4, 3, 2, 7, 6, 1, 0, 4, 1, 4, 2, 2, 4, 2, 6, 2, 4, 3));
 
 __m512i test_mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B) {
   // CHECK-LABEL: test_mm512_mask_popcnt_epi8
diff --git a/clang/test/CodeGen/X86/avx512vlbitalg-builtins.c b/clang/test/CodeGen/X86/avx512vlbitalg-builtins.c
index 767123dbd6e24..b53410ae43297 100644
--- a/clang/test/CodeGen/X86/avx512vlbitalg-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlbitalg-builtins.c
@@ -4,12 +4,14 @@
 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512bitalg -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s
 
 #include <immintrin.h>
+#include "builtin_test_helpers.h"
 
 __m256i test_mm256_popcnt_epi16(__m256i __A) {
   // CHECK-LABEL: test_mm256_popcnt_epi16
   // CHECK: @llvm.ctpop.v16i16
   return _mm256_popcnt_epi16(__A);
 }
+TEST_CONSTEXPR(match_v16hi(_mm256_popcnt_epi16((__m256i)(__v16hi){+5, -3, -10, +8, 0, -256, +256, -128, +3, +9, +15, +33, +63, +129, +511, +1025}), 2, 15, 14, 1, 0, 8, 1, 9, 2, 2, 4, 2, 6, 2, 9, 2));
 
 __m256i test_mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B) {
   // CHECK-LABEL: test_mm256_mask_popcnt_epi16
@@ -29,6 +31,7 @@ __m128i test_mm_popcnt_epi16(__m128i __A) {
   // CHECK: @llvm.ctpop.v8i16
   return _mm_popcnt_epi16(__A);
 }
+TEST_CONSTEXPR(match_v8hi(_mm_popcnt_epi16((__m128i)(__v8hi){+5, -3, -10, +8, 0, -256, +256, -128}), 2, 15, 14, 1, 0, 8, 1, 9));
 
 __m128i test_mm_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) {
   // CHECK-LABEL: test_mm_mask_popcnt_epi16
@@ -48,6 +51,7 @@ __m256i test_mm256_popcnt_epi8(__m256i __A) {
   // CHECK: @llvm.ctpop.v32i8
   return _mm256_popcnt_epi8(__A);
 }
+TEST_CONSTEXPR(match_v32qi(_mm256_popcnt_epi8((__m256i)(__v32qi){+5, -3, -10, +8, 0, -16, +16, -16, +3, +9, +15, +33, +63, +33, +53, +73, +5, -3, -10, +8, 0, -16, +16, -16, +3, +9, +15, +33, +63, +33, +53, +73}), 2, 7, 6, 1, 0, 4, 1, 4, 2, 2, 4, 2, 6, 2, 4, 3, 2, 7, 6, 1, 0, 4, 1, 4, 2, 2, 4, 2, 6, 2, 4, 3));
 
 __m256i test_mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B) {
   // CHECK-LABEL: test_mm256_mask_popcnt_epi8
@@ -67,6 +71,7 @@ __m128i test_mm_popcnt_epi8(__m128i __A) {
   // CHECK: @llvm.ctpop.v16i8
   return _mm_popcnt_epi8(__A);
 }
+TEST_CONSTEXPR(match_v16qi(_mm_popcnt_epi8((__m128i)(__v16qi){+5, -3, -10, +8, 0, -16, +16, -16, +3, +9, +15, +33, +63, +33, +53, +73}), 2, 7, 6, 1, 0, 4, 1, 4, 2, 2, 4, 2, 6, 2, 4, 3));
 
 __m128i test_mm_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) {
   // CHECK-LABEL: test_mm_mask_popcnt_epi8
diff --git a/clang/test/CodeGen/X86/builtin_test_helpers.h b/clang/test/CodeGen/X86/builtin_test_helpers.h
index f719694d41e25..5d4ee7d05d356 100644
--- a/clang/test/CodeGen/X86/builtin_test_helpers.h
+++ b/clang/test/CodeGen/X86/builtin_test_helpers.h
@@ -122,6 +122,36 @@ constexpr bool match_v16si(__m512i _v, int a, int b, int c, int d, int e, int f,
   return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h && v[8] == i && v[9] == j && v[10] == k && v[11] == l && v[12] == m && v[13] == n && v[14] == o && v[15] == p;
 }
 
+constexpr bool match_v32hi(__m512i _v, short __e00, short __e01, short __e02, short __e03, short __e04, short __e05, short __e06, short __e07,
+                                       short __e08, short __e09, short __e10, short __e11, short __e12, short __e13, short __e14, short __e15,
+                                       short __e16, short __e17, short __e18, short __e19, short __e20, short __e21, short __e22, short __e23,
+                                       short __e24, short __e25, short __e26, short __e27, short __e28, short __e29, short __e30, short __e31) {
+  __v32hi v = (__v32hi)_v;
+  return v[ 0] == __e00 && v[ 1] == __e01 && v[ 2] == __e02 && v[ 3] == __e03 && v[ 4] == __e04 && v[ 5] == __e05 && v[ 6] == __e06 && v[ 7] ==  __e07 &&
+         v[ 8] == __e08 && v[ 9] == __e09 && v[10] == __e10 && v[11] == __e11 && v[12] == __e12 && v[13] == __e13 && v[14] == __e14 && v[15] ==  __e15 &&
+         v[16] == __e16 && v[17] == __e17 && v[18] == __e18 && v[19] == __e19 && v[20] == __e20 && v[21] == __e21 && v[22] == __e22 && v[23] ==  __e23 &&
+         v[24] == __e24 && v[25] == __e25 && v[26] == __e26 && v[27] == __e27 && v[28] == __e28 && v[29] == __e29 && v[30] == __e30 && v[31] ==  __e31;
+}
+
+constexpr bool match_v64qi(__m512i _v, char __e00, char __e01, char __e02, char __e03, char __e04, char __e05, char __e06, char __e07,
+                                       char __e08, char __e09, char __e10, char __e11, char __e12, char __e13, char __e14, char __e15,
+                                       char __e16, char __e17, char __e18, char __e19, char __e20, char __e21, char __e22, char __e23,
+                                       char __e24, char __e25, char __e26, char __e27, char __e28, char __e29, char __e30, char __e31,
+                                       char __e32, char __e33, char __e34, char __e35, char __e36, char __e37, char __e38, char __e39,
+                                       char __e40, char __e41, char __e42, char __e43, char __e44, char __e45, char __e46, char __e47,
+                                       char __e48, char __e49, char __e50, char __e51, char __e52, char __e53, char __e54, char __e55,
+                                       char __e56, char __e57, char __e58, char __e59, char __e60, char __e61, char __e62, char __e63) {
+  __v64qi v = (__v64qi)_v;
+  return v[ 0] == __e00 && v[ 1] == __e01 && v[ 2] == __e02 && v[ 3] == __e03 && v[ 4] == __e04 && v[ 5] == __e05 && v[ 6] == __e06 && v[ 7] == __e07 &&
+         v[ 8] == __e08 && v[ 9] == __e09 && v[10] == __e10 && v[11] == __e11 && v[12] == __e12 && v[13] == __e13 && v[14] == __e14 && v[15] == __e15 &&
+         v[16] == __e16 && v[17] == __e17 && v[18] == __e18 && v[19] == __e19 && v[20] == __e20 && v[21] == __e21 && v[22] == __e22 && v[23] == __e23 &&
+         v[24] == __e24 && v[25] == __e25 && v[26] == __e26 && v[27] == __e27 && v[28] == __e28 && v[29] == __e29 && v[30] == __e30 && v[31] == __e31 &&
+         v[32] == __e32 && v[33] == __e33 && v[34] == __e34 && v[35] == __e35 && v[36] == __e36 && v[37] == __e37 && v[38] == __e38 && v[39] == __e39 &&
+         v[40] == __e40 && v[41] == __e41 && v[42] == __e42 && v[43] == __e43 && v[44] == __e44 && v[45] == __e45 && v[46] == __e46 && v[47] == __e47 &&
+         v[48] == __e48 && v[49] == __e49 && v[50] == __e50 && v[51] == __e51 && v[52] == __e52 && v[53] == __e53 && v[54] == __e54 && v[55] == __e55 &&
+         v[56] == __e56 && v[57] == __e57 && v[58] == __e58 && v[59] == __e59 && v[60] == __e60 && v[61] == __e61 && v[62] == __e62 && v[63] == __e63;
+}
+
 #define TEST_CONSTEXPR(...) static_assert(__VA_ARGS__)
 
 #else

Copy link

github-actions bot commented Aug 8, 2025

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:
git-clang-format --diff HEAD~1 HEAD --extensions h,c -- clang/lib/Headers/avx512bitalgintrin.h clang/lib/Headers/avx512vlbitalgintrin.h clang/test/CodeGen/X86/avx512bitalg-builtins.c clang/test/CodeGen/X86/avx512vlbitalg-builtins.c clang/test/CodeGen/X86/builtin_test_helpers.h
View the diff from clang-format here.
diff --git a/clang/lib/Headers/avx512bitalgintrin.h b/clang/lib/Headers/avx512bitalgintrin.h
index 9a1ff8f39..93c2685f0 100644
--- a/clang/lib/Headers/avx512bitalgintrin.h
+++ b/clang/lib/Headers/avx512bitalgintrin.h
@@ -27,8 +27,7 @@
 #endif
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm512_popcnt_epi16(__m512i __A)
-{
+_mm512_popcnt_epi16(__m512i __A) {
   return (__m512i)__builtin_elementwise_popcount((__v32hu)__A);
 }
 
@@ -49,8 +48,7 @@ _mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B)
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm512_popcnt_epi8(__m512i __A)
-{
+_mm512_popcnt_epi8(__m512i __A) {
   return (__m512i)__builtin_elementwise_popcount((__v64qu)__A);
 }
 
diff --git a/clang/lib/Headers/avx512vlbitalgintrin.h b/clang/lib/Headers/avx512vlbitalgintrin.h
index 739e78aab..4c4c2ca62 100644
--- a/clang/lib/Headers/avx512vlbitalgintrin.h
+++ b/clang/lib/Headers/avx512vlbitalgintrin.h
@@ -33,8 +33,7 @@
 #endif
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_popcnt_epi16(__m256i __A)
-{
+_mm256_popcnt_epi16(__m256i __A) {
   return (__m256i)__builtin_elementwise_popcount((__v16hu)__A);
 }
 
@@ -55,8 +54,7 @@ _mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B)
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_popcnt_epi16(__m128i __A)
-{
+_mm_popcnt_epi16(__m128i __A) {
   return (__m128i)__builtin_elementwise_popcount((__v8hu)__A);
 }
 
@@ -77,8 +75,7 @@ _mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __B)
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_popcnt_epi8(__m256i __A)
-{
+_mm256_popcnt_epi8(__m256i __A) {
   return (__m256i)__builtin_elementwise_popcount((__v32qu)__A);
 }
 
@@ -99,8 +96,7 @@ _mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B)
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_popcnt_epi8(__m128i __A)
-{
+_mm_popcnt_epi8(__m128i __A) {
   return (__m128i)__builtin_elementwise_popcount((__v16qu)__A);
 }
 

Copy link
Contributor

@phoebewang phoebewang left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM.

@RKSimon RKSimon merged commit f169893 into llvm:main Aug 8, 2025
12 of 13 checks passed
@RKSimon RKSimon deleted the x86-vpopcntbw-constexpr branch August 8, 2025 15:09
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:X86 clang:headers Headers provided by Clang, e.g. for intrinsics clang Clang issues not falling into any other category
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants