diff --git a/clang/lib/Headers/avx512bitalgintrin.h b/clang/lib/Headers/avx512bitalgintrin.h index 3c446b34e7885..9a1ff8f39734f 100644 --- a/clang/lib/Headers/avx512bitalgintrin.h +++ b/clang/lib/Headers/avx512bitalgintrin.h @@ -20,7 +20,13 @@ __target__("avx512bitalg,evex512"), \ __min_vector_width__(512))) -static __inline__ __m512i __DEFAULT_FN_ATTRS +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr +#else +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS +#endif + +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_popcnt_epi16(__m512i __A) { return (__m512i)__builtin_elementwise_popcount((__v32hu)__A); @@ -42,7 +48,7 @@ _mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR _mm512_popcnt_epi8(__m512i __A) { return (__m512i)__builtin_elementwise_popcount((__v64qu)__A); @@ -80,7 +86,7 @@ _mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B) __B); } - #undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS_CONSTEXPR #endif diff --git a/clang/lib/Headers/avx512vlbitalgintrin.h b/clang/lib/Headers/avx512vlbitalgintrin.h index 1b01fe0b9d815..739e78aab753d 100644 --- a/clang/lib/Headers/avx512vlbitalgintrin.h +++ b/clang/lib/Headers/avx512vlbitalgintrin.h @@ -24,7 +24,15 @@ __target__("avx512vl,avx512bitalg,no-evex512"), \ __min_vector_width__(256))) -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr +#else +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 +#endif + +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_popcnt_epi16(__m256i __A) { return 
(__m256i)__builtin_elementwise_popcount((__v16hu)__A); @@ -46,7 +54,7 @@ _mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B) __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_popcnt_epi16(__m128i __A) { return (__m128i)__builtin_elementwise_popcount((__v8hu)__A); @@ -68,7 +76,7 @@ _mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __B) __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_popcnt_epi8(__m256i __A) { return (__m256i)__builtin_elementwise_popcount((__v32qu)__A); @@ -90,7 +98,7 @@ _mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B) __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_popcnt_epi8(__m128i __A) { return (__m128i)__builtin_elementwise_popcount((__v16qu)__A); @@ -147,5 +155,7 @@ _mm_bitshuffle_epi64_mask(__m128i __A, __m128i __B) #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 +#undef __DEFAULT_FN_ATTRS128_CONSTEXPR +#undef __DEFAULT_FN_ATTRS256_CONSTEXPR #endif diff --git a/clang/test/CodeGen/X86/avx512bitalg-builtins.c b/clang/test/CodeGen/X86/avx512bitalg-builtins.c index 0468fba7d534c..30d364a283641 100644 --- a/clang/test/CodeGen/X86/avx512bitalg-builtins.c +++ b/clang/test/CodeGen/X86/avx512bitalg-builtins.c @@ -4,12 +4,14 @@ // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512bitalg -emit-llvm -o - -Wall -Werror | FileCheck %s #include <immintrin.h> +#include "builtin_test_helpers.h" __m512i test_mm512_popcnt_epi16(__m512i __A) { // CHECK-LABEL: test_mm512_popcnt_epi16 // CHECK: @llvm.ctpop.v32i16 return _mm512_popcnt_epi16(__A); } +TEST_CONSTEXPR(match_v32hi(_mm512_popcnt_epi16((__m512i)(__v32hi){+5, -3, -10, +8, 0, -256, +256, -128, +3, +9, +15, +33, +63, +129, +511, +1025, +5, -3, -10, +8, 0, -256, +256, -128, +3, +9, +15, +33, +63, +129, +511, +1025}), 2, 15, 14, 
1, 0, 8, 1, 9, 2, 2, 4, 2, 6, 2, 9, 2, 2, 15, 14, 1, 0, 8, 1, 9, 2, 2, 4, 2, 6, 2, 9, 2)); __m512i test_mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B) { // CHECK-LABEL: test_mm512_mask_popcnt_epi16 @@ -29,6 +31,7 @@ __m512i test_mm512_popcnt_epi8(__m512i __A) { // CHECK: @llvm.ctpop.v64i8 return _mm512_popcnt_epi8(__A); } +TEST_CONSTEXPR(match_v64qi(_mm512_popcnt_epi8((__m512i)(__v64qi){+5, -3, -10, +8, 0, -16, +16, -16, +3, +9, +15, +33, +63, +33, +53, +73, +5, -3, -10, +8, 0, -16, +16, -16, +3, +9, +15, +33, +63, +33, +53, +73, +5, -3, -10, +8, 0, -16, +16, -16, +3, +9, +15, +33, +63, +33, +53, +73, +5, -3, -10, +8, 0, -16, +16, -16, +3, +9, +15, +33, +63, +33, +53, +73}), 2, 7, 6, 1, 0, 4, 1, 4, 2, 2, 4, 2, 6, 2, 4, 3, 2, 7, 6, 1, 0, 4, 1, 4, 2, 2, 4, 2, 6, 2, 4, 3, 2, 7, 6, 1, 0, 4, 1, 4, 2, 2, 4, 2, 6, 2, 4, 3, 2, 7, 6, 1, 0, 4, 1, 4, 2, 2, 4, 2, 6, 2, 4, 3)); __m512i test_mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B) { // CHECK-LABEL: test_mm512_mask_popcnt_epi8 diff --git a/clang/test/CodeGen/X86/avx512vlbitalg-builtins.c b/clang/test/CodeGen/X86/avx512vlbitalg-builtins.c index 767123dbd6e24..b53410ae43297 100644 --- a/clang/test/CodeGen/X86/avx512vlbitalg-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlbitalg-builtins.c @@ -4,12 +4,14 @@ // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512bitalg -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s #include <immintrin.h> +#include "builtin_test_helpers.h" __m256i test_mm256_popcnt_epi16(__m256i __A) { // CHECK-LABEL: test_mm256_popcnt_epi16 // CHECK: @llvm.ctpop.v16i16 return _mm256_popcnt_epi16(__A); } +TEST_CONSTEXPR(match_v16hi(_mm256_popcnt_epi16((__m256i)(__v16hi){+5, -3, -10, +8, 0, -256, +256, -128, +3, +9, +15, +33, +63, +129, +511, +1025}), 2, 15, 14, 1, 0, 8, 1, 9, 2, 2, 4, 2, 6, 2, 9, 2)); __m256i test_mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B) { // CHECK-LABEL: 
test_mm256_mask_popcnt_epi16 @@ -29,6 +31,7 @@ __m128i test_mm_popcnt_epi16(__m128i __A) { // CHECK: @llvm.ctpop.v8i16 return _mm_popcnt_epi16(__A); } +TEST_CONSTEXPR(match_v8hi(_mm_popcnt_epi16((__m128i)(__v8hi){+5, -3, -10, +8, 0, -256, +256, -128}), 2, 15, 14, 1, 0, 8, 1, 9)); __m128i test_mm_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) { // CHECK-LABEL: test_mm_mask_popcnt_epi16 @@ -48,6 +51,7 @@ __m256i test_mm256_popcnt_epi8(__m256i __A) { // CHECK: @llvm.ctpop.v32i8 return _mm256_popcnt_epi8(__A); } +TEST_CONSTEXPR(match_v32qi(_mm256_popcnt_epi8((__m256i)(__v32qi){+5, -3, -10, +8, 0, -16, +16, -16, +3, +9, +15, +33, +63, +33, +53, +73, +5, -3, -10, +8, 0, -16, +16, -16, +3, +9, +15, +33, +63, +33, +53, +73}), 2, 7, 6, 1, 0, 4, 1, 4, 2, 2, 4, 2, 6, 2, 4, 3, 2, 7, 6, 1, 0, 4, 1, 4, 2, 2, 4, 2, 6, 2, 4, 3)); __m256i test_mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B) { // CHECK-LABEL: test_mm256_mask_popcnt_epi8 @@ -67,6 +71,7 @@ __m128i test_mm_popcnt_epi8(__m128i __A) { // CHECK: @llvm.ctpop.v16i8 return _mm_popcnt_epi8(__A); } +TEST_CONSTEXPR(match_v16qi(_mm_popcnt_epi8((__m128i)(__v16qi){+5, -3, -10, +8, 0, -16, +16, -16, +3, +9, +15, +33, +63, +33, +53, +73}), 2, 7, 6, 1, 0, 4, 1, 4, 2, 2, 4, 2, 6, 2, 4, 3)); __m128i test_mm_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) { // CHECK-LABEL: test_mm_mask_popcnt_epi8 diff --git a/clang/test/CodeGen/X86/builtin_test_helpers.h b/clang/test/CodeGen/X86/builtin_test_helpers.h index f719694d41e25..5d4ee7d05d356 100644 --- a/clang/test/CodeGen/X86/builtin_test_helpers.h +++ b/clang/test/CodeGen/X86/builtin_test_helpers.h @@ -122,6 +122,36 @@ constexpr bool match_v16si(__m512i _v, int a, int b, int c, int d, int e, int f, return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h && v[8] == i && v[9] == j && v[10] == k && v[11] == l && v[12] == m && v[13] == n && v[14] == o && v[15] == p; } +constexpr bool match_v32hi(__m512i 
_v, short __e00, short __e01, short __e02, short __e03, short __e04, short __e05, short __e06, short __e07, + short __e08, short __e09, short __e10, short __e11, short __e12, short __e13, short __e14, short __e15, + short __e16, short __e17, short __e18, short __e19, short __e20, short __e21, short __e22, short __e23, + short __e24, short __e25, short __e26, short __e27, short __e28, short __e29, short __e30, short __e31) { + __v32hi v = (__v32hi)_v; + return v[ 0] == __e00 && v[ 1] == __e01 && v[ 2] == __e02 && v[ 3] == __e03 && v[ 4] == __e04 && v[ 5] == __e05 && v[ 6] == __e06 && v[ 7] == __e07 && + v[ 8] == __e08 && v[ 9] == __e09 && v[10] == __e10 && v[11] == __e11 && v[12] == __e12 && v[13] == __e13 && v[14] == __e14 && v[15] == __e15 && + v[16] == __e16 && v[17] == __e17 && v[18] == __e18 && v[19] == __e19 && v[20] == __e20 && v[21] == __e21 && v[22] == __e22 && v[23] == __e23 && + v[24] == __e24 && v[25] == __e25 && v[26] == __e26 && v[27] == __e27 && v[28] == __e28 && v[29] == __e29 && v[30] == __e30 && v[31] == __e31; +} + +constexpr bool match_v64qi(__m512i _v, char __e00, char __e01, char __e02, char __e03, char __e04, char __e05, char __e06, char __e07, + char __e08, char __e09, char __e10, char __e11, char __e12, char __e13, char __e14, char __e15, + char __e16, char __e17, char __e18, char __e19, char __e20, char __e21, char __e22, char __e23, + char __e24, char __e25, char __e26, char __e27, char __e28, char __e29, char __e30, char __e31, + char __e32, char __e33, char __e34, char __e35, char __e36, char __e37, char __e38, char __e39, + char __e40, char __e41, char __e42, char __e43, char __e44, char __e45, char __e46, char __e47, + char __e48, char __e49, char __e50, char __e51, char __e52, char __e53, char __e54, char __e55, + char __e56, char __e57, char __e58, char __e59, char __e60, char __e61, char __e62, char __e63) { + __v64qi v = (__v64qi)_v; + return v[ 0] == __e00 && v[ 1] == __e01 && v[ 2] == __e02 && v[ 3] == __e03 && v[ 4] == __e04 && 
v[ 5] == __e05 && v[ 6] == __e06 && v[ 7] == __e07 && + v[ 8] == __e08 && v[ 9] == __e09 && v[10] == __e10 && v[11] == __e11 && v[12] == __e12 && v[13] == __e13 && v[14] == __e14 && v[15] == __e15 && + v[16] == __e16 && v[17] == __e17 && v[18] == __e18 && v[19] == __e19 && v[20] == __e20 && v[21] == __e21 && v[22] == __e22 && v[23] == __e23 && + v[24] == __e24 && v[25] == __e25 && v[26] == __e26 && v[27] == __e27 && v[28] == __e28 && v[29] == __e29 && v[30] == __e30 && v[31] == __e31 && + v[32] == __e32 && v[33] == __e33 && v[34] == __e34 && v[35] == __e35 && v[36] == __e36 && v[37] == __e37 && v[38] == __e38 && v[39] == __e39 && + v[40] == __e40 && v[41] == __e41 && v[42] == __e42 && v[43] == __e43 && v[44] == __e44 && v[45] == __e45 && v[46] == __e46 && v[47] == __e47 && + v[48] == __e48 && v[49] == __e49 && v[50] == __e50 && v[51] == __e51 && v[52] == __e52 && v[53] == __e53 && v[54] == __e54 && v[55] == __e55 && + v[56] == __e56 && v[57] == __e57 && v[58] == __e58 && v[59] == __e59 && v[60] == __e60 && v[61] == __e61 && v[62] == __e62 && v[63] == __e63; +} + #define TEST_CONSTEXPR(...) static_assert(__VA_ARGS__) #else