Skip to content

Commit 44061d1

Browse files
notnotharshRKSimon
andauthored
[Headers][X86] Allow AVX512 masked blend intrinsics to be used in constexpr (#156234)
This patch enables AVX-512 masked blend intrinsics to be usable in constant expressions (`constexpr`) across various vector widths (128-bit, 256-bit, 512-bit). It updates the respective Clang headers to include the `__DEFAULT_FN_ATTRS_CONSTEXPR` annotation where applicable, and supplements the change with thorough `TEST_CONSTEXPR` checks in the X86 CodeGen test suite to validate constexpr evaluation. Fixes #155796. --------- Co-authored-by: Simon Pilgrim <[email protected]>
1 parent 273917e commit 44061d1

File tree

10 files changed

+261
-51
lines changed

10 files changed

+261
-51
lines changed

clang/lib/Headers/avx512bwintrin.h

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -464,17 +464,15 @@ _mm512_maskz_mullo_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
464464
(__v32hi)_mm512_setzero_si512());
465465
}
466466

467-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
468-
_mm512_mask_blend_epi8 (__mmask64 __U, __m512i __A, __m512i __W)
469-
{
467+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
468+
_mm512_mask_blend_epi8(__mmask64 __U, __m512i __A, __m512i __W) {
470469
return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U,
471470
(__v64qi) __W,
472471
(__v64qi) __A);
473472
}
474473

475-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
476-
_mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W)
477-
{
474+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
475+
_mm512_mask_blend_epi16(__mmask32 __U, __m512i __A, __m512i __W) {
478476
return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U,
479477
(__v32hi) __W,
480478
(__v32hi) __A);

clang/lib/Headers/avx512fintrin.h

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3209,33 +3209,29 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
32093209

32103210
/* Vector Blend */
32113211

3212-
static __inline __m512d __DEFAULT_FN_ATTRS512
3213-
_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
3214-
{
3212+
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
3213+
_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) {
32153214
return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
32163215
(__v8df) __W,
32173216
(__v8df) __A);
32183217
}
32193218

3220-
static __inline __m512 __DEFAULT_FN_ATTRS512
3221-
_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
3222-
{
3219+
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
3220+
_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W) {
32233221
return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
32243222
(__v16sf) __W,
32253223
(__v16sf) __A);
32263224
}
32273225

3228-
static __inline __m512i __DEFAULT_FN_ATTRS512
3229-
_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
3230-
{
3226+
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3227+
_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) {
32313228
return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
32323229
(__v8di) __W,
32333230
(__v8di) __A);
32343231
}
32353232

3236-
static __inline __m512i __DEFAULT_FN_ATTRS512
3237-
_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
3238-
{
3233+
static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
3234+
_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) {
32393235
return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
32403236
(__v16si) __W,
32413237
(__v16si) __A);

clang/lib/Headers/avx512fp16intrin.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3309,7 +3309,7 @@ _mm512_reduce_min_ph(__m512h __V) {
33093309
return __builtin_ia32_reduce_fmin_ph512(__V);
33103310
}
33113311

3312-
static __inline__ __m512h __DEFAULT_FN_ATTRS512
3312+
static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR
33133313
_mm512_mask_blend_ph(__mmask32 __U, __m512h __A, __m512h __W) {
33143314
return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U, (__v32hf)__W,
33153315
(__v32hf)__A);

clang/lib/Headers/avx512vlbwintrin.h

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -452,33 +452,29 @@ _mm_maskz_mullo_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
452452
(__v8hi)_mm_setzero_si128());
453453
}
454454

455-
static __inline__ __m128i __DEFAULT_FN_ATTRS128
456-
_mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W)
457-
{
455+
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
456+
_mm_mask_blend_epi8(__mmask16 __U, __m128i __A, __m128i __W) {
458457
return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U,
459458
(__v16qi) __W,
460459
(__v16qi) __A);
461460
}
462461

463-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
464-
_mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W)
465-
{
462+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
463+
_mm256_mask_blend_epi8(__mmask32 __U, __m256i __A, __m256i __W) {
466464
return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U,
467465
(__v32qi) __W,
468466
(__v32qi) __A);
469467
}
470468

471-
static __inline__ __m128i __DEFAULT_FN_ATTRS128
472-
_mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W)
473-
{
469+
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
470+
_mm_mask_blend_epi16(__mmask8 __U, __m128i __A, __m128i __W) {
474471
return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U,
475472
(__v8hi) __W,
476473
(__v8hi) __A);
477474
}
478475

479-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
480-
_mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W)
481-
{
476+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
477+
_mm256_mask_blend_epi16(__mmask16 __U, __m256i __A, __m256i __W) {
482478
return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U,
483479
(__v16hi) __W,
484480
(__v16hi) __A);

clang/lib/Headers/avx512vlfp16intrin.h

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1995,14 +1995,13 @@ _mm256_maskz_fmadd_pch(__mmask8 __U, __m256h __A, __m256h __B, __m256h __C) {
19951995
(__v8sf)__C, (__mmask8)__U);
19961996
}
19971997

1998-
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_blend_ph(__mmask8 __U,
1999-
__m128h __A,
2000-
__m128h __W) {
1998+
static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
1999+
_mm_mask_blend_ph(__mmask8 __U, __m128h __A, __m128h __W) {
20012000
return (__m128h)__builtin_ia32_selectph_128((__mmask8)__U, (__v8hf)__W,
20022001
(__v8hf)__A);
20032002
}
20042003

2005-
static __inline__ __m256h __DEFAULT_FN_ATTRS256
2004+
static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
20062005
_mm256_mask_blend_ph(__mmask16 __U, __m256h __A, __m256h __W) {
20072006
return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U, (__v16hf)__W,
20082007
(__v16hf)__A);

clang/lib/Headers/avx512vlintrin.h

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1498,57 +1498,57 @@ _mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) {
14981498
(__v8sf)_mm256_setzero_ps());
14991499
}
15001500

1501-
static __inline__ __m128i __DEFAULT_FN_ATTRS128
1502-
_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) {
1501+
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
1502+
_mm_mask_blend_epi32(__mmask8 __U, __m128i __A, __m128i __W) {
15031503
return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
15041504
(__v4si) __W,
15051505
(__v4si) __A);
15061506
}
15071507

1508-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
1509-
_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) {
1508+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
1509+
_mm256_mask_blend_epi32(__mmask8 __U, __m256i __A, __m256i __W) {
15101510
return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
15111511
(__v8si) __W,
15121512
(__v8si) __A);
15131513
}
15141514

1515-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
1516-
_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) {
1515+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1516+
_mm_mask_blend_pd(__mmask8 __U, __m128d __A, __m128d __W) {
15171517
return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
15181518
(__v2df) __W,
15191519
(__v2df) __A);
15201520
}
15211521

1522-
static __inline__ __m256d __DEFAULT_FN_ATTRS256
1523-
_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) {
1522+
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
1523+
_mm256_mask_blend_pd(__mmask8 __U, __m256d __A, __m256d __W) {
15241524
return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
15251525
(__v4df) __W,
15261526
(__v4df) __A);
15271527
}
15281528

1529-
static __inline__ __m128 __DEFAULT_FN_ATTRS128
1530-
_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) {
1529+
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1530+
_mm_mask_blend_ps(__mmask8 __U, __m128 __A, __m128 __W) {
15311531
return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
15321532
(__v4sf) __W,
15331533
(__v4sf) __A);
15341534
}
15351535

1536-
static __inline__ __m256 __DEFAULT_FN_ATTRS256
1537-
_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) {
1536+
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1537+
_mm256_mask_blend_ps(__mmask8 __U, __m256 __A, __m256 __W) {
15381538
return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
15391539
(__v8sf) __W,
15401540
(__v8sf) __A);
15411541
}
15421542

1543-
static __inline__ __m128i __DEFAULT_FN_ATTRS128
1544-
_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) {
1543+
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
1544+
_mm_mask_blend_epi64(__mmask8 __U, __m128i __A, __m128i __W) {
15451545
return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
15461546
(__v2di) __W,
15471547
(__v2di) __A);
15481548
}
15491549

1550-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
1551-
_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) {
1550+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
1551+
_mm256_mask_blend_epi64(__mmask8 __U, __m256i __A, __m256i __W) {
15521552
return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
15531553
(__v4di) __W,
15541554
(__v4di) __A);

clang/test/CodeGen/X86/avx512bw-builtins.c

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -854,11 +854,57 @@ __m512i test_mm512_mask_blend_epi8(__mmask64 __U, __m512i __A, __m512i __W) {
854854
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
855855
return _mm512_mask_blend_epi8(__U,__A,__W);
856856
}
857+
TEST_CONSTEXPR(match_v64qi(
858+
_mm512_mask_blend_epi8(
859+
(__mmask64) 0x00000001,
860+
(__m512i)(__v64qi) {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
861+
(__m512i)(__v64qi){ 10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25, 10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25}
862+
),
863+
10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
864+
));
857865
__m512i test_mm512_mask_blend_epi16(__mmask32 __U, __m512i __A, __m512i __W) {
858866
// CHECK-LABEL: test_mm512_mask_blend_epi16
859867
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
860868
return _mm512_mask_blend_epi16(__U,__A,__W);
861869
}
870+
TEST_CONSTEXPR(match_v32hi(
871+
_mm512_mask_blend_epi16(
872+
(__mmask32) 0x00000001,
873+
(__m512i)(__v32hi) {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
874+
(__m512i)(__v32hi){ 10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25}
875+
),
876+
10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
877+
));
878+
879+
__m512i test_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) {
880+
// CHECK-LABEL: test_mm512_mask_blend_epi32
881+
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
882+
return _mm512_mask_blend_epi32(__U, __A, __W);
883+
}
884+
TEST_CONSTEXPR(match_v16si(
885+
_mm512_mask_blend_epi32(
886+
(__mmask16) 0x0001,
887+
(__m512i)(__v16si) {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
888+
(__m512i)(__v16si){ 10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25}
889+
),
890+
10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
891+
));
892+
893+
__m512i test_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) {
894+
// CHECK-LABEL: test_mm512_mask_blend_epi64
895+
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
896+
return _mm512_mask_blend_epi64(__U, __A, __W);
897+
}
898+
899+
TEST_CONSTEXPR(match_v8di(
900+
_mm512_mask_blend_epi64(
901+
(__mmask8)0x01,
902+
(__m512i)(__v8di){2, 2, 2, 2, 2, 2, 2, 2},
903+
(__m512i)(__v8di){10, 11, 12, 13, 14, 15, 16, 17}
904+
),
905+
10, 2, 2, 2, 2, 2, 2, 2
906+
));
907+
862908
__m512i test_mm512_abs_epi8(__m512i __A) {
863909
// CHECK-LABEL: test_mm512_abs_epi8
864910
// CHECK: [[ABS:%.*]] = call <64 x i8> @llvm.abs.v64i8(<64 x i8> %{{.*}}, i1 false)

0 commit comments

Comments
 (0)