Skip to content

Commit 8bdd909

Browse files
authored
[Headers][X86] Add constexpr support for some AVX[512] intrinsics. (#156567)
The following AVX[512] intrinsics are now constexpr:

- `_mm_mask_cvtepi32_pd`
- `_mm_maskz_cvtepi32_pd`
- `_mm_mask_cvtepi32_ps`
- `_mm_maskz_cvtepi32_ps`
- `_mm_cvtepu32_pd`
- `_mm_mask_cvtepu32_pd`
- `_mm_maskz_cvtepu32_pd`
- `_mm_cvtepu32_ps`
- `_mm_mask_cvtepu32_ps`
- `_mm_maskz_cvtepu32_ps`
- `_mm256_mask_cvtepi32_pd`
- `_mm256_maskz_cvtepi32_pd`
- `_mm256_mask_cvtepi32_ps`
- `_mm256_maskz_cvtepi32_ps`
- `_mm256_cvtepu32_pd`
- `_mm256_mask_cvtepu32_pd`
- `_mm256_maskz_cvtepu32_pd`
- `_mm256_cvtepu32_ps`
- `_mm256_mask_cvtepu32_ps`
- `_mm256_maskz_cvtepu32_ps`
- `_mm512_cvtepi64_pd`
- `_mm512_mask_cvtepi64_pd`
- `_mm512_maskz_cvtepi64_pd`
- `_mm512_cvtepu64_pd`
- `_mm512_mask_cvtepu64_pd`
- `_mm512_maskz_cvtepu64_pd`

This PR is part 2 (part 1: #156187) of a series of PRs fixing #155798.
1 parent 209d91d commit 8bdd909

File tree

4 files changed

+124
-53
lines changed

4 files changed

+124
-53
lines changed

clang/lib/Headers/avx512dqintrin.h

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -483,21 +483,20 @@ _mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) {
483483
(__v8di)_mm512_setzero_si512(), \
484484
(__mmask8)(U), (int)(R)))
485485

486-
487-
static __inline__ __m512d __DEFAULT_FN_ATTRS512
488-
_mm512_cvtepi64_pd (__m512i __A) {
486+
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
487+
_mm512_cvtepi64_pd(__m512i __A) {
489488
return (__m512d)__builtin_convertvector((__v8di)__A, __v8df);
490489
}
491490

492-
static __inline__ __m512d __DEFAULT_FN_ATTRS512
493-
_mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
491+
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
492+
_mm512_mask_cvtepi64_pd(__m512d __W, __mmask8 __U, __m512i __A) {
494493
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
495494
(__v8df)_mm512_cvtepi64_pd(__A),
496495
(__v8df)__W);
497496
}
498497

499-
static __inline__ __m512d __DEFAULT_FN_ATTRS512
500-
_mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A) {
498+
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
499+
_mm512_maskz_cvtepi64_pd(__mmask8 __U, __m512i __A) {
501500
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
502501
(__v8df)_mm512_cvtepi64_pd(__A),
503502
(__v8df)_mm512_setzero_pd());
@@ -714,20 +713,20 @@ _mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) {
714713
(__v8di)_mm512_setzero_si512(), \
715714
(__mmask8)(U), (int)(R)))
716715

717-
static __inline__ __m512d __DEFAULT_FN_ATTRS512
718-
_mm512_cvtepu64_pd (__m512i __A) {
716+
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
717+
_mm512_cvtepu64_pd(__m512i __A) {
719718
return (__m512d)__builtin_convertvector((__v8du)__A, __v8df);
720719
}
721720

722-
static __inline__ __m512d __DEFAULT_FN_ATTRS512
723-
_mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
721+
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
722+
_mm512_mask_cvtepu64_pd(__m512d __W, __mmask8 __U, __m512i __A) {
724723
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
725724
(__v8df)_mm512_cvtepu64_pd(__A),
726725
(__v8df)__W);
727726
}
728727

729-
static __inline__ __m512d __DEFAULT_FN_ATTRS512
730-
_mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A) {
728+
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
729+
_mm512_maskz_cvtepu64_pd(__mmask8 __U, __m512i __A) {
731730
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
732731
(__v8df)_mm512_cvtepu64_pd(__A),
733732
(__v8df)_mm512_setzero_pd());

clang/lib/Headers/avx512vlintrin.h

Lines changed: 40 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1732,57 +1732,57 @@ _mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A) {
17321732
(__mmask8) __U);
17331733
}
17341734

1735-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
1736-
_mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
1735+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1736+
_mm_mask_cvtepi32_pd(__m128d __W, __mmask8 __U, __m128i __A) {
17371737
return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
17381738
(__v2df)_mm_cvtepi32_pd(__A),
17391739
(__v2df)__W);
17401740
}
17411741

1742-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
1743-
_mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) {
1742+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1743+
_mm_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A) {
17441744
return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
17451745
(__v2df)_mm_cvtepi32_pd(__A),
17461746
(__v2df)_mm_setzero_pd());
17471747
}
17481748

1749-
static __inline__ __m256d __DEFAULT_FN_ATTRS256
1750-
_mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
1749+
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
1750+
_mm256_mask_cvtepi32_pd(__m256d __W, __mmask8 __U, __m128i __A) {
17511751
return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
17521752
(__v4df)_mm256_cvtepi32_pd(__A),
17531753
(__v4df)__W);
17541754
}
17551755

1756-
static __inline__ __m256d __DEFAULT_FN_ATTRS256
1757-
_mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) {
1756+
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
1757+
_mm256_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A) {
17581758
return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
17591759
(__v4df)_mm256_cvtepi32_pd(__A),
17601760
(__v4df)_mm256_setzero_pd());
17611761
}
17621762

1763-
static __inline__ __m128 __DEFAULT_FN_ATTRS128
1764-
_mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
1763+
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1764+
_mm_mask_cvtepi32_ps(__m128 __W, __mmask8 __U, __m128i __A) {
17651765
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
17661766
(__v4sf)_mm_cvtepi32_ps(__A),
17671767
(__v4sf)__W);
17681768
}
17691769

1770-
static __inline__ __m128 __DEFAULT_FN_ATTRS128
1771-
_mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A) {
1770+
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1771+
_mm_maskz_cvtepi32_ps(__mmask8 __U, __m128i __A) {
17721772
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
17731773
(__v4sf)_mm_cvtepi32_ps(__A),
17741774
(__v4sf)_mm_setzero_ps());
17751775
}
17761776

1777-
static __inline__ __m256 __DEFAULT_FN_ATTRS256
1778-
_mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
1777+
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1778+
_mm256_mask_cvtepi32_ps(__m256 __W, __mmask8 __U, __m256i __A) {
17791779
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
17801780
(__v8sf)_mm256_cvtepi32_ps(__A),
17811781
(__v8sf)__W);
17821782
}
17831783

1784-
static __inline__ __m256 __DEFAULT_FN_ATTRS256
1785-
_mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A) {
1784+
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1785+
_mm256_maskz_cvtepi32_ps(__mmask8 __U, __m256i __A) {
17861786
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
17871787
(__v8sf)_mm256_cvtepi32_ps(__A),
17881788
(__v8sf)_mm256_setzero_ps());
@@ -2143,78 +2143,78 @@ _mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A) {
21432143
(__mmask8) __U);
21442144
}
21452145

2146-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
2147-
_mm_cvtepu32_pd (__m128i __A) {
2146+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
2147+
_mm_cvtepu32_pd(__m128i __A) {
21482148
return (__m128d) __builtin_convertvector(
21492149
__builtin_shufflevector((__v4su)__A, (__v4su)__A, 0, 1), __v2df);
21502150
}
21512151

2152-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
2153-
_mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
2152+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
2153+
_mm_mask_cvtepu32_pd(__m128d __W, __mmask8 __U, __m128i __A) {
21542154
return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
21552155
(__v2df)_mm_cvtepu32_pd(__A),
21562156
(__v2df)__W);
21572157
}
21582158

2159-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
2160-
_mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) {
2159+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
2160+
_mm_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A) {
21612161
return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
21622162
(__v2df)_mm_cvtepu32_pd(__A),
21632163
(__v2df)_mm_setzero_pd());
21642164
}
21652165

2166-
static __inline__ __m256d __DEFAULT_FN_ATTRS256
2167-
_mm256_cvtepu32_pd (__m128i __A) {
2166+
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
2167+
_mm256_cvtepu32_pd(__m128i __A) {
21682168
return (__m256d)__builtin_convertvector((__v4su)__A, __v4df);
21692169
}
21702170

2171-
static __inline__ __m256d __DEFAULT_FN_ATTRS256
2172-
_mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
2171+
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
2172+
_mm256_mask_cvtepu32_pd(__m256d __W, __mmask8 __U, __m128i __A) {
21732173
return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
21742174
(__v4df)_mm256_cvtepu32_pd(__A),
21752175
(__v4df)__W);
21762176
}
21772177

2178-
static __inline__ __m256d __DEFAULT_FN_ATTRS256
2179-
_mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) {
2178+
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
2179+
_mm256_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A) {
21802180
return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
21812181
(__v4df)_mm256_cvtepu32_pd(__A),
21822182
(__v4df)_mm256_setzero_pd());
21832183
}
21842184

2185-
static __inline__ __m128 __DEFAULT_FN_ATTRS128
2186-
_mm_cvtepu32_ps (__m128i __A) {
2185+
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
2186+
_mm_cvtepu32_ps(__m128i __A) {
21872187
return (__m128)__builtin_convertvector((__v4su)__A, __v4sf);
21882188
}
21892189

2190-
static __inline__ __m128 __DEFAULT_FN_ATTRS128
2191-
_mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
2190+
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
2191+
_mm_mask_cvtepu32_ps(__m128 __W, __mmask8 __U, __m128i __A) {
21922192
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
21932193
(__v4sf)_mm_cvtepu32_ps(__A),
21942194
(__v4sf)__W);
21952195
}
21962196

2197-
static __inline__ __m128 __DEFAULT_FN_ATTRS128
2198-
_mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A) {
2197+
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
2198+
_mm_maskz_cvtepu32_ps(__mmask8 __U, __m128i __A) {
21992199
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
22002200
(__v4sf)_mm_cvtepu32_ps(__A),
22012201
(__v4sf)_mm_setzero_ps());
22022202
}
22032203

2204-
static __inline__ __m256 __DEFAULT_FN_ATTRS256
2205-
_mm256_cvtepu32_ps (__m256i __A) {
2204+
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
2205+
_mm256_cvtepu32_ps(__m256i __A) {
22062206
return (__m256)__builtin_convertvector((__v8su)__A, __v8sf);
22072207
}
22082208

2209-
static __inline__ __m256 __DEFAULT_FN_ATTRS256
2210-
_mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
2209+
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
2210+
_mm256_mask_cvtepu32_ps(__m256 __W, __mmask8 __U, __m256i __A) {
22112211
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
22122212
(__v8sf)_mm256_cvtepu32_ps(__A),
22132213
(__v8sf)__W);
22142214
}
22152215

2216-
static __inline__ __m256 __DEFAULT_FN_ATTRS256
2217-
_mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A) {
2216+
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
2217+
_mm256_maskz_cvtepu32_ps(__mmask8 __U, __m256i __A) {
22182218
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
22192219
(__v8sf)_mm256_cvtepu32_ps(__A),
22202220
(__v8sf)_mm256_setzero_ps());

clang/test/CodeGen/X86/avx512dq-builtins.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -602,20 +602,26 @@ __m512d test_mm512_cvtepi64_pd(__m512i __A) {
602602
return _mm512_cvtepi64_pd(__A);
603603
}
604604

605+
TEST_CONSTEXPR(match_m512d(_mm512_cvtepi64_pd((__m512i)(__v8di){-1, -1, 2, 2, -4, -4, 8, 8}), -1.0, -1.0, 2.0, 2.0, -4.0, -4.0, 8.0, 8.0));
606+
605607
__m512d test_mm512_mask_cvtepi64_pd(__m512d __W, __mmask8 __U, __m512i __A) {
606608
// CHECK-LABEL: test_mm512_mask_cvtepi64_pd
607609
// CHECK: sitofp <8 x i64> %{{.*}} to <8 x double>
608610
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
609611
return _mm512_mask_cvtepi64_pd(__W, __U, __A);
610612
}
611613

614+
TEST_CONSTEXPR(match_m512d(_mm512_mask_cvtepi64_pd(_mm512_set1_pd(-777.0), /*1010 0101=*/0xa5, (__m512i)(__v8di){-1, -1, 2, 2, -4, -4, 8, 8}), -1.0, -777.0, 2.0, -777.0, -777.0, -4.0, -777.0, 8.0));
615+
612616
__m512d test_mm512_maskz_cvtepi64_pd(__mmask8 __U, __m512i __A) {
613617
// CHECK-LABEL: test_mm512_maskz_cvtepi64_pd
614618
// CHECK: sitofp <8 x i64> %{{.*}} to <8 x double>
615619
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
616620
return _mm512_maskz_cvtepi64_pd(__U, __A);
617621
}
618622

623+
TEST_CONSTEXPR(match_m512d(_mm512_maskz_cvtepi64_pd(/*1010 0101=*/0xa5, (__m512i)(__v8di){-1, -1, 2, 2, -4, -4, 8, 8}), -1.0, 0.0, 2.0, 0.0, 0.0, -4.0, 0.0, 8.0));
624+
619625
__m512d test_mm512_cvt_roundepi64_pd(__m512i __A) {
620626
// CHECK-LABEL: test_mm512_cvt_roundepi64_pd
621627
// CHECK: @llvm.x86.avx512.sitofp.round.v8f64.v8i64
@@ -826,20 +832,26 @@ __m512d test_mm512_cvtepu64_pd(__m512i __A) {
826832
return _mm512_cvtepu64_pd(__A);
827833
}
828834

835+
TEST_CONSTEXPR(match_m512d(_mm512_cvtepu64_pd((__m512i)(__v8du){1, 1, 2, 2, 4, 4, 8, 8}), 1.0, 1.0, 2.0, 2.0, 4.0, 4.0, 8.0, 8.0));
836+
829837
__m512d test_mm512_mask_cvtepu64_pd(__m512d __W, __mmask8 __U, __m512i __A) {
830838
// CHECK-LABEL: test_mm512_mask_cvtepu64_pd
831839
// CHECK: uitofp <8 x i64> %{{.*}} to <8 x double>
832840
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
833841
return _mm512_mask_cvtepu64_pd(__W, __U, __A);
834842
}
835843

844+
TEST_CONSTEXPR(match_m512d(_mm512_mask_cvtepu64_pd(_mm512_set1_pd(-777.0), /*1010 0101=*/0xa5, (__m512i)(__v8du){1, 1, 2, 2, 4, 4, 8, 8}), 1.0, -777.0, 2.0, -777.0, -777.0, 4.0, -777.0, 8.0));
845+
836846
__m512d test_mm512_maskz_cvtepu64_pd(__mmask8 __U, __m512i __A) {
837847
// CHECK-LABEL: test_mm512_maskz_cvtepu64_pd
838848
// CHECK: uitofp <8 x i64> %{{.*}} to <8 x double>
839849
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
840850
return _mm512_maskz_cvtepu64_pd(__U, __A);
841851
}
842852

853+
TEST_CONSTEXPR(match_m512d(_mm512_maskz_cvtepu64_pd(/*1010 0101=*/0xa5, (__m512i)(__v8du){1, 1, 2, 2, 4, 4, 8, 8}), 1.0, 0.0, 2.0, 0.0, 0.0, 4.0, 0.0, 8.0));
854+
843855
__m512d test_mm512_cvt_roundepu64_pd(__m512i __A) {
844856
// CHECK-LABEL: test_mm512_cvt_roundepu64_pd
845857
// CHECK: @llvm.x86.avx512.uitofp.round.v8f64.v8i64

0 commit comments

Comments
 (0)