Skip to content

Commit 7ace3ac

Browse files
committed
[clang] Mark masked AVX-512 sqrt intrinsics (_mm512_mask_sqrt_pd/ps, _mm512_maskz_sqrt_pd/ps) as constexpr
Signed-off-by: Shreeyash Pandey <[email protected]>
1 parent d33f881 commit 7ace3ac

File tree

2 files changed: 18 additions and 12 deletions.

clang/lib/Headers/avx512fintrin.h

Lines changed: 8 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1464,17 +1464,15 @@ _mm512_sqrt_pd(__m512d __A) {
14641464
_MM_FROUND_CUR_DIRECTION);
14651465
}
14661466

1467-
static __inline__ __m512d __DEFAULT_FN_ATTRS512
1468-
_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
1469-
{
1467+
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
1468+
_mm512_mask_sqrt_pd(__m512d __W, __mmask8 __U, __m512d __A) {
14701469
return (__m512d)__builtin_ia32_selectpd_512(__U,
14711470
(__v8df)_mm512_sqrt_pd(__A),
14721471
(__v8df)__W);
14731472
}
14741473

1475-
static __inline__ __m512d __DEFAULT_FN_ATTRS512
1476-
_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
1477-
{
1474+
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
1475+
_mm512_maskz_sqrt_pd(__mmask8 __U, __m512d __A) {
14781476
return (__m512d)__builtin_ia32_selectpd_512(__U,
14791477
(__v8df)_mm512_sqrt_pd(__A),
14801478
(__v8df)_mm512_setzero_pd());
@@ -1499,17 +1497,15 @@ _mm512_sqrt_ps(__m512 __A) {
14991497
_MM_FROUND_CUR_DIRECTION);
15001498
}
15011499

1502-
static __inline__ __m512 __DEFAULT_FN_ATTRS512
1503-
_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
1504-
{
1500+
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
1501+
_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A) {
15051502
return (__m512)__builtin_ia32_selectps_512(__U,
15061503
(__v16sf)_mm512_sqrt_ps(__A),
15071504
(__v16sf)__W);
15081505
}
15091506

1510-
static __inline__ __m512 __DEFAULT_FN_ATTRS512
1511-
_mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A)
1512-
{
1507+
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
1508+
_mm512_maskz_sqrt_ps(__mmask16 __U, __m512 __A) {
15131509
return (__m512)__builtin_ia32_selectps_512(__U,
15141510
(__v16sf)_mm512_sqrt_ps(__A),
15151511
(__v16sf)_mm512_setzero_ps());

clang/test/CodeGen/X86/avx512f-builtins.c

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,12 @@ __m512d test_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
2929
return _mm512_mask_sqrt_pd (__W,__U,__A);
3030
}
3131

32+
TEST_CONSTEXPR(match_m512d(_mm512_mask_sqrt_pd(
33+
_mm512_set_pd(800.0, 700.0, 600.0, 500.0, 400.0, 300.0, 200.0, 100.0),
34+
0b11000011,
35+
_mm512_set_pd(64.0, 49.0, 36.0, 25.0, 16.0, 9.0, 4.0, 1.0)),
36+
1.0, 2.0, 300.0, 400.0, 500.0, 600.0, 7.0, 8.0));
37+
3238
__m512d test_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
3339
{
3440
// CHECK-LABEL: test_mm512_maskz_sqrt_pd
@@ -38,6 +44,10 @@ __m512d test_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
3844
return _mm512_maskz_sqrt_pd (__U,__A);
3945
}
4046

47+
TEST_CONSTEXPR(match_m512d(_mm512_maskz_sqrt_pd(0b00001111,
48+
_mm512_set_pd(64.0, 49.0, 36.0, 25.0, 16.0, 9.0, 4.0, 1.0)),
49+
1.0, 2.0, 3.0, 4.0, 0.0, 0.0, 0.0, 0.0));
50+
4151
__m512d test_mm512_mask_sqrt_round_pd(__m512d __W,__mmask8 __U,__m512d __A)
4252
{
4353
// CHECK-LABEL: test_mm512_mask_sqrt_round_pd

0 commit comments

Comments
 (0)