Skip to content

Commit 69a4e22

Browse files
committed
[clang] add masked sqrt
Signed-off-by: Shreeyash Pandey <[email protected]>
1 parent bd73af2 commit 69a4e22

File tree

2 files changed

+32
-8
lines changed

2 files changed

+32
-8
lines changed

clang/lib/Headers/avx512vlintrin.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3444,56 +3444,56 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
34443444
(__v8si)(__m256i)(index), \
34453445
(__v8si)(__m256i)(v1), (int)(scale))
34463446

// _mm_mask_sqrt_pd: hands the mask __U, the result of _mm_sqrt_pd(__A) and the
// vector __W to __builtin_ia32_selectpd_128 and returns the selected __m128d.
// The only change in this hunk is the attribute macro: __DEFAULT_FN_ATTRS128
// becomes __DEFAULT_FN_ATTRS128_CONSTEXPR; the body is unchanged context.
// NOTE(review): presumably a set bit in __U takes the sqrt lane and a clear
// bit keeps the __W lane -- confirm against the select builtin.
3447-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
3447+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
34483448
_mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) {
34493449
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
34503450
(__v2df)_mm_sqrt_pd(__A),
34513451
(__v2df)__W);
34523452
}
34533453

// _mm_maskz_sqrt_pd: hands the mask __U, the result of _mm_sqrt_pd(__A) and
// _mm_setzero_pd() to __builtin_ia32_selectpd_128 and returns the selected
// __m128d.
// The only change in this hunk is the attribute macro: __DEFAULT_FN_ATTRS128
// becomes __DEFAULT_FN_ATTRS128_CONSTEXPR; the body is unchanged context.
// NOTE(review): presumably a set bit in __U takes the sqrt lane and a clear
// bit yields the zero lane -- confirm against the select builtin.
3454-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
3454+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
34553455
_mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) {
34563456
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
34573457
(__v2df)_mm_sqrt_pd(__A),
34583458
(__v2df)_mm_setzero_pd());
34593459
}
34603460

// _mm256_mask_sqrt_pd: hands the mask __U, the result of _mm256_sqrt_pd(__A)
// and the vector __W to __builtin_ia32_selectpd_256 and returns the selected
// __m256d.
// The only change in this hunk is the attribute macro: __DEFAULT_FN_ATTRS256
// becomes __DEFAULT_FN_ATTRS256_CONSTEXPR; the body is unchanged context.
// NOTE(review): presumably a set bit in __U takes the sqrt lane and a clear
// bit keeps the __W lane -- confirm against the select builtin.
3461-
static __inline__ __m256d __DEFAULT_FN_ATTRS256
3461+
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
34623462
_mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) {
34633463
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
34643464
(__v4df)_mm256_sqrt_pd(__A),
34653465
(__v4df)__W);
34663466
}
34673467

// _mm256_maskz_sqrt_pd: hands the mask __U, the result of _mm256_sqrt_pd(__A)
// and _mm256_setzero_pd() to __builtin_ia32_selectpd_256 and returns the
// selected __m256d.
// The only change in this hunk is the attribute macro: __DEFAULT_FN_ATTRS256
// becomes __DEFAULT_FN_ATTRS256_CONSTEXPR; the body is unchanged context.
// NOTE(review): presumably a set bit in __U takes the sqrt lane and a clear
// bit yields the zero lane -- confirm against the select builtin.
3468-
static __inline__ __m256d __DEFAULT_FN_ATTRS256
3468+
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
34693469
_mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) {
34703470
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
34713471
(__v4df)_mm256_sqrt_pd(__A),
34723472
(__v4df)_mm256_setzero_pd());
34733473
}
34743474

// _mm_mask_sqrt_ps: hands the mask __U, the result of _mm_sqrt_ps(__A) and the
// vector __W to __builtin_ia32_selectps_128 and returns the selected __m128.
// The only change in this hunk is the attribute macro: __DEFAULT_FN_ATTRS128
// becomes __DEFAULT_FN_ATTRS128_CONSTEXPR; the body is unchanged context.
// NOTE(review): presumably a set bit in __U takes the sqrt lane and a clear
// bit keeps the __W lane -- confirm against the select builtin.
3475-
static __inline__ __m128 __DEFAULT_FN_ATTRS128
3475+
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
34763476
_mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) {
34773477
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
34783478
(__v4sf)_mm_sqrt_ps(__A),
34793479
(__v4sf)__W);
34803480
}
34813481

// _mm_maskz_sqrt_ps: hands the mask __U, the result of _mm_sqrt_ps(__A) and
// _mm_setzero_ps() to __builtin_ia32_selectps_128 and returns the selected
// __m128.
// The only change in this hunk is the attribute macro: __DEFAULT_FN_ATTRS128
// becomes __DEFAULT_FN_ATTRS128_CONSTEXPR; the body is unchanged context.
// NOTE(review): presumably a set bit in __U takes the sqrt lane and a clear
// bit yields the zero lane -- confirm against the select builtin.
3482-
static __inline__ __m128 __DEFAULT_FN_ATTRS128
3482+
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
34833483
_mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) {
34843484
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
34853485
(__v4sf)_mm_sqrt_ps(__A),
34863486
(__v4sf)_mm_setzero_ps());
34873487
}
34883488

// _mm256_mask_sqrt_ps: hands the mask __U, the result of _mm256_sqrt_ps(__A)
// and the vector __W to __builtin_ia32_selectps_256 and returns the selected
// __m256.
// The only change in this hunk is the attribute macro: __DEFAULT_FN_ATTRS256
// becomes __DEFAULT_FN_ATTRS256_CONSTEXPR; the body is unchanged context.
// NOTE(review): presumably a set bit in __U takes the sqrt lane and a clear
// bit keeps the __W lane -- confirm against the select builtin.
3489-
static __inline__ __m256 __DEFAULT_FN_ATTRS256
3489+
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
34903490
_mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) {
34913491
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
34923492
(__v8sf)_mm256_sqrt_ps(__A),
34933493
(__v8sf)__W);
34943494
}
34953495

3496-
static __inline__ __m256 __DEFAULT_FN_ATTRS256
3496+
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
34973497
_mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) {
34983498
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
34993499
(__v8sf)_mm256_sqrt_ps(__A),

clang/test/CodeGen/X86/avx512vl-builtins.c

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5514,48 +5514,72 @@ __m128d test_mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) {
55145514
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
55155515
return _mm_mask_sqrt_pd(__W,__U,__A);
55165516
}
5517+
// Constexpr test added by this change: calls _mm_mask_sqrt_pd with
// pass-through _mm_set_pd(200.0, 100.0), mask 0b01 and input
// _mm_set_pd(9.0, 4.0), and compares the result to (2.0, 200.0) through
// match_m128d.
// NOTE(review): assumes _mm_set_pd takes lanes high-to-low and match_m128d
// lists them low-to-high, so lane 0 is sqrt(4.0) and lane 1 keeps 200.0 --
// confirm against the helper definitions.
5518+
TEST_CONSTEXPR(match_m128d(_mm_mask_sqrt_pd(_mm_set_pd(200.0, 100.0), 0b01, _mm_set_pd(9.0, 4.0)), 2.0, 200.0));
5519+
// test_mm_maskz_sqrt_pd: returns _mm_maskz_sqrt_pd(__U, __A). The FileCheck
// directives inside the body look for an @llvm.sqrt.v2f64 call and then a
// select on <2 x i1> over <2 x double> operands.
// The trailing TEST_CONSTEXPR line is added by this change: mask 0b10 on
// _mm_set_pd(9.0, 4.0) is compared to (0.0, 3.0) through match_m128d.
// NOTE(review): assumes _mm_set_pd takes lanes high-to-low and match_m128d
// lists them low-to-high -- confirm against the helper definitions.
55175520
__m128d test_mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) {
55185521
// CHECK-LABEL: test_mm_maskz_sqrt_pd
55195522
// CHECK: @llvm.sqrt.v2f64
55205523
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
55215524
return _mm_maskz_sqrt_pd(__U,__A);
55225525
}
5526+
5527+
TEST_CONSTEXPR(match_m128d(_mm_maskz_sqrt_pd(0b10, _mm_set_pd(9.0, 4.0)), 0.0, 3.0));
5528+
// test_mm256_mask_sqrt_pd: returns _mm256_mask_sqrt_pd(__W, __U, __A). The
// FileCheck directives inside the body look for an @llvm.sqrt.v4f64 call and
// then a select on <4 x i1> over <4 x double> operands.
// The trailing TEST_CONSTEXPR line is added by this change: pass-through
// _mm256_set_pd(400.0, 300.0, 200.0, 100.0), mask 0b1001 and input
// _mm256_set_pd(25.0, 16.0, 9.0, 4.0) are compared to
// (2.0, 200.0, 300.0, 5.0) through match_m256d.
// NOTE(review): assumes _mm256_set_pd takes lanes high-to-low and match_m256d
// lists them low-to-high -- confirm against the helper definitions.
55235529
__m256d test_mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) {
55245530
// CHECK-LABEL: test_mm256_mask_sqrt_pd
55255531
// CHECK: @llvm.sqrt.v4f64
55265532
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
55275533
return _mm256_mask_sqrt_pd(__W,__U,__A);
55285534
}
5535+
5536+
TEST_CONSTEXPR(match_m256d(_mm256_mask_sqrt_pd(_mm256_set_pd(400.0, 300.0, 200.0, 100.0), 0b1001, _mm256_set_pd(25.0, 16.0, 9.0, 4.0)), 2.0, 200.0, 300.0, 5.0));
5537+
// test_mm256_maskz_sqrt_pd: returns _mm256_maskz_sqrt_pd(__U, __A). The
// FileCheck directives inside the body look for an @llvm.sqrt.v4f64 call and
// then a select on <4 x i1> over <4 x double> operands.
// The trailing TEST_CONSTEXPR line is added by this change: mask 0b0110 on
// _mm256_set_pd(25.0, 16.0, 9.0, 4.0) is compared to (0.0, 3.0, 4.0, 0.0)
// through match_m256d.
// NOTE(review): assumes _mm256_set_pd takes lanes high-to-low and match_m256d
// lists them low-to-high -- confirm against the helper definitions.
55295538
__m256d test_mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) {
55305539
// CHECK-LABEL: test_mm256_maskz_sqrt_pd
55315540
// CHECK: @llvm.sqrt.v4f64
55325541
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
55335542
return _mm256_maskz_sqrt_pd(__U,__A);
55345543
}
5544+
5545+
TEST_CONSTEXPR(match_m256d(_mm256_maskz_sqrt_pd(0b0110, _mm256_set_pd(25.0, 16.0, 9.0, 4.0)), 0.0, 3.0, 4.0, 0.0));
5546+
// test_mm_mask_sqrt_ps: returns _mm_mask_sqrt_ps(__W, __U, __A). The FileCheck
// directives inside the body look for an @llvm.sqrt.v4f32 call and then a
// select on <4 x i1> over <4 x float> operands.
// The trailing TEST_CONSTEXPR line is added by this change: pass-through
// _mm_set_ps(400.0f, 300.0f, 200.0f, 100.0f), mask 0b1010 and input
// _mm_set_ps(25.0f, 16.0f, 9.0f, 4.0f) are compared to
// (100.0f, 3.0f, 300.0f, 5.0f) through match_m128.
// NOTE(review): assumes _mm_set_ps takes lanes high-to-low and match_m128
// lists them low-to-high -- confirm against the helper definitions.
55355547
__m128 test_mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) {
55365548
// CHECK-LABEL: test_mm_mask_sqrt_ps
55375549
// CHECK: @llvm.sqrt.v4f32
55385550
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
55395551
return _mm_mask_sqrt_ps(__W,__U,__A);
55405552
}
5553+
5554+
TEST_CONSTEXPR(match_m128(_mm_mask_sqrt_ps(_mm_set_ps(400.0f, 300.0f, 200.0f, 100.0f), 0b1010, _mm_set_ps(25.0f, 16.0f, 9.0f, 4.0f)), 100.0f, 3.0f, 300.0f, 5.0f));
5555+
// test_mm_maskz_sqrt_ps: returns _mm_maskz_sqrt_ps(__U, __A). The FileCheck
// directives inside the body look for an @llvm.sqrt.v4f32 call and then a
// select on <4 x i1> over <4 x float> operands.
// The trailing TEST_CONSTEXPR line is added by this change: mask 0b0011 on
// _mm_set_ps(25.0f, 16.0f, 9.0f, 4.0f) is compared to
// (2.0f, 3.0f, 0.0f, 0.0f) through match_m128.
// NOTE(review): assumes _mm_set_ps takes lanes high-to-low and match_m128
// lists them low-to-high -- confirm against the helper definitions.
55415556
__m128 test_mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) {
55425557
// CHECK-LABEL: test_mm_maskz_sqrt_ps
55435558
// CHECK: @llvm.sqrt.v4f32
55445559
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
55455560
return _mm_maskz_sqrt_ps(__U,__A);
55465561
}
5562+
5563+
TEST_CONSTEXPR(match_m128(_mm_maskz_sqrt_ps(0b0011, _mm_set_ps(25.0f, 16.0f, 9.0f, 4.0f)), 2.0f, 3.0f, 0.0f, 0.0f));
5564+
// test_mm256_mask_sqrt_ps: returns _mm256_mask_sqrt_ps(__W, __U, __A). The
// FileCheck directives inside the body look for an @llvm.sqrt.v8f32 call and
// then a select on <8 x i1> over <8 x float> operands.
// The trailing TEST_CONSTEXPR line is added by this change: pass-through
// values 800.0f down to 100.0f, mask 0b11001100 and inputs 64.0f down to 1.0f
// (both built with _mm256_set_ps) are compared to
// (100.0f, 200.0f, 3.0f, 4.0f, 500.0f, 600.0f, 7.0f, 8.0f) through match_m256.
// NOTE(review): assumes _mm256_set_ps takes lanes high-to-low and match_m256
// lists them low-to-high -- confirm against the helper definitions.
55475565
__m256 test_mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) {
55485566
// CHECK-LABEL: test_mm256_mask_sqrt_ps
55495567
// CHECK: @llvm.sqrt.v8f32
55505568
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
55515569
return _mm256_mask_sqrt_ps(__W,__U,__A);
55525570
}
5571+
5572+
TEST_CONSTEXPR(match_m256(_mm256_mask_sqrt_ps(_mm256_set_ps(800.0f, 700.0f, 600.0f, 500.0f, 400.0f, 300.0f, 200.0f, 100.0f), 0b11001100, _mm256_set_ps(64.0f, 49.0f, 36.0f, 25.0f, 16.0f, 9.0f, 4.0f, 1.0f)), 100.0f, 200.0f, 3.0f, 4.0f, 500.0f, 600.0f, 7.0f, 8.0f));
5573+
// test_mm256_maskz_sqrt_ps: returns _mm256_maskz_sqrt_ps(__U, __A). The
// FileCheck directives inside the body look for an @llvm.sqrt.v8f32 call and
// then a select on <8 x i1> over <8 x float> operands.
// The trailing TEST_CONSTEXPR line is added by this change: mask 0b11110000 on
// inputs 64.0f down to 1.0f (built with _mm256_set_ps) is compared to
// (0.0f, 0.0f, 0.0f, 0.0f, 5.0f, 6.0f, 7.0f, 8.0f) through match_m256.
// NOTE(review): assumes _mm256_set_ps takes lanes high-to-low and match_m256
// lists them low-to-high -- confirm against the helper definitions.
55535574
__m256 test_mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) {
55545575
// CHECK-LABEL: test_mm256_maskz_sqrt_ps
55555576
// CHECK: @llvm.sqrt.v8f32
55565577
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
55575578
return _mm256_maskz_sqrt_ps(__U,__A);
55585579
}
5580+
5581+
TEST_CONSTEXPR(match_m256(_mm256_maskz_sqrt_ps(0b11110000, _mm256_set_ps(64.0f, 49.0f, 36.0f, 25.0f, 16.0f, 9.0f, 4.0f, 1.0f)), 0.0f, 0.0f, 0.0f, 0.0f, 5.0f, 6.0f, 7.0f, 8.0f));
5582+
55595583
__m128d test_mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
55605584
// CHECK-LABEL: test_mm_mask_sub_pd
55615585
// CHECK: fsub <2 x double> %{{.*}}, %{{.*}}

0 commit comments

Comments
 (0)