Skip to content

Commit 788fb4d

Browse files
committed
Define mask variants using the base intrinsic
1 parent b9bc673 commit 788fb4d

File tree

3 files changed, with 152 additions and 280 deletions.

clang/lib/Headers/avx512fintrin.h

Lines changed: 40 additions & 80 deletions
Original file line number | Diff line number | Diff line change
@@ -2510,25 +2510,20 @@ _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C) {
25102510
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
25112511
_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
25122512
return (__m512d)__builtin_ia32_selectpd_512(
2513-
(__mmask8)__U,
2514-
__builtin_elementwise_fma((__v8df)__A, (__v8df)__B, (__v8df)__C),
2515-
(__v8df)__A);
2513+
(__mmask8)__U, _mm512_fmadd_pd(__A, __B, __C), (__v8df)__A);
25162514
}
25172515

25182516
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
25192517
_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
25202518
return (__m512d)__builtin_ia32_selectpd_512(
2521-
(__mmask8)__U,
2522-
__builtin_elementwise_fma((__v8df)__A, (__v8df)__B, (__v8df)__C),
2523-
(__v8df)__C);
2519+
(__mmask8)__U, _mm512_fmadd_pd(__A, __B, __C), (__v8df)__C);
25242520
}
25252521

25262522
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
25272523
_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
2528-
return (__m512d)__builtin_ia32_selectpd_512(
2529-
(__mmask8)__U,
2530-
__builtin_elementwise_fma((__v8df)__A, (__v8df)__B, (__v8df)__C),
2531-
(__v8df)_mm512_setzero_pd());
2524+
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2525+
_mm512_fmadd_pd(__A, __B, __C),
2526+
(__v8df)_mm512_setzero_pd());
25322527
}
25332528

25342529
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
@@ -2540,25 +2535,20 @@ _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C) {
25402535
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
25412536
_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
25422537
return (__m512d)__builtin_ia32_selectpd_512(
2543-
(__mmask8)__U,
2544-
__builtin_elementwise_fma((__v8df)__A, (__v8df)__B, -(__v8df)__C),
2545-
(__v8df)__A);
2538+
(__mmask8)__U, _mm512_fmsub_pd(__A, __B, __C), (__v8df)__A);
25462539
}
25472540

25482541
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
25492542
_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
25502543
return (__m512d)__builtin_ia32_selectpd_512(
2551-
(__mmask8)__U,
2552-
__builtin_elementwise_fma((__v8df)__A, (__v8df)__B, -(__v8df)__C),
2553-
(__v8df)__C);
2544+
(__mmask8)__U, _mm512_fmsub_pd(__A, __B, __C), (__v8df)__C);
25542545
}
25552546

25562547
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
25572548
_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
2558-
return (__m512d)__builtin_ia32_selectpd_512(
2559-
(__mmask8)__U,
2560-
__builtin_elementwise_fma((__v8df)__A, (__v8df)__B, -(__v8df)__C),
2561-
(__v8df)_mm512_setzero_pd());
2549+
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2550+
_mm512_fmsub_pd(__A, __B, __C),
2551+
(__v8df)_mm512_setzero_pd());
25622552
}
25632553

25642554
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
@@ -2570,25 +2560,20 @@ _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C) {
25702560
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
25712561
_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
25722562
return (__m512d)__builtin_ia32_selectpd_512(
2573-
(__mmask8)__U,
2574-
__builtin_elementwise_fma(-(__v8df)__A, (__v8df)__B, (__v8df)__C),
2575-
(__v8df)__A);
2563+
(__mmask8)__U, _mm512_fnmadd_pd(__A, __B, __C), (__v8df)__A);
25762564
}
25772565

25782566
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
25792567
_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
25802568
return (__m512d)__builtin_ia32_selectpd_512(
2581-
(__mmask8)__U,
2582-
__builtin_elementwise_fma(-(__v8df)__A, (__v8df)__B, (__v8df)__C),
2583-
(__v8df)__C);
2569+
(__mmask8)__U, _mm512_fnmadd_pd(__A, __B, __C), (__v8df)__C);
25842570
}
25852571

25862572
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
25872573
_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
2588-
return (__m512d)__builtin_ia32_selectpd_512(
2589-
(__mmask8)__U,
2590-
__builtin_elementwise_fma(-(__v8df)__A, (__v8df)__B, (__v8df)__C),
2591-
(__v8df)_mm512_setzero_pd());
2574+
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2575+
_mm512_fnmadd_pd(__A, __B, __C),
2576+
(__v8df)_mm512_setzero_pd());
25922577
}
25932578

25942579
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
@@ -2600,25 +2585,20 @@ _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C) {
26002585
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
26012586
_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
26022587
return (__m512d)__builtin_ia32_selectpd_512(
2603-
(__mmask8)__U,
2604-
__builtin_elementwise_fma(-(__v8df)__A, (__v8df)__B, -(__v8df)__C),
2605-
(__v8df)__A);
2588+
(__mmask8)__U, _mm512_fnmsub_pd(__A, __B, __C), (__v8df)__A);
26062589
}
26072590

26082591
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
26092592
_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
26102593
return (__m512d)__builtin_ia32_selectpd_512(
2611-
(__mmask8)__U,
2612-
__builtin_elementwise_fma(-(__v8df)__A, (__v8df)__B, -(__v8df)__C),
2613-
(__v8df)__C);
2594+
(__mmask8)__U, _mm512_fnmsub_pd(__A, __B, __C), (__v8df)__C);
26142595
}
26152596

26162597
static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
26172598
_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
2618-
return (__m512d)__builtin_ia32_selectpd_512(
2619-
(__mmask8)__U,
2620-
__builtin_elementwise_fma(-(__v8df)__A, (__v8df)__B, -(__v8df)__C),
2621-
(__v8df)_mm512_setzero_pd());
2599+
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2600+
_mm512_fnmsub_pd(__A, __B, __C),
2601+
(__v8df)_mm512_setzero_pd());
26222602
}
26232603

26242604
#define _mm512_fmadd_round_ps(A, B, C, R) \
@@ -2713,25 +2693,20 @@ _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C) {
27132693
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
27142694
_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
27152695
return (__m512)__builtin_ia32_selectps_512(
2716-
(__mmask16)__U,
2717-
__builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B, (__v16sf)__C),
2718-
(__v16sf)__A);
2696+
(__mmask16)__U, _mm512_fmadd_ps(__A, __B, __C), (__v16sf)__A);
27192697
}
27202698

27212699
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
27222700
_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
27232701
return (__m512)__builtin_ia32_selectps_512(
2724-
(__mmask16)__U,
2725-
__builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B, (__v16sf)__C),
2726-
(__v16sf)__C);
2702+
(__mmask16)__U, _mm512_fmadd_ps(__A, __B, __C), (__v16sf)__C);
27272703
}
27282704

27292705
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
27302706
_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
2731-
return (__m512)__builtin_ia32_selectps_512(
2732-
(__mmask16)__U,
2733-
__builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B, (__v16sf)__C),
2734-
(__v16sf)_mm512_setzero_ps());
2707+
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2708+
_mm512_fmadd_ps(__A, __B, __C),
2709+
(__v16sf)_mm512_setzero_ps());
27352710
}
27362711

27372712
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
@@ -2743,25 +2718,20 @@ _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C) {
27432718
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
27442719
_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
27452720
return (__m512)__builtin_ia32_selectps_512(
2746-
(__mmask16)__U,
2747-
__builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B, -(__v16sf)__C),
2748-
(__v16sf)__A);
2721+
(__mmask16)__U, _mm512_fmsub_ps(__A, __B, __C), (__v16sf)__A);
27492722
}
27502723

27512724
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
27522725
_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
27532726
return (__m512)__builtin_ia32_selectps_512(
2754-
(__mmask16)__U,
2755-
__builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B, -(__v16sf)__C),
2756-
(__v16sf)__C);
2727+
(__mmask16)__U, _mm512_fmsub_ps(__A, __B, __C), (__v16sf)__C);
27572728
}
27582729

27592730
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
27602731
_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
2761-
return (__m512)__builtin_ia32_selectps_512(
2762-
(__mmask16)__U,
2763-
__builtin_elementwise_fma((__v16sf)__A, (__v16sf)__B, -(__v16sf)__C),
2764-
(__v16sf)_mm512_setzero_ps());
2732+
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2733+
_mm512_fmsub_ps(__A, __B, __C),
2734+
(__v16sf)_mm512_setzero_ps());
27652735
}
27662736

27672737
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
@@ -2773,25 +2743,20 @@ _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C) {
27732743
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
27742744
_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
27752745
return (__m512)__builtin_ia32_selectps_512(
2776-
(__mmask16)__U,
2777-
__builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B, (__v16sf)__C),
2778-
(__v16sf)__A);
2746+
(__mmask16)__U, _mm512_fnmadd_ps(__A, __B, __C), (__v16sf)__A);
27792747
}
27802748

27812749
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
27822750
_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
27832751
return (__m512)__builtin_ia32_selectps_512(
2784-
(__mmask16)__U,
2785-
__builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B, (__v16sf)__C),
2786-
(__v16sf)__C);
2752+
(__mmask16)__U, _mm512_fnmadd_ps(__A, __B, __C), (__v16sf)__C);
27872753
}
27882754

27892755
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
27902756
_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
2791-
return (__m512)__builtin_ia32_selectps_512(
2792-
(__mmask16)__U,
2793-
__builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B, (__v16sf)__C),
2794-
(__v16sf)_mm512_setzero_ps());
2757+
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2758+
_mm512_fnmadd_ps(__A, __B, __C),
2759+
(__v16sf)_mm512_setzero_ps());
27952760
}
27962761

27972762
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
@@ -2803,25 +2768,20 @@ _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C) {
28032768
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
28042769
_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
28052770
return (__m512)__builtin_ia32_selectps_512(
2806-
(__mmask16)__U,
2807-
__builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B, -(__v16sf)__C),
2808-
(__v16sf)__A);
2771+
(__mmask16)__U, _mm512_fnmsub_ps(__A, __B, __C), (__v16sf)__A);
28092772
}
28102773

28112774
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
28122775
_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
28132776
return (__m512)__builtin_ia32_selectps_512(
2814-
(__mmask16)__U,
2815-
__builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B, -(__v16sf)__C),
2816-
(__v16sf)__C);
2777+
(__mmask16)__U, _mm512_fnmsub_ps(__A, __B, __C), (__v16sf)__C);
28172778
}
28182779

28192780
static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
28202781
_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
2821-
return (__m512)__builtin_ia32_selectps_512(
2822-
(__mmask16)__U,
2823-
__builtin_elementwise_fma(-(__v16sf)__A, (__v16sf)__B, -(__v16sf)__C),
2824-
(__v16sf)_mm512_setzero_ps());
2782+
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2783+
_mm512_fnmsub_ps(__A, __B, __C),
2784+
(__v16sf)_mm512_setzero_ps());
28252785
}
28262786

28272787
#define _mm512_fmaddsub_round_pd(A, B, C, R) \

0 commit comments

Comments
 (0)