Skip to content

Commit faca9dd

Browse files
authored
[Headers][X86] Group related AVX512VL FMA intrinsics together (NFC) (#156794)
Follow-up of #156385.
1 parent 21532f0 commit faca9dd

File tree

2 files changed

+361
-361
lines changed

2 files changed

+361
-361
lines changed

clang/lib/Headers/avx512vlintrin.h

Lines changed: 96 additions & 96 deletions
Original file line number | Diff line number | Diff line change
@@ -925,13 +925,25 @@ _mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
925925
(__mmask8)__U, (__v2df)_mm_fmsub_pd(__A, __B, __C), (__v2df)__A);
926926
}
927927

928+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
929+
_mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
930+
return (__m128d)__builtin_ia32_selectpd_128(
931+
(__mmask8)__U, (__v2df)_mm_fmsub_pd(__A, __B, __C), (__v2df)__C);
932+
}
933+
928934
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
929935
_mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
930936
return (__m128d)__builtin_ia32_selectpd_128(
931937
(__mmask8)__U, (__v2df)_mm_fmsub_pd(__A, __B, __C),
932938
(__v2df)_mm_setzero_pd());
933939
}
934940

941+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
942+
_mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
943+
return (__m128d)__builtin_ia32_selectpd_128(
944+
(__mmask8)__U, (__v2df)_mm_fnmadd_pd(__A, __B, __C), (__v2df)__A);
945+
}
946+
935947
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
936948
_mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
937949
return (__m128d)__builtin_ia32_selectpd_128(
@@ -945,6 +957,18 @@ _mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
945957
(__v2df)_mm_setzero_pd());
946958
}
947959

960+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
961+
_mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
962+
return (__m128d)__builtin_ia32_selectpd_128(
963+
(__mmask8)__U, (__v2df)_mm_fnmsub_pd(__A, __B, __C), (__v2df)__A);
964+
}
965+
966+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
967+
_mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
968+
return (__m128d)__builtin_ia32_selectpd_128(
969+
(__mmask8)__U, (__v2df)_mm_fnmsub_pd(__A, __B, __C), (__v2df)__C);
970+
}
971+
948972
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
949973
_mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
950974
return (__m128d)__builtin_ia32_selectpd_128(
@@ -977,13 +1001,25 @@ _mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
9771001
(__mmask8)__U, (__v4df)_mm256_fmsub_pd(__A, __B, __C), (__v4df)__A);
9781002
}
9791003

1004+
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
1005+
_mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
1006+
return (__m256d)__builtin_ia32_selectpd_256(
1007+
(__mmask8)__U, (__v4df)_mm256_fmsub_pd(__A, __B, __C), (__v4df)__C);
1008+
}
1009+
9801010
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
9811011
_mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
9821012
return (__m256d)__builtin_ia32_selectpd_256(
9831013
(__mmask8)__U, (__v4df)_mm256_fmsub_pd(__A, __B, __C),
9841014
(__v4df)_mm256_setzero_pd());
9851015
}
9861016

1017+
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
1018+
_mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
1019+
return (__m256d)__builtin_ia32_selectpd_256(
1020+
(__mmask8)__U, (__v4df)_mm256_fnmadd_pd(__A, __B, __C), (__v4df)__A);
1021+
}
1022+
9871023
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
9881024
_mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
9891025
return (__m256d)__builtin_ia32_selectpd_256(
@@ -997,6 +1033,18 @@ _mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
9971033
(__v4df)_mm256_setzero_pd());
9981034
}
9991035

1036+
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
1037+
_mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
1038+
return (__m256d)__builtin_ia32_selectpd_256(
1039+
(__mmask8)__U, (__v4df)_mm256_fnmsub_pd(__A, __B, __C), (__v4df)__A);
1040+
}
1041+
1042+
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
1043+
_mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
1044+
return (__m256d)__builtin_ia32_selectpd_256(
1045+
(__mmask8)__U, (__v4df)_mm256_fnmsub_pd(__A, __B, __C), (__v4df)__C);
1046+
}
1047+
10001048
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
10011049
_mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
10021050
return (__m256d)__builtin_ia32_selectpd_256(
@@ -1029,13 +1077,25 @@ _mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
10291077
(__mmask8)__U, (__v4sf)_mm_fmsub_ps(__A, __B, __C), (__v4sf)__A);
10301078
}
10311079

1080+
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1081+
_mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
1082+
return (__m128)__builtin_ia32_selectps_128(
1083+
(__mmask8)__U, (__v4sf)_mm_fmsub_ps(__A, __B, __C), (__v4sf)__C);
1084+
}
1085+
10321086
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
10331087
_mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
10341088
return (__m128)__builtin_ia32_selectps_128(
10351089
(__mmask8)__U, (__v4sf)_mm_fmsub_ps(__A, __B, __C),
10361090
(__v4sf)_mm_setzero_ps());
10371091
}
10381092

1093+
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1094+
_mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
1095+
return (__m128)__builtin_ia32_selectps_128(
1096+
(__mmask8)__U, (__v4sf)_mm_fnmadd_ps(__A, __B, __C), (__v4sf)__A);
1097+
}
1098+
10391099
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
10401100
_mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
10411101
return (__m128)__builtin_ia32_selectps_128(
@@ -1049,6 +1109,18 @@ _mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
10491109
(__v4sf)_mm_setzero_ps());
10501110
}
10511111

1112+
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1113+
_mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
1114+
return (__m128)__builtin_ia32_selectps_128(
1115+
(__mmask8)__U, (__v4sf)_mm_fnmsub_ps(__A, __B, __C), (__v4sf)__A);
1116+
}
1117+
1118+
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1119+
_mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
1120+
return (__m128)__builtin_ia32_selectps_128(
1121+
(__mmask8)__U, (__v4sf)_mm_fnmsub_ps(__A, __B, __C), (__v4sf)__C);
1122+
}
1123+
10521124
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
10531125
_mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
10541126
return (__m128)__builtin_ia32_selectps_128(
@@ -1081,13 +1153,25 @@ _mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
10811153
(__mmask8)__U, (__v8sf)_mm256_fmsub_ps(__A, __B, __C), (__v8sf)__A);
10821154
}
10831155

1156+
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1157+
_mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
1158+
return (__m256)__builtin_ia32_selectps_256(
1159+
(__mmask8)__U, (__v8sf)_mm256_fmsub_ps(__A, __B, __C), (__v8sf)__C);
1160+
}
1161+
10841162
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
10851163
_mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
10861164
return (__m256)__builtin_ia32_selectps_256(
10871165
(__mmask8)__U, (__v8sf)_mm256_fmsub_ps(__A, __B, __C),
10881166
(__v8sf)_mm256_setzero_ps());
10891167
}
10901168

1169+
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1170+
_mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
1171+
return (__m256)__builtin_ia32_selectps_256(
1172+
(__mmask8)__U, (__v8sf)_mm256_fnmadd_ps(__A, __B, __C), (__v8sf)__A);
1173+
}
1174+
10911175
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
10921176
_mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
10931177
return (__m256)__builtin_ia32_selectps_256(
@@ -1101,6 +1185,18 @@ _mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
11011185
(__v8sf)_mm256_setzero_ps());
11021186
}
11031187

1188+
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1189+
_mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
1190+
return (__m256)__builtin_ia32_selectps_256(
1191+
(__mmask8)__U, (__v8sf)_mm256_fnmsub_ps(__A, __B, __C), (__v8sf)__A);
1192+
}
1193+
1194+
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1195+
_mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
1196+
return (__m256)__builtin_ia32_selectps_256(
1197+
(__mmask8)__U, (__v8sf)_mm256_fnmsub_ps(__A, __B, __C), (__v8sf)__C);
1198+
}
1199+
11041200
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
11051201
_mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
11061202
return (__m256)__builtin_ia32_selectps_256(
@@ -1308,30 +1404,6 @@ _mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
13081404
(__v8sf)_mm256_setzero_ps());
13091405
}
13101406

1311-
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1312-
_mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
1313-
return (__m128d)__builtin_ia32_selectpd_128(
1314-
(__mmask8)__U, (__v2df)_mm_fmsub_pd(__A, __B, __C), (__v2df)__C);
1315-
}
1316-
1317-
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
1318-
_mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
1319-
return (__m256d)__builtin_ia32_selectpd_256(
1320-
(__mmask8)__U, (__v4df)_mm256_fmsub_pd(__A, __B, __C), (__v4df)__C);
1321-
}
1322-
1323-
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1324-
_mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
1325-
return (__m128)__builtin_ia32_selectps_128(
1326-
(__mmask8)__U, (__v4sf)_mm_fmsub_ps(__A, __B, __C), (__v4sf)__C);
1327-
}
1328-
1329-
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1330-
_mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
1331-
return (__m256)__builtin_ia32_selectps_256(
1332-
(__mmask8)__U, (__v8sf)_mm256_fmsub_ps(__A, __B, __C), (__v8sf)__C);
1333-
}
1334-
13351407
static __inline__ __m128d __DEFAULT_FN_ATTRS128
13361408
_mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
13371409
{
@@ -1372,78 +1444,6 @@ _mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
13721444
(__v8sf) __C);
13731445
}
13741446

1375-
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1376-
_mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
1377-
return (__m128d)__builtin_ia32_selectpd_128(
1378-
(__mmask8)__U, (__v2df)_mm_fnmadd_pd(__A, __B, __C), (__v2df)__A);
1379-
}
1380-
1381-
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
1382-
_mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
1383-
return (__m256d)__builtin_ia32_selectpd_256(
1384-
(__mmask8)__U, (__v4df)_mm256_fnmadd_pd(__A, __B, __C), (__v4df)__A);
1385-
}
1386-
1387-
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1388-
_mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
1389-
return (__m128)__builtin_ia32_selectps_128(
1390-
(__mmask8)__U, (__v4sf)_mm_fnmadd_ps(__A, __B, __C), (__v4sf)__A);
1391-
}
1392-
1393-
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1394-
_mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
1395-
return (__m256)__builtin_ia32_selectps_256(
1396-
(__mmask8)__U, (__v8sf)_mm256_fnmadd_ps(__A, __B, __C), (__v8sf)__A);
1397-
}
1398-
1399-
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1400-
_mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
1401-
return (__m128d)__builtin_ia32_selectpd_128(
1402-
(__mmask8)__U, (__v2df)_mm_fnmsub_pd(__A, __B, __C), (__v2df)__A);
1403-
}
1404-
1405-
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1406-
_mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
1407-
return (__m128d)__builtin_ia32_selectpd_128(
1408-
(__mmask8)__U, (__v2df)_mm_fnmsub_pd(__A, __B, __C), (__v2df)__C);
1409-
}
1410-
1411-
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
1412-
_mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
1413-
return (__m256d)__builtin_ia32_selectpd_256(
1414-
(__mmask8)__U, (__v4df)_mm256_fnmsub_pd(__A, __B, __C), (__v4df)__A);
1415-
}
1416-
1417-
static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
1418-
_mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
1419-
return (__m256d)__builtin_ia32_selectpd_256(
1420-
(__mmask8)__U, (__v4df)_mm256_fnmsub_pd(__A, __B, __C), (__v4df)__C);
1421-
}
1422-
1423-
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1424-
_mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
1425-
return (__m128)__builtin_ia32_selectps_128(
1426-
(__mmask8)__U, (__v4sf)_mm_fnmsub_ps(__A, __B, __C), (__v4sf)__A);
1427-
}
1428-
1429-
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1430-
_mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
1431-
return (__m128)__builtin_ia32_selectps_128(
1432-
(__mmask8)__U, (__v4sf)_mm_fnmsub_ps(__A, __B, __C), (__v4sf)__C);
1433-
}
1434-
1435-
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1436-
_mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
1437-
return (__m256)__builtin_ia32_selectps_256(
1438-
(__mmask8)__U, (__v8sf)_mm256_fnmsub_ps(__A, __B, __C), (__v8sf)__A);
1439-
}
1440-
1441-
static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
1442-
_mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
1443-
return (__m256)__builtin_ia32_selectps_256(
1444-
(__mmask8)__U, (__v8sf)_mm256_fnmsub_ps(__A, __B, __C), (__v8sf)__C);
1445-
}
1446-
14471447
static __inline__ __m128d __DEFAULT_FN_ATTRS128
14481448
_mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
14491449
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,

0 commit comments

Comments
 (0)