Skip to content

Commit a14bb33

Browse files
author
liuzhenya
committed
[Headers][X86] Allow AVX512 masked arithmetic ss/sd intrinsics to be used in constexpr
1 parent b3d6264 commit a14bb33

File tree

6 files changed

+121
-33
lines changed

6 files changed

+121
-33
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4128,7 +4128,7 @@ let Features = "avx512bf16", Attributes = [NoThrow, Const, RequiredVectorWidth<1
41284128
def selectsbf_128 : X86Builtin<"_Vector<8, __bf16>(unsigned char, _Vector<8, __bf16>, _Vector<8, __bf16>)">;
41294129
}
41304130

4131-
let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
4131+
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
41324132
def selectss_128 : X86Builtin<"_Vector<4, float>(unsigned char, _Vector<4, float>, _Vector<4, float>)">;
41334133
def selectsd_128 : X86Builtin<"_Vector<2, double>(unsigned char, _Vector<2, double>, _Vector<2, double>)">;
41344134
}

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2419,6 +2419,27 @@ static bool interp__builtin_elementwise_int_unaryop(
24192419
return false;
24202420
}
24212421

2422+
/// Constant-evaluates __builtin_ia32_selectss_128 / __builtin_ia32_selectsd_128,
/// whose operands are (mask U, vector A, vector W).
///
/// Only lane 0 is subject to the mask: it is A[0] when bit 0 of U is set and
/// W[0] otherwise. Every other lane is copied from A. The masked scalar
/// intrinsics (_mm_mask_add_ss and friends) compute their unmasked result into
/// A first and are specified to pass A's upper elements through, so taking the
/// upper lanes from W would make constant evaluation disagree with the
/// documented intrinsic semantics.
///
/// The result is written into the destination vector that remains on top of
/// the interpreter stack. Always returns true.
static bool interp__builtin_select_scalar(InterpState &S,
                                          const CallExpr *Call) {
  unsigned N =
      Call->getArg(1)->getType()->getAs<VectorType>()->getNumElements();

  // Operands come off the stack in reverse of the (U, A, W) argument order.
  const Pointer &W = S.Stk.pop<Pointer>();
  const Pointer &A = S.Stk.pop<Pointer>();
  APSInt U = popToAPSInt(S, Call->getArg(0));
  // The destination is only peeked so it stays on the stack as the result.
  const Pointer &Dst = S.Stk.peek<Pointer>();

  bool TakeA0 = U.getZExtValue() & 1ULL;

  // Lane 0 is the only masked lane.
  if (TakeA0)
    Dst.elem<Floating>(0) = A.elem<Floating>(0);
  else
    Dst.elem<Floating>(0) = W.elem<Floating>(0);

  // Upper lanes always pass through from A, independent of the mask.
  for (unsigned I = 1; I < N; ++I)
    Dst.elem<Floating>(I) = A.elem<Floating>(I);

  Dst.initializeAllElements();
  return true;
}
2442+
24222443
static bool interp__builtin_elementwise_int_binop(
24232444
InterpState &S, CodePtr OpPC, const CallExpr *Call,
24242445
llvm::function_ref<APInt(const APSInt &, const APSInt &)> Fn) {
@@ -4205,6 +4226,9 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
42054226
return APInt::getAllOnes(DstBits);
42064227
});
42074228

4229+
case clang::X86::BI__builtin_ia32_selectss_128:
4230+
case clang::X86::BI__builtin_ia32_selectsd_128:
4231+
return interp__builtin_select_scalar(S, Call);
42084232
case clang::X86::BI__builtin_ia32_vprotbi:
42094233
case clang::X86::BI__builtin_ia32_vprotdi:
42104234
case clang::X86::BI__builtin_ia32_vprotqi:

clang/lib/AST/ExprConstant.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12264,6 +12264,24 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1226412264
return Success(APValue(ResultElements.data(), SourceLen), E);
1226512265
};
1226612266

12267+
auto EvalSelectScalar = [&](unsigned Len) -> bool {
12268+
APSInt Mask;
12269+
APValue AVal, WVal;
12270+
if (!EvaluateInteger(E->getArg(0), Mask, Info) ||
12271+
!EvaluateAsRValue(Info, E->getArg(1), AVal) ||
12272+
!EvaluateAsRValue(Info, E->getArg(2), WVal))
12273+
return false;
12274+
12275+
bool TakeA0 = (Mask.getZExtValue() & 1u) != 0;
12276+
SmallVector<APValue, 4> Res;
12277+
Res.reserve(Len);
12278+
Res.push_back(TakeA0 ? AVal.getVectorElt(0) : WVal.getVectorElt(0));
12279+
for (unsigned i = 1; i < Len; ++i)
12280+
Res.push_back(WVal.getVectorElt(i));
12281+
APValue V(Res.data(), Res.size());
12282+
return Success(V, E);
12283+
};
12284+
1226712285
switch (E->getBuiltinCallee()) {
1226812286
default:
1226912287
return false;
@@ -12567,6 +12585,10 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1256712585
return APInt((Src).trunc(DstBits));
1256812586
return APInt::getAllOnes(DstBits);
1256912587
});
12588+
case clang::X86::BI__builtin_ia32_selectss_128:
12589+
return EvalSelectScalar(4);
12590+
case clang::X86::BI__builtin_ia32_selectsd_128:
12591+
return EvalSelectScalar(2);
1257012592
case clang::X86::BI__builtin_ia32_pmuldq128:
1257112593
case clang::X86::BI__builtin_ia32_pmuldq256:
1257212594
case clang::X86::BI__builtin_ia32_pmuldq512:

clang/lib/Headers/avx512fintrin.h

Lines changed: 32 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1834,14 +1834,14 @@ _mm512_maskz_abs_epi32(__mmask16 __U, __m512i __A) {
18341834
(__v16si)_mm512_setzero_si512());
18351835
}
18361836

1837-
static __inline__ __m128 __DEFAULT_FN_ATTRS128
1838-
_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1837+
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1838+
_mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
18391839
__A = _mm_add_ss(__A, __B);
18401840
return __builtin_ia32_selectss_128(__U, __A, __W);
18411841
}
18421842

1843-
static __inline__ __m128 __DEFAULT_FN_ATTRS128
1844-
_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1843+
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1844+
_mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B) {
18451845
__A = _mm_add_ss(__A, __B);
18461846
return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
18471847
}
@@ -1864,14 +1864,14 @@ _mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
18641864
(__v4sf)_mm_setzero_ps(), \
18651865
(__mmask8)(U), (int)(R)))
18661866

1867-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
1868-
_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1867+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1868+
_mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
18691869
__A = _mm_add_sd(__A, __B);
18701870
return __builtin_ia32_selectsd_128(__U, __A, __W);
18711871
}
18721872

1873-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
1874-
_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1873+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1874+
_mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B) {
18751875
__A = _mm_add_sd(__A, __B);
18761876
return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
18771877
}
@@ -1949,14 +1949,14 @@ _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
19491949
(__v16sf)_mm512_add_round_ps((A), (B), (R)), \
19501950
(__v16sf)_mm512_setzero_ps()))
19511951

1952-
static __inline__ __m128 __DEFAULT_FN_ATTRS128
1953-
_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1952+
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1953+
_mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
19541954
__A = _mm_sub_ss(__A, __B);
19551955
return __builtin_ia32_selectss_128(__U, __A, __W);
19561956
}
19571957

1958-
static __inline__ __m128 __DEFAULT_FN_ATTRS128
1959-
_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1958+
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
1959+
_mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B) {
19601960
__A = _mm_sub_ss(__A, __B);
19611961
return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
19621962
}
@@ -1978,14 +1978,14 @@ _mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
19781978
(__v4sf)_mm_setzero_ps(), \
19791979
(__mmask8)(U), (int)(R)))
19801980

1981-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
1982-
_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1981+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1982+
_mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
19831983
__A = _mm_sub_sd(__A, __B);
19841984
return __builtin_ia32_selectsd_128(__U, __A, __W);
19851985
}
19861986

1987-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
1988-
_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1987+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
1988+
_mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B) {
19891989
__A = _mm_sub_sd(__A, __B);
19901990
return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
19911991
}
@@ -2064,14 +2064,14 @@ _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
20642064
(__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
20652065
(__v16sf)_mm512_setzero_ps()))
20662066

2067-
static __inline__ __m128 __DEFAULT_FN_ATTRS128
2068-
_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2067+
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
2068+
_mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
20692069
__A = _mm_mul_ss(__A, __B);
20702070
return __builtin_ia32_selectss_128(__U, __A, __W);
20712071
}
20722072

2073-
static __inline__ __m128 __DEFAULT_FN_ATTRS128
2074-
_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2073+
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
2074+
_mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B) {
20752075
__A = _mm_mul_ss(__A, __B);
20762076
return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
20772077
}
@@ -2093,14 +2093,14 @@ _mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
20932093
(__v4sf)_mm_setzero_ps(), \
20942094
(__mmask8)(U), (int)(R)))
20952095

2096-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
2097-
_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2096+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
2097+
_mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
20982098
__A = _mm_mul_sd(__A, __B);
20992099
return __builtin_ia32_selectsd_128(__U, __A, __W);
21002100
}
21012101

2102-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
2103-
_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2102+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
2103+
_mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B) {
21042104
__A = _mm_mul_sd(__A, __B);
21052105
return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
21062106
}
@@ -2179,14 +2179,14 @@ _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
21792179
(__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
21802180
(__v16sf)_mm512_setzero_ps()))
21812181

2182-
static __inline__ __m128 __DEFAULT_FN_ATTRS128
2183-
_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2182+
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
2183+
_mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
21842184
__A = _mm_div_ss(__A, __B);
21852185
return __builtin_ia32_selectss_128(__U, __A, __W);
21862186
}
21872187

2188-
static __inline__ __m128 __DEFAULT_FN_ATTRS128
2189-
_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2188+
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
2189+
_mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B) {
21902190
__A = _mm_div_ss(__A, __B);
21912191
return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
21922192
}
@@ -2209,14 +2209,14 @@ _mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
22092209
(__v4sf)_mm_setzero_ps(), \
22102210
(__mmask8)(U), (int)(R)))
22112211

2212-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
2213-
_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2212+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
2213+
_mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
22142214
__A = _mm_div_sd(__A, __B);
22152215
return __builtin_ia32_selectsd_128(__U, __A, __W);
22162216
}
22172217

2218-
static __inline__ __m128d __DEFAULT_FN_ATTRS128
2219-
_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2218+
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
2219+
_mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B) {
22202220
__A = _mm_div_sd(__A, __B);
22212221
return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
22222222
}

clang/test/CodeGen/X86/avx512f-builtins.c

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3302,6 +3302,8 @@ __m128 test_mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
33023302
// CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
33033303
return _mm_mask_add_ss(__W,__U,__A,__B);
33043304
}
3305+
// Lane 0 is __A[0] + __B[0] (mask bit 0 set); lanes 1-3 pass through from __A, not __W.
TEST_CONSTEXPR(match_v4sf(_mm_mask_add_ss((__m128)(__v4sf){10.0f, 100.0f, 200.0f, 300.0f}, 0x1, (__m128)(__v4sf){1.25f, 3.0f, 4.0f, 5.0f}, (__m128)(__v4sf){2.75f, 6.0f, 7.0f, 8.0f}), 4.0f, 3.0f, 4.0f, 5.0f));
3306+
33053307
__m128 test_mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B) {
33063308
// CHECK-LABEL: test_mm_maskz_add_ss
33073309
// CHECK-NOT: @llvm.x86.avx512.mask.add.ss.round
@@ -3317,6 +3319,8 @@ __m128 test_mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B) {
33173319
// CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
33183320
return _mm_maskz_add_ss(__U,__A,__B);
33193321
}
3322+
// Zero-masking only affects lane 0; lanes 1-3 pass through from __A.
TEST_CONSTEXPR(match_v4sf(_mm_maskz_add_ss(0x1, (__m128)(__v4sf){1.25f, 3.0f, 4.0f, 5.0f}, (__m128)(__v4sf){2.75f, 6.0f, 7.0f, 8.0f}), 4.0f, 3.0f, 4.0f, 5.0f));
3323+
33203324
__m128d test_mm_add_round_sd(__m128d __A, __m128d __B) {
33213325
// CHECK-LABEL: test_mm_add_round_sd
33223326
// CHECK: @llvm.x86.avx512.mask.add.sd.round
@@ -3347,6 +3351,8 @@ __m128d test_mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
33473351
// CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
33483352
return _mm_mask_add_sd(__W,__U,__A,__B);
33493353
}
3354+
// Lane 0 is __A[0] + __B[0] (mask bit 0 set); lane 1 passes through from __A, not __W.
TEST_CONSTEXPR(match_v2df(_mm_mask_add_sd((__m128d)(__v2df){10.0, 999.0}, 0x1, (__m128d)(__v2df){5.5, 77.0}, (__m128d)(__v2df){0.25, 88.0}), 5.75, 77.0));
3355+
33503356
__m128d test_mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B) {
33513357
// CHECK-LABEL: test_mm_maskz_add_sd
33523358
// CHECK-NOT: @llvm.x86.avx512.mask.add.sd.round
@@ -3362,6 +3368,8 @@ __m128d test_mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B) {
33623368
// CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
33633369
return _mm_maskz_add_sd(__U,__A,__B);
33643370
}
3371+
// Zero-masking only affects lane 0; lane 1 passes through from __A.
TEST_CONSTEXPR(match_v2df(_mm_maskz_add_sd(0x1, (__m128d)(__v2df){5.5, 77.0}, (__m128d)(__v2df){0.25, 88.0}), 5.75, 77.0));
3372+
33653373
__m512d test_mm512_sub_round_pd(__m512d __A, __m512d __B) {
33663374
// CHECK-LABEL: test_mm512_sub_round_pd
33673375
// CHECK: @llvm.x86.avx512.sub.pd.512
@@ -3450,6 +3458,8 @@ __m128 test_mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
34503458
// CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
34513459
return _mm_mask_sub_ss(__W,__U,__A,__B);
34523460
}
3461+
// Lane 0 is __A[0] - __B[0] (mask bit 0 set); lanes 1-3 pass through from __A, not __W.
TEST_CONSTEXPR(match_v4sf(_mm_mask_sub_ss((__m128)(__v4sf){-1.0f, 10.0f, 20.0f, 30.0f}, 0x1, (__m128)(__v4sf){7.0f, 3.0f, 4.0f, 5.0f}, (__m128)(__v4sf){2.5f, 6.0f, 7.0f, 8.0f}), 4.5f, 3.0f, 4.0f, 5.0f));
3462+
34533463
__m128 test_mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B) {
34543464
// CHECK-LABEL: test_mm_maskz_sub_ss
34553465
// CHECK-NOT: @llvm.x86.avx512.mask.sub.ss.round
@@ -3465,6 +3475,8 @@ __m128 test_mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B) {
34653475
// CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
34663476
return _mm_maskz_sub_ss(__U,__A,__B);
34673477
}
3478+
// Zero-masking only affects lane 0; lanes 1-3 pass through from __A.
TEST_CONSTEXPR(match_v4sf(_mm_maskz_sub_ss(0x1, (__m128)(__v4sf){7.0f, 3.0f, 4.0f, 5.0f}, (__m128)(__v4sf){2.5f, 6.0f, 7.0f, 8.0f}), 4.5f, 3.0f, 4.0f, 5.0f));
3479+
34683480
__m128d test_mm_sub_round_sd(__m128d __A, __m128d __B) {
34693481
// CHECK-LABEL: test_mm_sub_round_sd
34703482
// CHECK: @llvm.x86.avx512.mask.sub.sd.round
@@ -3495,6 +3507,8 @@ __m128d test_mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
34953507
// CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
34963508
return _mm_mask_sub_sd(__W,__U,__A,__B);
34973509
}
3510+
// Lane 0 is __A[0] - __B[0] (mask bit 0 set); lane 1 passes through from __A, not __W.
TEST_CONSTEXPR(match_v2df(_mm_mask_sub_sd((__m128d)(__v2df){-1.0, 111.0}, 0x1, (__m128d)(__v2df){9.0, 70.0}, (__m128d)(__v2df){3.5, 80.0}), 5.5, 70.0));
3511+
34983512
__m128d test_mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B) {
34993513
// CHECK-LABEL: test_mm_maskz_sub_sd
35003514
// CHECK-NOT: @llvm.x86.avx512.mask.sub.sd.round
@@ -3510,6 +3524,8 @@ __m128d test_mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B) {
35103524
// CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
35113525
return _mm_maskz_sub_sd(__U,__A,__B);
35123526
}
3527+
// Zero-masking only affects lane 0; lane 1 passes through from __A.
TEST_CONSTEXPR(match_v2df(_mm_maskz_sub_sd(0x1, (__m128d)(__v2df){9.0, 70.0}, (__m128d)(__v2df){3.5, 80.0}), 5.5, 70.0));
3528+
35133529
__m512d test_mm512_mul_round_pd(__m512d __A, __m512d __B) {
35143530
// CHECK-LABEL: test_mm512_mul_round_pd
35153531
// CHECK: @llvm.x86.avx512.mul.pd.512
@@ -3598,6 +3614,8 @@ __m128 test_mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
35983614
// CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
35993615
return _mm_mask_mul_ss(__W,__U,__A,__B);
36003616
}
3617+
// Lane 0 is __A[0] * __B[0] (mask bit 0 set); lanes 1-3 pass through from __A, not __W.
TEST_CONSTEXPR(match_v4sf(_mm_mask_mul_ss((__m128)(__v4sf){42.0f, -1.0f, -2.0f, -3.0f}, 0x1, (__m128)(__v4sf){6.0f, 9.0f, 9.0f, 9.0f}, (__m128)(__v4sf){7.0f, 8.0f, 8.0f, 8.0f}), 42.0f, 9.0f, 9.0f, 9.0f));
3618+
36013619
__m128 test_mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B) {
36023620
// CHECK-LABEL: test_mm_maskz_mul_ss
36033621
// CHECK-NOT: @llvm.x86.avx512.mask.mul.ss.round
@@ -3613,6 +3631,8 @@ __m128 test_mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B) {
36133631
// CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
36143632
return _mm_maskz_mul_ss(__U,__A,__B);
36153633
}
3634+
// Zero-masking only affects lane 0; lanes 1-3 pass through from __A.
TEST_CONSTEXPR(match_v4sf(_mm_maskz_mul_ss(0x1, (__m128)(__v4sf){6.0f, 9.0f, 9.0f, 9.0f}, (__m128)(__v4sf){7.0f, 8.0f, 8.0f, 8.0f}), 42.0f, 9.0f, 9.0f, 9.0f));
3635+
36163636
__m128d test_mm_mul_round_sd(__m128d __A, __m128d __B) {
36173637
// CHECK-LABEL: test_mm_mul_round_sd
36183638
// CHECK: @llvm.x86.avx512.mask.mul.sd.round
@@ -3643,6 +3663,8 @@ __m128d test_mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
36433663
// CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
36443664
return _mm_mask_mul_sd(__W,__U,__A,__B);
36453665
}
3666+
// Lane 0 is __A[0] * __B[0] (mask bit 0 set); lane 1 passes through from __A, not __W.
TEST_CONSTEXPR(match_v2df(_mm_mask_mul_sd((__m128d)(__v2df){123.0, -9.0}, 0x1, (__m128d)(__v2df){2.5, 1.0}, (__m128d)(__v2df){4.0, 2.0}), 10.0, 1.0));
3667+
36463668
__m128d test_mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B) {
36473669
// CHECK-LABEL: test_mm_maskz_mul_sd
36483670
// CHECK-NOT: @llvm.x86.avx512.mask.mul.sd.round
@@ -3658,6 +3680,8 @@ __m128d test_mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B) {
36583680
// CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
36593681
return _mm_maskz_mul_sd(__U,__A,__B);
36603682
}
3683+
// Zero-masking only affects lane 0; lane 1 passes through from __A.
TEST_CONSTEXPR(match_v2df(_mm_maskz_mul_sd(0x1, (__m128d)(__v2df){2.5, 1.0}, (__m128d)(__v2df){4.0, 2.0}), 10.0, 1.0));
3684+
36613685
__m512d test_mm512_div_round_pd(__m512d __A, __m512d __B) {
36623686
// CHECK-LABEL: test_mm512_div_round_pd
36633687
// CHECK: @llvm.x86.avx512.div.pd.512
@@ -3757,6 +3781,8 @@ __m128 test_mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
37573781
// CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
37583782
return _mm_mask_div_ss(__W,__U,__A,__B);
37593783
}
3784+
// Lane 0 is __A[0] / __B[0] (mask bit 0 set); lanes 1-3 pass through from __A, not __W.
TEST_CONSTEXPR(match_v4sf(_mm_mask_div_ss((__m128)(__v4sf){-7.0f, 5.0f, 6.0f, 7.0f}, 0x1, (__m128)(__v4sf){9.0f, 1.0f, 1.0f, 1.0f}, (__m128)(__v4sf){3.0f, 2.0f, 2.0f, 2.0f}), 3.0f, 1.0f, 1.0f, 1.0f));
3785+
37603786
__m128 test_mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B) {
37613787
// CHECK-LABEL: test_mm_maskz_div_ss
37623788
// CHECK: extractelement <4 x float> %{{.*}}, i32 0
@@ -3771,6 +3797,8 @@ __m128 test_mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B) {
37713797
// CHECK-NEXT: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0
37723798
return _mm_maskz_div_ss(__U,__A,__B);
37733799
}
3800+
// Zero-masking only affects lane 0; lanes 1-3 pass through from __A.
TEST_CONSTEXPR(match_v4sf(_mm_maskz_div_ss(0x1, (__m128)(__v4sf){9.0f, 1.0f, 1.0f, 1.0f}, (__m128)(__v4sf){3.0f, 2.0f, 2.0f, 2.0f}), 3.0f, 1.0f, 1.0f, 1.0f));
3801+
37743802
__m128d test_mm_div_round_sd(__m128d __A, __m128d __B) {
37753803
// CHECK-LABEL: test_mm_div_round_sd
37763804
// CHECK: @llvm.x86.avx512.mask.div.sd.round
@@ -3800,6 +3828,8 @@ __m128d test_mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
38003828
// CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
38013829
return _mm_mask_div_sd(__W,__U,__A,__B);
38023830
}
3831+
// Lane 0 is __A[0] / __B[0] (mask bit 0 set); lane 1 passes through from __A, not __W.
TEST_CONSTEXPR(match_v2df(_mm_mask_div_sd((__m128d)(__v2df){-8.0, 44.0}, 0x1, (__m128d)(__v2df){8.0, 10.0}, (__m128d)(__v2df){2.0, 20.0}), 4.0, 10.0));
3832+
38033833
__m128d test_mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B) {
38043834
// CHECK-LABEL: test_mm_maskz_div_sd
38053835
// CHECK: extractelement <2 x double> %{{.*}}, i32 0
@@ -3814,6 +3844,8 @@ __m128d test_mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B) {
38143844
// CHECK-NEXT: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0
38153845
return _mm_maskz_div_sd(__U,__A,__B);
38163846
}
3847+
// Zero-masking only affects lane 0; lane 1 passes through from __A.
TEST_CONSTEXPR(match_v2df(_mm_maskz_div_sd(0x1, (__m128d)(__v2df){8.0, 10.0}, (__m128d)(__v2df){2.0, 20.0}), 4.0, 10.0));
3848+
38173849
__m128 test_mm_max_round_ss(__m128 __A, __m128 __B) {
38183850
// CHECK-LABEL: test_mm_max_round_ss
38193851
// CHECK: @llvm.x86.avx512.mask.max.ss.round

0 commit comments

Comments
 (0)