Skip to content

Commit e3f22d9

Browse files
authored
[X86][Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - add SSE/AVX VPTEST/VTESTPD/VTESTPS intrinsics to be used in constexpr (llvm#160428)
Fix llvm#158653 Add handling for: ``` ptestz128 / ptestz256 → (a & b) == 0. ptestc128 / ptestc256 → (~a & b) == 0 ptestnzc128 / ptestnzc256 → (a & b) != 0 AND (~a & b) != 0. vtestzps / vtestzps256 → (S(a) & S(b)) == 0. vtestcps / vtestcps256 → (~S(a) & S(b)) == 0. vtestnzcps / vtestnzcps256 → (S(a) & S(b)) != 0 AND (~S(a) & S(b)) != 0. vtestzpd / vtestzpd256 → (S(a) & S(b)) == 0. vtestcpd / vtestcpd256 → (~S(a) & S(b)) == 0. vtestnzcpd / vtestnzcpd256 → (S(a) & S(b)) != 0 AND (~S(a) & S(b)) != 0. ``` Add corresponding test cases for: ``` int _mm_test_all_ones (__m128i a) int _mm_test_all_zeros (__m128i mask, __m128i a) int _mm_test_mix_ones_zeros (__m128i mask, __m128i a) int _mm_testc_pd (__m128d a, __m128d b) int _mm256_testc_pd (__m256d a, __m256d b) int _mm_testc_ps (__m128 a, __m128 b) int _mm256_testc_ps (__m256 a, __m256 b) int _mm_testc_si128 (__m128i a, __m128i b) int _mm256_testc_si256 (__m256i a, __m256i b) int _mm_testnzc_pd (__m128d a, __m128d b) int _mm256_testnzc_pd (__m256d a, __m256d b) int _mm_testnzc_ps (__m128 a, __m128 b) int _mm256_testnzc_ps (__m256 a, __m256 b) int _mm_testnzc_si128 (__m128i a, __m128i b) int _mm256_testnzc_si256 (__m256i a, __m256i b) int _mm_testz_pd (__m128d a, __m128d b) int _mm256_testz_pd (__m256d a, __m256d b) int _mm_testz_ps (__m128 a, __m128 b) int _mm256_testz_ps (__m256 a, __m256 b) int _mm_testz_si128 (__m128i a, __m128i b) int _mm256_testz_si256 (__m256i a, __m256i b) ```
1 parent 648b3aa commit e3f22d9

File tree

8 files changed

+253
-63
lines changed

8 files changed

+253
-63
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -323,14 +323,22 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
323323
def roundsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
324324
def roundpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Constant int)">;
325325
def dpps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">;
326-
def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant char)">;
327-
def ptestz128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
328-
def ptestc128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
329-
def ptestnzc128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
326+
def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, "
327+
"_Vector<2,double>, _Constant char)">;
330328
def mpsadbw128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
331329
def phminposuw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>)">;
332330
}
333331

332+
let Features = "sse4.1",
333+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
334+
def ptestz128
335+
: X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
336+
def ptestc128
337+
: X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
338+
def ptestnzc128
339+
: X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
340+
}
341+
334342
let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
335343
def pblendw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Constant int)">;
336344
def blendpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
@@ -520,8 +528,8 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in
520528
def roundps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Constant int)">;
521529
}
522530

523-
524-
let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
531+
let Features = "avx",
532+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
525533
def vtestzpd : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">;
526534
def vtestcpd : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">;
527535
def vtestnzcpd : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">;
@@ -530,7 +538,8 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
530538
def vtestnzcps : X86Builtin<"int(_Vector<4, float>, _Vector<4, float>)">;
531539
}
532540

533-
let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
541+
let Features = "avx",
542+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
534543
def vtestzpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">;
535544
def vtestcpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">;
536545
def vtestnzcpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">;
@@ -540,6 +549,10 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in
540549
def ptestz256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
541550
def ptestc256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
542551
def ptestnzc256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
552+
}
553+
554+
let Features = "avx",
555+
Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
543556
def movmskpd256 : X86Builtin<"int(_Vector<4, double>)">;
544557
def movmskps256 : X86Builtin<"int(_Vector<8, float>)">;
545558
}

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2756,6 +2756,45 @@ static bool interp__builtin_ia32_pshuf(InterpState &S, CodePtr OpPC,
27562756
return true;
27572757
}
27582758

2759+
static bool interp__builtin_ia32_test_op(
2760+
InterpState &S, CodePtr OpPC, const CallExpr *Call,
2761+
llvm::function_ref<bool(const APInt &A, const APInt &B)> Fn) {
2762+
const Pointer &RHS = S.Stk.pop<Pointer>();
2763+
const Pointer &LHS = S.Stk.pop<Pointer>();
2764+
2765+
assert(LHS.getNumElems() == RHS.getNumElems());
2766+
2767+
unsigned SourceLen = LHS.getNumElems();
2768+
QualType ElemQT = getElemType(LHS);
2769+
OptPrimType ElemPT = S.getContext().classify(ElemQT);
2770+
unsigned LaneWidth = S.getASTContext().getTypeSize(ElemQT);
2771+
2772+
APInt AWide(LaneWidth * SourceLen, 0);
2773+
APInt BWide(LaneWidth * SourceLen, 0);
2774+
2775+
for (unsigned I = 0; I != SourceLen; ++I) {
2776+
APInt ALane;
2777+
APInt BLane;
2778+
2779+
if (ElemQT->isIntegerType()) { // Get value.
2780+
INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
2781+
ALane = LHS.elem<T>(I).toAPSInt();
2782+
BLane = RHS.elem<T>(I).toAPSInt();
2783+
});
2784+
} else if (ElemQT->isFloatingType()) { // Get only sign bit.
2785+
using T = PrimConv<PT_Float>::T;
2786+
ALane = LHS.elem<T>(I).getAPFloat().bitcastToAPInt().isNegative();
2787+
BLane = RHS.elem<T>(I).getAPFloat().bitcastToAPInt().isNegative();
2788+
} else { // Must be integer or floating type.
2789+
return false;
2790+
}
2791+
AWide.insertBits(ALane, I * LaneWidth);
2792+
BWide.insertBits(BLane, I * LaneWidth);
2793+
}
2794+
pushInteger(S, Fn(AWide, BWide), Call->getType());
2795+
return true;
2796+
}
2797+
27592798
static bool interp__builtin_elementwise_triop(
27602799
InterpState &S, CodePtr OpPC, const CallExpr *Call,
27612800
llvm::function_ref<APInt(const APSInt &, const APSInt &, const APSInt &)>
@@ -3712,7 +3751,34 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
37123751
S, OpPC, Call, [](const APSInt &F, const APSInt &T, const APSInt &C) {
37133752
return ((APInt)C).isNegative() ? T : F;
37143753
});
3715-
3754+
case X86::BI__builtin_ia32_ptestz128:
3755+
case X86::BI__builtin_ia32_ptestz256:
3756+
case X86::BI__builtin_ia32_vtestzps:
3757+
case X86::BI__builtin_ia32_vtestzps256:
3758+
case X86::BI__builtin_ia32_vtestzpd:
3759+
case X86::BI__builtin_ia32_vtestzpd256:
3760+
return interp__builtin_ia32_test_op(
3761+
S, OpPC, Call,
3762+
[](const APInt &A, const APInt &B) { return (A & B) == 0; });
3763+
case X86::BI__builtin_ia32_ptestc128:
3764+
case X86::BI__builtin_ia32_ptestc256:
3765+
case X86::BI__builtin_ia32_vtestcps:
3766+
case X86::BI__builtin_ia32_vtestcps256:
3767+
case X86::BI__builtin_ia32_vtestcpd:
3768+
case X86::BI__builtin_ia32_vtestcpd256:
3769+
return interp__builtin_ia32_test_op(
3770+
S, OpPC, Call,
3771+
[](const APInt &A, const APInt &B) { return (~A & B) == 0; });
3772+
case X86::BI__builtin_ia32_ptestnzc128:
3773+
case X86::BI__builtin_ia32_ptestnzc256:
3774+
case X86::BI__builtin_ia32_vtestnzcps:
3775+
case X86::BI__builtin_ia32_vtestnzcps256:
3776+
case X86::BI__builtin_ia32_vtestnzcpd:
3777+
case X86::BI__builtin_ia32_vtestnzcpd256:
3778+
return interp__builtin_ia32_test_op(
3779+
S, OpPC, Call, [](const APInt &A, const APInt &B) {
3780+
return ((A & B) != 0) && ((~A & B) != 0);
3781+
});
37163782
case X86::BI__builtin_ia32_selectb_128:
37173783
case X86::BI__builtin_ia32_selectb_256:
37183784
case X86::BI__builtin_ia32_selectb_512:

clang/lib/AST/ExprConstant.cpp

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13905,6 +13905,40 @@ static bool getBuiltinAlignArguments(const CallExpr *E, EvalInfo &Info,
1390513905

1390613906
bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
1390713907
unsigned BuiltinOp) {
13908+
auto EvalTestOp = [&](llvm::function_ref<bool(const APInt &, const APInt &)>
13909+
Fn) {
13910+
APValue SourceLHS, SourceRHS;
13911+
if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
13912+
!EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
13913+
return false;
13914+
13915+
unsigned SourceLen = SourceLHS.getVectorLength();
13916+
const VectorType *VT = E->getArg(0)->getType()->castAs<VectorType>();
13917+
QualType ElemQT = VT->getElementType();
13918+
unsigned LaneWidth = Info.Ctx.getTypeSize(ElemQT);
13919+
13920+
APInt AWide(LaneWidth * SourceLen, 0);
13921+
APInt BWide(LaneWidth * SourceLen, 0);
13922+
13923+
for (unsigned I = 0; I != SourceLen; ++I) {
13924+
APInt ALane;
13925+
APInt BLane;
13926+
if (ElemQT->isIntegerType()) { // Get value.
13927+
ALane = SourceLHS.getVectorElt(I).getInt();
13928+
BLane = SourceRHS.getVectorElt(I).getInt();
13929+
} else if (ElemQT->isFloatingType()) { // Get only sign bit.
13930+
ALane =
13931+
SourceLHS.getVectorElt(I).getFloat().bitcastToAPInt().isNegative();
13932+
BLane =
13933+
SourceRHS.getVectorElt(I).getFloat().bitcastToAPInt().isNegative();
13934+
} else { // Must be integer or floating type.
13935+
return false;
13936+
}
13937+
AWide.insertBits(ALane, I * LaneWidth);
13938+
BWide.insertBits(BLane, I * LaneWidth);
13939+
}
13940+
return Success(Fn(AWide, BWide), E);
13941+
};
1390813942

1390913943
auto HandleMaskBinOp =
1391013944
[&](llvm::function_ref<APSInt(const APSInt &, const APSInt &)> Fn)
@@ -15018,7 +15052,34 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
1501815052
Result.setBitVal(P++, Val[I]);
1501915053
return Success(Result, E);
1502015054
}
15021-
15055+
case X86::BI__builtin_ia32_ptestz128:
15056+
case X86::BI__builtin_ia32_ptestz256:
15057+
case X86::BI__builtin_ia32_vtestzps:
15058+
case X86::BI__builtin_ia32_vtestzps256:
15059+
case X86::BI__builtin_ia32_vtestzpd:
15060+
case X86::BI__builtin_ia32_vtestzpd256: {
15061+
return EvalTestOp(
15062+
[](const APInt &A, const APInt &B) { return (A & B) == 0; });
15063+
}
15064+
case X86::BI__builtin_ia32_ptestc128:
15065+
case X86::BI__builtin_ia32_ptestc256:
15066+
case X86::BI__builtin_ia32_vtestcps:
15067+
case X86::BI__builtin_ia32_vtestcps256:
15068+
case X86::BI__builtin_ia32_vtestcpd:
15069+
case X86::BI__builtin_ia32_vtestcpd256: {
15070+
return EvalTestOp(
15071+
[](const APInt &A, const APInt &B) { return (~A & B) == 0; });
15072+
}
15073+
case X86::BI__builtin_ia32_ptestnzc128:
15074+
case X86::BI__builtin_ia32_ptestnzc256:
15075+
case X86::BI__builtin_ia32_vtestnzcps:
15076+
case X86::BI__builtin_ia32_vtestnzcps256:
15077+
case X86::BI__builtin_ia32_vtestnzcpd:
15078+
case X86::BI__builtin_ia32_vtestnzcpd256: {
15079+
return EvalTestOp([](const APInt &A, const APInt &B) {
15080+
return ((A & B) != 0) && ((~A & B) != 0);
15081+
});
15082+
}
1502215083
case X86::BI__builtin_ia32_kandqi:
1502315084
case X86::BI__builtin_ia32_kandhi:
1502415085
case X86::BI__builtin_ia32_kandsi:

clang/lib/Headers/avxintrin.h

Lines changed: 30 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -2539,9 +2539,8 @@ _mm256_unpacklo_ps(__m256 __a, __m256 __b) {
25392539
/// \param __b
25402540
/// A 128-bit vector of [2 x double].
25412541
/// \returns the ZF flag in the EFLAGS register.
2542-
static __inline int __DEFAULT_FN_ATTRS128
2543-
_mm_testz_pd(__m128d __a, __m128d __b)
2544-
{
2542+
static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testz_pd(__m128d __a,
2543+
__m128d __b) {
25452544
return __builtin_ia32_vtestzpd((__v2df)__a, (__v2df)__b);
25462545
}
25472546

@@ -2568,9 +2567,8 @@ _mm_testz_pd(__m128d __a, __m128d __b)
25682567
/// \param __b
25692568
/// A 128-bit vector of [2 x double].
25702569
/// \returns the CF flag in the EFLAGS register.
2571-
static __inline int __DEFAULT_FN_ATTRS128
2572-
_mm_testc_pd(__m128d __a, __m128d __b)
2573-
{
2570+
static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testc_pd(__m128d __a,
2571+
__m128d __b) {
25742572
return __builtin_ia32_vtestcpd((__v2df)__a, (__v2df)__b);
25752573
}
25762574

@@ -2598,9 +2596,8 @@ _mm_testc_pd(__m128d __a, __m128d __b)
25982596
/// \param __b
25992597
/// A 128-bit vector of [2 x double].
26002598
/// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
2601-
static __inline int __DEFAULT_FN_ATTRS128
2602-
_mm_testnzc_pd(__m128d __a, __m128d __b)
2603-
{
2599+
static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR
2600+
_mm_testnzc_pd(__m128d __a, __m128d __b) {
26042601
return __builtin_ia32_vtestnzcpd((__v2df)__a, (__v2df)__b);
26052602
}
26062603

@@ -2627,9 +2624,8 @@ _mm_testnzc_pd(__m128d __a, __m128d __b)
26272624
/// \param __b
26282625
/// A 128-bit vector of [4 x float].
26292626
/// \returns the ZF flag.
2630-
static __inline int __DEFAULT_FN_ATTRS128
2631-
_mm_testz_ps(__m128 __a, __m128 __b)
2632-
{
2627+
static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testz_ps(__m128 __a,
2628+
__m128 __b) {
26332629
return __builtin_ia32_vtestzps((__v4sf)__a, (__v4sf)__b);
26342630
}
26352631

@@ -2656,9 +2652,8 @@ _mm_testz_ps(__m128 __a, __m128 __b)
26562652
/// \param __b
26572653
/// A 128-bit vector of [4 x float].
26582654
/// \returns the CF flag.
2659-
static __inline int __DEFAULT_FN_ATTRS128
2660-
_mm_testc_ps(__m128 __a, __m128 __b)
2661-
{
2655+
static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testc_ps(__m128 __a,
2656+
__m128 __b) {
26622657
return __builtin_ia32_vtestcps((__v4sf)__a, (__v4sf)__b);
26632658
}
26642659

@@ -2686,9 +2681,8 @@ _mm_testc_ps(__m128 __a, __m128 __b)
26862681
/// \param __b
26872682
/// A 128-bit vector of [4 x float].
26882683
/// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
2689-
static __inline int __DEFAULT_FN_ATTRS128
2690-
_mm_testnzc_ps(__m128 __a, __m128 __b)
2691-
{
2684+
static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testnzc_ps(__m128 __a,
2685+
__m128 __b) {
26922686
return __builtin_ia32_vtestnzcps((__v4sf)__a, (__v4sf)__b);
26932687
}
26942688

@@ -2715,9 +2709,8 @@ _mm_testnzc_ps(__m128 __a, __m128 __b)
27152709
/// \param __b
27162710
/// A 256-bit vector of [4 x double].
27172711
/// \returns the ZF flag.
2718-
static __inline int __DEFAULT_FN_ATTRS
2719-
_mm256_testz_pd(__m256d __a, __m256d __b)
2720-
{
2712+
static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testz_pd(__m256d __a,
2713+
__m256d __b) {
27212714
return __builtin_ia32_vtestzpd256((__v4df)__a, (__v4df)__b);
27222715
}
27232716

@@ -2744,9 +2737,8 @@ _mm256_testz_pd(__m256d __a, __m256d __b)
27442737
/// \param __b
27452738
/// A 256-bit vector of [4 x double].
27462739
/// \returns the CF flag.
2747-
static __inline int __DEFAULT_FN_ATTRS
2748-
_mm256_testc_pd(__m256d __a, __m256d __b)
2749-
{
2740+
static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testc_pd(__m256d __a,
2741+
__m256d __b) {
27502742
return __builtin_ia32_vtestcpd256((__v4df)__a, (__v4df)__b);
27512743
}
27522744

@@ -2774,9 +2766,8 @@ _mm256_testc_pd(__m256d __a, __m256d __b)
27742766
/// \param __b
27752767
/// A 256-bit vector of [4 x double].
27762768
/// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
2777-
static __inline int __DEFAULT_FN_ATTRS
2778-
_mm256_testnzc_pd(__m256d __a, __m256d __b)
2779-
{
2769+
static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
2770+
_mm256_testnzc_pd(__m256d __a, __m256d __b) {
27802771
return __builtin_ia32_vtestnzcpd256((__v4df)__a, (__v4df)__b);
27812772
}
27822773

@@ -2803,9 +2794,8 @@ _mm256_testnzc_pd(__m256d __a, __m256d __b)
28032794
/// \param __b
28042795
/// A 256-bit vector of [8 x float].
28052796
/// \returns the ZF flag.
2806-
static __inline int __DEFAULT_FN_ATTRS
2807-
_mm256_testz_ps(__m256 __a, __m256 __b)
2808-
{
2797+
static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testz_ps(__m256 __a,
2798+
__m256 __b) {
28092799
return __builtin_ia32_vtestzps256((__v8sf)__a, (__v8sf)__b);
28102800
}
28112801

@@ -2832,9 +2822,8 @@ _mm256_testz_ps(__m256 __a, __m256 __b)
28322822
/// \param __b
28332823
/// A 256-bit vector of [8 x float].
28342824
/// \returns the CF flag.
2835-
static __inline int __DEFAULT_FN_ATTRS
2836-
_mm256_testc_ps(__m256 __a, __m256 __b)
2837-
{
2825+
static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testc_ps(__m256 __a,
2826+
__m256 __b) {
28382827
return __builtin_ia32_vtestcps256((__v8sf)__a, (__v8sf)__b);
28392828
}
28402829

@@ -2862,9 +2851,8 @@ _mm256_testc_ps(__m256 __a, __m256 __b)
28622851
/// \param __b
28632852
/// A 256-bit vector of [8 x float].
28642853
/// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
2865-
static __inline int __DEFAULT_FN_ATTRS
2866-
_mm256_testnzc_ps(__m256 __a, __m256 __b)
2867-
{
2854+
static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testnzc_ps(__m256 __a,
2855+
__m256 __b) {
28682856
return __builtin_ia32_vtestnzcps256((__v8sf)__a, (__v8sf)__b);
28692857
}
28702858

@@ -2888,9 +2876,8 @@ _mm256_testnzc_ps(__m256 __a, __m256 __b)
28882876
/// \param __b
28892877
/// A 256-bit integer vector.
28902878
/// \returns the ZF flag.
2891-
static __inline int __DEFAULT_FN_ATTRS
2892-
_mm256_testz_si256(__m256i __a, __m256i __b)
2893-
{
2879+
static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
2880+
_mm256_testz_si256(__m256i __a, __m256i __b) {
28942881
return __builtin_ia32_ptestz256((__v4di)__a, (__v4di)__b);
28952882
}
28962883

@@ -2914,9 +2901,8 @@ _mm256_testz_si256(__m256i __a, __m256i __b)
29142901
/// \param __b
29152902
/// A 256-bit integer vector.
29162903
/// \returns the CF flag.
2917-
static __inline int __DEFAULT_FN_ATTRS
2918-
_mm256_testc_si256(__m256i __a, __m256i __b)
2919-
{
2904+
static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
2905+
_mm256_testc_si256(__m256i __a, __m256i __b) {
29202906
return __builtin_ia32_ptestc256((__v4di)__a, (__v4di)__b);
29212907
}
29222908

@@ -2941,9 +2927,8 @@ _mm256_testc_si256(__m256i __a, __m256i __b)
29412927
/// \param __b
29422928
/// A 256-bit integer vector.
29432929
/// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
2944-
static __inline int __DEFAULT_FN_ATTRS
2945-
_mm256_testnzc_si256(__m256i __a, __m256i __b)
2946-
{
2930+
static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
2931+
_mm256_testnzc_si256(__m256i __a, __m256i __b) {
29472932
return __builtin_ia32_ptestnzc256((__v4di)__a, (__v4di)__b);
29482933
}
29492934

0 commit comments

Comments
 (0)