Skip to content

Commit 0a268f8

Browse files
authored
[Headers][X86] VectorExprEvaluator::VisitCallExpr - allow SSE/AVX2/AVX512 pack intrinsics to be used in constexpr (llvm#156003)
Fixes llvm#154283
1 parent d94a828 commit 0a268f8

File tree

13 files changed

+196
-59
lines changed

13 files changed

+196
-59
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -93,9 +93,6 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
9393
}
9494

9595
let Features = "sse2" in {
96-
def packsswb128 : X86Builtin<"_Vector<16, char>(_Vector<8, short>, _Vector<8, short>)">;
97-
def packssdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">;
98-
def packuswb128 : X86Builtin<"_Vector<16, char>(_Vector<8, short>, _Vector<8, short>)">;
9996
def vec_ext_v2di : X86Builtin<"long long int(_Vector<2, long long int>, _Constant int)">;
10097
def vec_ext_v4si : X86Builtin<"int(_Vector<4, int>, _Constant int)">;
10198
def vec_ext_v4sf : X86Builtin<"float(_Vector<4, float>, _Constant int)">;
@@ -108,6 +105,9 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
108105
def pavgw128 : X86Builtin<"_Vector<8, unsigned short>(_Vector<8, unsigned short>, _Vector<8, unsigned short>)">;
109106
def pmulhw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
110107
def pmulhuw128 : X86Builtin<"_Vector<8, unsigned short>(_Vector<8, unsigned short>, _Vector<8, unsigned short>)">;
108+
def packsswb128 : X86Builtin<"_Vector<16, char>(_Vector<8, short>, _Vector<8, short>)">;
109+
def packssdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">;
110+
def packuswb128 : X86Builtin<"_Vector<16, char>(_Vector<8, short>, _Vector<8, short>)">;
111111
}
112112

113113
let Features = "sse3" in {
@@ -312,7 +312,6 @@ let Features = "ssse3", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
312312

313313
let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
314314
def insertps128 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">;
315-
def packusdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">;
316315
def roundps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Constant int)">;
317316
def roundss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">;
318317
def roundsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
@@ -338,6 +337,7 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVector
338337
def pblendvb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Vector<16, char>)">;
339338

340339
def pmuldq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;
340+
def packusdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">;
341341
}
342342

343343
let Features = "sse4.2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
@@ -571,10 +571,6 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in
571571

572572
let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
573573
def mpsadbw256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant char)">;
574-
def packsswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, _Vector<16, short>)">;
575-
def packssdw256 : X86Builtin<"_Vector<16, short>(_Vector<8, int>, _Vector<8, int>)">;
576-
def packuswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, _Vector<16, short>)">;
577-
def packusdw256 : X86Builtin<"_Vector<16, short>(_Vector<8, int>, _Vector<8, int>)">;
578574
def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant int)">;
579575
def phaddw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
580576
def phaddd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
@@ -647,6 +643,10 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
647643
def psrlv4di : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;
648644

649645
def insert128i256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>, _Constant int)">;
646+
def packusdw256 : X86Builtin<"_Vector<16, short>(_Vector<8, int>, _Vector<8, int>)">;
647+
def packsswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, _Vector<16, short>)">;
648+
def packssdw256 : X86Builtin<"_Vector<16, short>(_Vector<8, int>, _Vector<8, int>)">;
649+
def packuswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, _Vector<16, short>)">;
650650
}
651651

652652
let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
@@ -1308,11 +1308,14 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>
13081308

13091309
let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
13101310
def ucmpw512_mask : X86Builtin<"unsigned int(_Vector<32, short>, _Vector<32, short>, _Constant int, unsigned int)">;
1311-
def packssdw512 : X86Builtin<"_Vector<32, short>(_Vector<16, int>, _Vector<16, int>)">;
1311+
def pshufb512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">;
1312+
}
1313+
1314+
let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
13121315
def packsswb512 : X86Builtin<"_Vector<64, char>(_Vector<32, short>, _Vector<32, short>)">;
1313-
def packusdw512 : X86Builtin<"_Vector<32, short>(_Vector<16, int>, _Vector<16, int>)">;
1316+
def packssdw512 : X86Builtin<"_Vector<32, short>(_Vector<16, int>, _Vector<16, int>)">;
13141317
def packuswb512 : X86Builtin<"_Vector<64, char>(_Vector<32, short>, _Vector<32, short>)">;
1315-
def pshufb512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">;
1318+
def packusdw512 : X86Builtin<"_Vector<32, short>(_Vector<16, int>, _Vector<16, int>)">;
13161319
}
13171320

13181321
let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2596,6 +2596,51 @@ static bool interp__builtin_elementwise_int_binop(
25962596
return true;
25972597
}
25982598

2599+
static bool
2600+
interp__builtin_x86_pack(InterpState &S, CodePtr, const CallExpr *E,
2601+
llvm::function_ref<APInt(const APSInt &)> PackFn) {
2602+
const auto *VT0 = E->getArg(0)->getType()->castAs<VectorType>();
2603+
const auto *VT1 = E->getArg(1)->getType()->castAs<VectorType>();
2604+
assert(VT0 && VT1 && "pack builtin VT0 and VT1 must be VectorType");
2605+
assert(VT0->getElementType() == VT1->getElementType() &&
2606+
VT0->getNumElements() == VT1->getNumElements() &&
2607+
"pack builtin VT0 and VT1 ElementType must be same");
2608+
2609+
const Pointer &RHS = S.Stk.pop<Pointer>();
2610+
const Pointer &LHS = S.Stk.pop<Pointer>();
2611+
const Pointer &Dst = S.Stk.peek<Pointer>();
2612+
2613+
const ASTContext &ASTCtx = S.getASTContext();
2614+
const unsigned SrcBits = ASTCtx.getIntWidth(VT0->getElementType());
2615+
const unsigned LHSVecLen = VT0->getNumElements();
2616+
const unsigned SrcPerLane = 128 / SrcBits;
2617+
const unsigned Lanes = LHSVecLen * SrcBits / 128;
2618+
2619+
PrimType SrcT = *S.getContext().classify(VT0->getElementType());
2620+
PrimType DstT = *S.getContext().classify(getElemType(Dst));
2621+
const bool IsUnsigend = getElemType(Dst)->isUnsignedIntegerType();
2622+
2623+
for (unsigned Lane = 0; Lane != Lanes; ++Lane) {
2624+
const unsigned BaseSrc = Lane * SrcPerLane;
2625+
const unsigned BaseDst = Lane * (2 * SrcPerLane);
2626+
2627+
for (unsigned I = 0; I != SrcPerLane; ++I) {
2628+
INT_TYPE_SWITCH_NO_BOOL(SrcT, {
2629+
APSInt A = LHS.elem<T>(BaseSrc + I).toAPSInt();
2630+
APSInt B = RHS.elem<T>(BaseSrc + I).toAPSInt();
2631+
2632+
assignInteger(S, Dst.atIndex(BaseDst + I), DstT,
2633+
APSInt(PackFn(A), IsUnsigend));
2634+
assignInteger(S, Dst.atIndex(BaseDst + SrcPerLane + I), DstT,
2635+
APSInt(PackFn(B), IsUnsigend));
2636+
});
2637+
}
2638+
}
2639+
2640+
Dst.initializeAllElements();
2641+
return true;
2642+
}
2643+
25992644
static bool interp__builtin_elementwise_maxmin(InterpState &S, CodePtr OpPC,
26002645
const CallExpr *Call,
26012646
unsigned BuiltinID) {
@@ -3475,6 +3520,29 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
34753520
}
34763521
return LHS.lshr(RHS.getZExtValue());
34773522
});
3523+
case clang::X86::BI__builtin_ia32_packsswb128:
3524+
case clang::X86::BI__builtin_ia32_packsswb256:
3525+
case clang::X86::BI__builtin_ia32_packsswb512:
3526+
case clang::X86::BI__builtin_ia32_packssdw128:
3527+
case clang::X86::BI__builtin_ia32_packssdw256:
3528+
case clang::X86::BI__builtin_ia32_packssdw512:
3529+
return interp__builtin_x86_pack(S, OpPC, Call, [](const APSInt &Src) {
3530+
return APInt(Src).truncSSat(Src.getBitWidth() / 2);
3531+
});
3532+
case clang::X86::BI__builtin_ia32_packusdw128:
3533+
case clang::X86::BI__builtin_ia32_packusdw256:
3534+
case clang::X86::BI__builtin_ia32_packusdw512:
3535+
case clang::X86::BI__builtin_ia32_packuswb128:
3536+
case clang::X86::BI__builtin_ia32_packuswb256:
3537+
case clang::X86::BI__builtin_ia32_packuswb512:
3538+
return interp__builtin_x86_pack(S, OpPC, Call, [](const APSInt &Src) {
3539+
unsigned DstBits = Src.getBitWidth() / 2;
3540+
if (Src.isNegative())
3541+
return APInt::getZero(DstBits);
3542+
if (Src.isIntN(DstBits))
3543+
return APInt(Src).trunc(DstBits);
3544+
return APInt::getAllOnes(DstBits);
3545+
});
34783546

34793547
case clang::X86::BI__builtin_ia32_vprotbi:
34803548
case clang::X86::BI__builtin_ia32_vprotdi:

clang/lib/AST/ExprConstant.cpp

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11575,6 +11575,46 @@ static bool handleVectorElementCast(EvalInfo &Info, const FPOptions FPO,
1157511575
return false;
1157611576
}
1157711577

11578+
static bool evalPackBuiltin(const CallExpr *E, EvalInfo &Info, APValue &Result,
11579+
llvm::function_ref<APInt(const APSInt &)> PackFn) {
11580+
APValue LHS, RHS;
11581+
if (!EvaluateAsRValue(Info, E->getArg(0), LHS) ||
11582+
!EvaluateAsRValue(Info, E->getArg(1), RHS))
11583+
return false;
11584+
11585+
unsigned LHSVecLen = LHS.getVectorLength();
11586+
unsigned RHSVecLen = RHS.getVectorLength();
11587+
11588+
assert(LHSVecLen != 0 && LHSVecLen == RHSVecLen &&
11589+
"pack builtin LHSVecLen must equal to RHSVecLen");
11590+
11591+
const VectorType *VT0 = E->getArg(0)->getType()->castAs<VectorType>();
11592+
const unsigned SrcBits = Info.Ctx.getIntWidth(VT0->getElementType());
11593+
11594+
const VectorType *DstVT = E->getType()->castAs<VectorType>();
11595+
QualType DstElemTy = DstVT->getElementType();
11596+
const bool DstIsUnsigned = DstElemTy->isUnsignedIntegerType();
11597+
11598+
const unsigned SrcPerLane = 128 / SrcBits;
11599+
const unsigned Lanes = LHSVecLen * SrcBits / 128;
11600+
11601+
SmallVector<APValue, 64> Out;
11602+
Out.reserve(LHSVecLen + RHSVecLen);
11603+
11604+
for (unsigned Lane = 0; Lane != Lanes; ++Lane) {
11605+
unsigned base = Lane * SrcPerLane;
11606+
for (unsigned I = 0; I != SrcPerLane; ++I)
11607+
Out.emplace_back(APValue(
11608+
APSInt(PackFn(LHS.getVectorElt(base + I).getInt()), DstIsUnsigned)));
11609+
for (unsigned I = 0; I != SrcPerLane; ++I)
11610+
Out.emplace_back(APValue(
11611+
APSInt(PackFn(RHS.getVectorElt(base + I).getInt()), DstIsUnsigned)));
11612+
}
11613+
11614+
Result = APValue(Out.data(), Out.size());
11615+
return true;
11616+
}
11617+
1157811618
bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1157911619
if (!IsConstantEvaluatedBuiltinCall(E))
1158011620
return ExprEvaluatorBaseTy::VisitCallExpr(E);
@@ -11768,7 +11808,29 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1176811808
}
1176911809
return LHS.lshr(RHS.getZExtValue());
1177011810
});
11771-
11811+
case X86::BI__builtin_ia32_packsswb128:
11812+
case X86::BI__builtin_ia32_packsswb256:
11813+
case X86::BI__builtin_ia32_packsswb512:
11814+
case X86::BI__builtin_ia32_packssdw128:
11815+
case X86::BI__builtin_ia32_packssdw256:
11816+
case X86::BI__builtin_ia32_packssdw512:
11817+
return evalPackBuiltin(E, Info, Result, [](const APSInt &Src) {
11818+
return APSInt(Src).truncSSat(Src.getBitWidth() / 2);
11819+
});
11820+
case X86::BI__builtin_ia32_packusdw128:
11821+
case X86::BI__builtin_ia32_packusdw256:
11822+
case X86::BI__builtin_ia32_packusdw512:
11823+
case X86::BI__builtin_ia32_packuswb128:
11824+
case X86::BI__builtin_ia32_packuswb256:
11825+
case X86::BI__builtin_ia32_packuswb512:
11826+
return evalPackBuiltin(E, Info, Result, [](const APSInt &Src) {
11827+
unsigned DstBits = Src.getBitWidth() / 2;
11828+
if (Src.isNegative())
11829+
return APInt::getZero(DstBits);
11830+
if (Src.isIntN(DstBits))
11831+
return APInt((Src).trunc(DstBits));
11832+
return APInt::getAllOnes(DstBits);
11833+
});
1177211834
case clang::X86::BI__builtin_ia32_pmuldq128:
1177311835
case clang::X86::BI__builtin_ia32_pmuldq256:
1177411836
case clang::X86::BI__builtin_ia32_pmuldq512:

clang/lib/Headers/avx2intrin.h

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -165,9 +165,8 @@ _mm256_abs_epi32(__m256i __a) {
165165
/// A 256-bit vector of [16 x i16] used to generate result[127:64] and
166166
/// result[255:192].
167167
/// \returns A 256-bit integer vector containing the result.
168-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
169-
_mm256_packs_epi16(__m256i __a, __m256i __b)
170-
{
168+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
169+
_mm256_packs_epi16(__m256i __a, __m256i __b) {
171170
return (__m256i)__builtin_ia32_packsswb256((__v16hi)__a, (__v16hi)__b);
172171
}
173172

@@ -197,9 +196,8 @@ _mm256_packs_epi16(__m256i __a, __m256i __b)
197196
/// A 256-bit vector of [8 x i32] used to generate result[127:64] and
198197
/// result[255:192].
199198
/// \returns A 256-bit vector of [16 x i16] containing the result.
200-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
201-
_mm256_packs_epi32(__m256i __a, __m256i __b)
202-
{
199+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
200+
_mm256_packs_epi32(__m256i __a, __m256i __b) {
203201
return (__m256i)__builtin_ia32_packssdw256((__v8si)__a, (__v8si)__b);
204202
}
205203

@@ -228,9 +226,8 @@ _mm256_packs_epi32(__m256i __a, __m256i __b)
228226
/// A 256-bit vector of [16 x i16] used to generate result[127:64] and
229227
/// result[255:192].
230228
/// \returns A 256-bit integer vector containing the result.
231-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
232-
_mm256_packus_epi16(__m256i __a, __m256i __b)
233-
{
229+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
230+
_mm256_packus_epi16(__m256i __a, __m256i __b) {
234231
return (__m256i)__builtin_ia32_packuswb256((__v16hi)__a, (__v16hi)__b);
235232
}
236233

@@ -260,9 +257,8 @@ _mm256_packus_epi16(__m256i __a, __m256i __b)
260257
/// A 256-bit vector of [8 x i32] used to generate result[127:64] and
261258
/// result[255:192].
262259
/// \returns A 256-bit vector of [16 x i16] containing the result.
263-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
264-
_mm256_packus_epi32(__m256i __V1, __m256i __V2)
265-
{
260+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
261+
_mm256_packus_epi32(__m256i __V1, __m256i __V2) {
266262
return (__m256i) __builtin_ia32_packusdw256((__v8si)__V1, (__v8si)__V2);
267263
}
268264

clang/lib/Headers/avx512bwintrin.h

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -510,9 +510,8 @@ _mm512_maskz_abs_epi16(__mmask32 __U, __m512i __A) {
510510
(__v32hi)_mm512_setzero_si512());
511511
}
512512

513-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
514-
_mm512_packs_epi32(__m512i __A, __m512i __B)
515-
{
513+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
514+
_mm512_packs_epi32(__m512i __A, __m512i __B) {
516515
return (__m512i)__builtin_ia32_packssdw512((__v16si)__A, (__v16si)__B);
517516
}
518517

@@ -532,9 +531,8 @@ _mm512_mask_packs_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
532531
(__v32hi)__W);
533532
}
534533

535-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
536-
_mm512_packs_epi16(__m512i __A, __m512i __B)
537-
{
534+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
535+
_mm512_packs_epi16(__m512i __A, __m512i __B) {
538536
return (__m512i)__builtin_ia32_packsswb512((__v32hi)__A, (__v32hi) __B);
539537
}
540538

@@ -554,9 +552,8 @@ _mm512_maskz_packs_epi16(__mmask64 __M, __m512i __A, __m512i __B)
554552
(__v64qi)_mm512_setzero_si512());
555553
}
556554

557-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
558-
_mm512_packus_epi32(__m512i __A, __m512i __B)
559-
{
555+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
556+
_mm512_packus_epi32(__m512i __A, __m512i __B) {
560557
return (__m512i)__builtin_ia32_packusdw512((__v16si) __A, (__v16si) __B);
561558
}
562559

@@ -576,9 +573,8 @@ _mm512_mask_packus_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
576573
(__v32hi)__W);
577574
}
578575

579-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
580-
_mm512_packus_epi16(__m512i __A, __m512i __B)
581-
{
576+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
577+
_mm512_packus_epi16(__m512i __A, __m512i __B) {
582578
return (__m512i)__builtin_ia32_packuswb512((__v32hi) __A, (__v32hi) __B);
583579
}
584580

clang/lib/Headers/emmintrin.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4159,8 +4159,8 @@ void _mm_mfence(void);
41594159
/// A 128-bit integer vector of [8 x i16]. The converted [8 x i8] values are
41604160
/// written to the higher 64 bits of the result.
41614161
/// \returns A 128-bit vector of [16 x i8] containing the converted values.
4162-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi16(__m128i __a,
4163-
__m128i __b) {
4162+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
4163+
_mm_packs_epi16(__m128i __a, __m128i __b) {
41644164
return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b);
41654165
}
41664166

@@ -4182,8 +4182,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi16(__m128i __a,
41824182
/// A 128-bit integer vector of [4 x i32]. The converted [4 x i16] values
41834183
/// are written to the higher 64 bits of the result.
41844184
/// \returns A 128-bit vector of [8 x i16] containing the converted values.
4185-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi32(__m128i __a,
4186-
__m128i __b) {
4185+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
4186+
_mm_packs_epi32(__m128i __a, __m128i __b) {
41874187
return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b);
41884188
}
41894189

@@ -4205,8 +4205,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi32(__m128i __a,
42054205
/// A 128-bit integer vector of [8 x i16]. The converted [8 x i8] values are
42064206
/// written to the higher 64 bits of the result.
42074207
/// \returns A 128-bit vector of [16 x i8] containing the converted values.
4208-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi16(__m128i __a,
4209-
__m128i __b) {
4208+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
4209+
_mm_packus_epi16(__m128i __a, __m128i __b) {
42104210
return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b);
42114211
}
42124212

0 commit comments

Comments
 (0)