Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 14 additions & 11 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,6 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
}

let Features = "sse2" in {
def packsswb128 : X86Builtin<"_Vector<16, char>(_Vector<8, short>, _Vector<8, short>)">;
def packssdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">;
def packuswb128 : X86Builtin<"_Vector<16, char>(_Vector<8, short>, _Vector<8, short>)">;
def vec_ext_v2di : X86Builtin<"long long int(_Vector<2, long long int>, _Constant int)">;
def vec_ext_v4si : X86Builtin<"int(_Vector<4, int>, _Constant int)">;
def vec_ext_v4sf : X86Builtin<"float(_Vector<4, float>, _Constant int)">;
Expand All @@ -108,6 +105,9 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
def pavgw128 : X86Builtin<"_Vector<8, unsigned short>(_Vector<8, unsigned short>, _Vector<8, unsigned short>)">;
def pmulhw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
def pmulhuw128 : X86Builtin<"_Vector<8, unsigned short>(_Vector<8, unsigned short>, _Vector<8, unsigned short>)">;
def packsswb128 : X86Builtin<"_Vector<16, char>(_Vector<8, short>, _Vector<8, short>)">;
def packssdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">;
def packuswb128 : X86Builtin<"_Vector<16, char>(_Vector<8, short>, _Vector<8, short>)">;
}

let Features = "sse3" in {
Expand Down Expand Up @@ -312,7 +312,6 @@ let Features = "ssse3", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]

let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def insertps128 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">;
def packusdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">;
def roundps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Constant int)">;
def roundss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">;
def roundsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
Expand All @@ -338,6 +337,7 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVector
def pblendvb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Vector<16, char>)">;

def pmuldq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;
def packusdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">;
}

let Features = "sse4.2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
Expand Down Expand Up @@ -571,10 +571,6 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in

let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def mpsadbw256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant char)">;
def packsswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, _Vector<16, short>)">;
def packssdw256 : X86Builtin<"_Vector<16, short>(_Vector<8, int>, _Vector<8, int>)">;
def packuswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, _Vector<16, short>)">;
def packusdw256 : X86Builtin<"_Vector<16, short>(_Vector<8, int>, _Vector<8, int>)">;
def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant int)">;
def phaddw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
def phaddd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
Expand Down Expand Up @@ -647,6 +643,10 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
def psrlv4di : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;

def insert128i256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>, _Constant int)">;
def packusdw256 : X86Builtin<"_Vector<16, short>(_Vector<8, int>, _Vector<8, int>)">;
def packsswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, _Vector<16, short>)">;
def packssdw256 : X86Builtin<"_Vector<16, short>(_Vector<8, int>, _Vector<8, int>)">;
def packuswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, _Vector<16, short>)">;
}

let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
Expand Down Expand Up @@ -1308,11 +1308,14 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>

let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
def ucmpw512_mask : X86Builtin<"unsigned int(_Vector<32, short>, _Vector<32, short>, _Constant int, unsigned int)">;
def packssdw512 : X86Builtin<"_Vector<32, short>(_Vector<16, int>, _Vector<16, int>)">;
def pshufb512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">;
}

let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def packsswb512 : X86Builtin<"_Vector<64, char>(_Vector<32, short>, _Vector<32, short>)">;
def packusdw512 : X86Builtin<"_Vector<32, short>(_Vector<16, int>, _Vector<16, int>)">;
def packssdw512 : X86Builtin<"_Vector<32, short>(_Vector<16, int>, _Vector<16, int>)">;
def packuswb512 : X86Builtin<"_Vector<64, char>(_Vector<32, short>, _Vector<32, short>)">;
def pshufb512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">;
def packusdw512 : X86Builtin<"_Vector<32, short>(_Vector<16, int>, _Vector<16, int>)">;
}

let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
Expand Down
68 changes: 68 additions & 0 deletions clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2604,6 +2604,51 @@ static bool interp__builtin_elementwise_int_binop(
return true;
}

static bool
interp__builtin_x86_pack(InterpState &S, CodePtr, const CallExpr *E,
llvm::function_ref<APInt(const APSInt &)> PackFn) {
const auto *VT0 = E->getArg(0)->getType()->castAs<VectorType>();
const auto *VT1 = E->getArg(1)->getType()->castAs<VectorType>();
assert(VT0 && VT1 && "pack builtin VT0 and VT1 must be VectorType");
assert(VT0->getElementType() == VT1->getElementType() &&
VT0->getNumElements() == VT1->getNumElements() &&
"pack builtin VT0 and VT1 ElementType must be same");

const Pointer &RHS = S.Stk.pop<Pointer>();
const Pointer &LHS = S.Stk.pop<Pointer>();
const Pointer &Dst = S.Stk.peek<Pointer>();

const ASTContext &ASTCtx = S.getASTContext();
const unsigned SrcBits = ASTCtx.getIntWidth(VT0->getElementType());
const unsigned LHSVecLen = VT0->getNumElements();
const unsigned SrcPerLane = 128 / SrcBits;
const unsigned Lanes = LHSVecLen * SrcBits / 128;

PrimType SrcT = *S.getContext().classify(VT0->getElementType());
PrimType DstT = *S.getContext().classify(getElemType(Dst));
const bool IsUnsigend = getElemType(Dst)->isUnsignedIntegerType();

for (unsigned Lane = 0; Lane != Lanes; ++Lane) {
const unsigned BaseSrc = Lane * SrcPerLane;
const unsigned BaseDst = Lane * (2 * SrcPerLane);

for (unsigned I = 0; I != SrcPerLane; ++I) {
INT_TYPE_SWITCH_NO_BOOL(SrcT, {
APSInt A = LHS.elem<T>(BaseSrc + I).toAPSInt();
APSInt B = RHS.elem<T>(BaseSrc + I).toAPSInt();

assignInteger(S, Dst.atIndex(BaseDst + I), DstT,
APSInt(PackFn(A), IsUnsigend));
assignInteger(S, Dst.atIndex(BaseDst + SrcPerLane + I), DstT,
APSInt(PackFn(B), IsUnsigend));
});
}
}

Dst.initializeAllElements();
return true;
}

static bool interp__builtin_elementwise_maxmin(InterpState &S, CodePtr OpPC,
const CallExpr *Call,
unsigned BuiltinID) {
Expand Down Expand Up @@ -3477,6 +3522,29 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
}
return LHS.lshr(RHS.getZExtValue());
});
case clang::X86::BI__builtin_ia32_packsswb128:
case clang::X86::BI__builtin_ia32_packsswb256:
case clang::X86::BI__builtin_ia32_packsswb512:
case clang::X86::BI__builtin_ia32_packssdw128:
case clang::X86::BI__builtin_ia32_packssdw256:
case clang::X86::BI__builtin_ia32_packssdw512:
return interp__builtin_x86_pack(S, OpPC, Call, [](const APSInt &Src) {
return APInt(Src).truncSSat(Src.getBitWidth() / 2);
});
case clang::X86::BI__builtin_ia32_packusdw128:
case clang::X86::BI__builtin_ia32_packusdw256:
case clang::X86::BI__builtin_ia32_packusdw512:
case clang::X86::BI__builtin_ia32_packuswb128:
case clang::X86::BI__builtin_ia32_packuswb256:
case clang::X86::BI__builtin_ia32_packuswb512:
return interp__builtin_x86_pack(S, OpPC, Call, [](const APSInt &Src) {
unsigned DstBits = Src.getBitWidth() / 2;
if (Src.isNegative())
return APInt::getZero(DstBits);
if (Src.isIntN(DstBits))
return APInt(Src).trunc(DstBits);
return APInt::getAllOnes(DstBits);
});

case clang::X86::BI__builtin_ia32_vprotbi:
case clang::X86::BI__builtin_ia32_vprotdi:
Expand Down
64 changes: 63 additions & 1 deletion clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11575,6 +11575,46 @@ static bool handleVectorElementCast(EvalInfo &Info, const FPOptions FPO,
return false;
}

static bool evalPackBuiltin(const CallExpr *E, EvalInfo &Info, APValue &Result,
llvm::function_ref<APInt(const APSInt &)> PackFn) {
APValue LHS, RHS;
if (!EvaluateAsRValue(Info, E->getArg(0), LHS) ||
!EvaluateAsRValue(Info, E->getArg(1), RHS))
return false;

unsigned LHSVecLen = LHS.getVectorLength();
unsigned RHSVecLen = RHS.getVectorLength();

assert(LHSVecLen != 0 && LHSVecLen == RHSVecLen &&
"pack builtin LHSVecLen must equal to RHSVecLen");

const VectorType *VT0 = E->getArg(0)->getType()->castAs<VectorType>();
const unsigned SrcBits = Info.Ctx.getIntWidth(VT0->getElementType());

const VectorType *DstVT = E->getType()->castAs<VectorType>();
QualType DstElemTy = DstVT->getElementType();
const bool DstIsUnsigned = DstElemTy->isUnsignedIntegerType();

const unsigned srcPerLane = 128 / SrcBits;
const unsigned lanes = LHSVecLen * SrcBits / 128;

SmallVector<APValue, 64> Out;
Out.reserve(LHSVecLen + RHSVecLen);

for (unsigned lane = 0; lane != lanes; ++lane) {
unsigned base = lane * srcPerLane;
for (unsigned i = 0; i != srcPerLane; ++i)
Out.emplace_back(APValue(
APSInt(PackFn(LHS.getVectorElt(base + i).getInt()), DstIsUnsigned)));
for (unsigned i = 0; i != srcPerLane; ++i)
Out.emplace_back(APValue(
APSInt(PackFn(RHS.getVectorElt(base + i).getInt()), DstIsUnsigned)));
}

Result = APValue(Out.data(), Out.size());
return true;
}

bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
if (!IsConstantEvaluatedBuiltinCall(E))
return ExprEvaluatorBaseTy::VisitCallExpr(E);
Expand Down Expand Up @@ -11768,7 +11808,29 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
}
return LHS.lshr(RHS.getZExtValue());
});

case X86::BI__builtin_ia32_packsswb128:
case X86::BI__builtin_ia32_packsswb256:
case X86::BI__builtin_ia32_packsswb512:
case X86::BI__builtin_ia32_packssdw128:
case X86::BI__builtin_ia32_packssdw256:
case X86::BI__builtin_ia32_packssdw512:
return evalPackBuiltin(E, Info, Result, [](const APSInt &Src) {
return APSInt(Src).truncSSat(Src.getBitWidth() / 2);
});
case X86::BI__builtin_ia32_packusdw128:
case X86::BI__builtin_ia32_packusdw256:
case X86::BI__builtin_ia32_packusdw512:
case X86::BI__builtin_ia32_packuswb128:
case X86::BI__builtin_ia32_packuswb256:
case X86::BI__builtin_ia32_packuswb512:
return evalPackBuiltin(E, Info, Result, [](const APSInt &Src) {
unsigned DstBits = Src.getBitWidth() / 2;
if (Src.isNegative())
return APInt::getZero(DstBits);
if (Src.isIntN(DstBits))
return APInt((Src).trunc(DstBits));
return APInt::getAllOnes(DstBits);
});
case clang::X86::BI__builtin_ia32_pmuldq128:
case clang::X86::BI__builtin_ia32_pmuldq256:
case clang::X86::BI__builtin_ia32_pmuldq512:
Expand Down
20 changes: 8 additions & 12 deletions clang/lib/Headers/avx2intrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -165,9 +165,8 @@ _mm256_abs_epi32(__m256i __a) {
/// A 256-bit vector of [16 x i16] used to generate result[127:64] and
/// result[255:192].
/// \returns A 256-bit integer vector containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_packs_epi16(__m256i __a, __m256i __b)
{
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_packs_epi16(__m256i __a, __m256i __b) {
return (__m256i)__builtin_ia32_packsswb256((__v16hi)__a, (__v16hi)__b);
}

Expand Down Expand Up @@ -197,9 +196,8 @@ _mm256_packs_epi16(__m256i __a, __m256i __b)
/// A 256-bit vector of [8 x i32] used to generate result[127:64] and
/// result[255:192].
/// \returns A 256-bit vector of [16 x i16] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_packs_epi32(__m256i __a, __m256i __b)
{
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_packs_epi32(__m256i __a, __m256i __b) {
return (__m256i)__builtin_ia32_packssdw256((__v8si)__a, (__v8si)__b);
}

Expand Down Expand Up @@ -228,9 +226,8 @@ _mm256_packs_epi32(__m256i __a, __m256i __b)
/// A 256-bit vector of [16 x i16] used to generate result[127:64] and
/// result[255:192].
/// \returns A 256-bit integer vector containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_packus_epi16(__m256i __a, __m256i __b)
{
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_packus_epi16(__m256i __a, __m256i __b) {
return (__m256i)__builtin_ia32_packuswb256((__v16hi)__a, (__v16hi)__b);
}

Expand Down Expand Up @@ -260,9 +257,8 @@ _mm256_packus_epi16(__m256i __a, __m256i __b)
/// A 256-bit vector of [8 x i32] used to generate result[127:64] and
/// result[255:192].
/// \returns A 256-bit vector of [16 x i16] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_packus_epi32(__m256i __V1, __m256i __V2)
{
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_packus_epi32(__m256i __V1, __m256i __V2) {
return (__m256i) __builtin_ia32_packusdw256((__v8si)__V1, (__v8si)__V2);
}

Expand Down
20 changes: 8 additions & 12 deletions clang/lib/Headers/avx512bwintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -516,9 +516,8 @@ _mm512_maskz_abs_epi16(__mmask32 __U, __m512i __A) {
(__v32hi)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_packs_epi32(__m512i __A, __m512i __B)
{
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_packs_epi32(__m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_packssdw512((__v16si)__A, (__v16si)__B);
}

Expand All @@ -538,9 +537,8 @@ _mm512_mask_packs_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
(__v32hi)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_packs_epi16(__m512i __A, __m512i __B)
{
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_packs_epi16(__m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_packsswb512((__v32hi)__A, (__v32hi) __B);
}

Expand All @@ -560,9 +558,8 @@ _mm512_maskz_packs_epi16(__mmask64 __M, __m512i __A, __m512i __B)
(__v64qi)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_packus_epi32(__m512i __A, __m512i __B)
{
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_packus_epi32(__m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_packusdw512((__v16si) __A, (__v16si) __B);
}

Expand All @@ -582,9 +579,8 @@ _mm512_mask_packus_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
(__v32hi)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_packus_epi16(__m512i __A, __m512i __B)
{
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_packus_epi16(__m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_packuswb512((__v32hi) __A, (__v32hi) __B);
}

Expand Down
12 changes: 6 additions & 6 deletions clang/lib/Headers/emmintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -4159,8 +4159,8 @@ void _mm_mfence(void);
/// A 128-bit integer vector of [8 x i16]. The converted [8 x i8] values are
/// written to the higher 64 bits of the result.
/// \returns A 128-bit vector of [16 x i8] containing the converted values.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi16(__m128i __a,
__m128i __b) {
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_packs_epi16(__m128i __a, __m128i __b) {
return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b);
}

Expand All @@ -4182,8 +4182,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi16(__m128i __a,
/// A 128-bit integer vector of [4 x i32]. The converted [4 x i16] values
/// are written to the higher 64 bits of the result.
/// \returns A 128-bit vector of [8 x i16] containing the converted values.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi32(__m128i __a,
__m128i __b) {
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_packs_epi32(__m128i __a, __m128i __b) {
return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b);
}

Expand All @@ -4205,8 +4205,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi32(__m128i __a,
/// A 128-bit integer vector of [8 x i16]. The converted [8 x i8] values are
/// written to the higher 64 bits of the result.
/// \returns A 128-bit vector of [16 x i8] containing the converted values.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi16(__m128i __a,
__m128i __b) {
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_packus_epi16(__m128i __a, __m128i __b) {
return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b);
}

Expand Down
Loading