Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 35 additions & 10 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,10 @@ let Features = "mmx", Header = "mmintrin.h", Attributes = [NoThrow, Const] in {
def _m_prefetch : X86LibBuiltin<"void(void *)">;
}

// MMX word shuffle: takes a 4 x short vector and a constant-integer control
// and returns a 4 x short vector. Declared NoThrow, Const and Constexpr.
// NOTE(review): the InterpretBuiltin cases added alongside this declaration
// list pshuflw/pshufhw/pshufd variants but no pshufw entry is visible there --
// confirm a constant evaluator actually handles this builtin.
let Features = "mmx", Attributes = [NoThrow, Const, Constexpr] in {
def pshufw : X86Builtin<"_Vector<4, short>(_Vector<4, short>, _Constant int)">;
}

// PRFCHW
let Features = "prfchw", Header = "intrin.h", Attributes = [NoThrow, Const] in {
def _m_prefetchw : X86LibBuiltin<"void(void volatile const *)">;
Expand Down Expand Up @@ -217,10 +221,13 @@ let Features = "sse2", Attributes = [NoThrow] in {
def movnti : X86Builtin<"void(int *, int)">;
}

let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def pshufd : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">;
// SSE2 128-bit shuffles controlled by a constant-integer immediate. All three
// are declared Constexpr: pshuflw/pshufhw operate on 8 x short vectors and
// pshufd on 4 x int vectors.
let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def pshuflw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int)">;
def pshufd : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">;
def pshufhw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int)">;
}

let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>, _Vector<16, char>)">;
def sqrtpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
def sqrtsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
Expand Down Expand Up @@ -584,9 +591,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
def pmulhrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
def pshufb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;
def pshuflw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">;
def pshufhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">;
def psignb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
def psignw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
def psignd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
Expand Down Expand Up @@ -647,6 +651,10 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
def packsswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, _Vector<16, short>)">;
def packssdw256 : X86Builtin<"_Vector<16, short>(_Vector<8, int>, _Vector<8, int>)">;
def packuswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, _Vector<16, short>)">;

def pshuflw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">;
def pshufhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">;
def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;
}

let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
Expand Down Expand Up @@ -1990,13 +1998,13 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVect
}

let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
def pshufhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;
def pshuflw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;
def psllw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">;
}

// AVX512BW 512-bit builtins declared Constexpr: the per-element variable
// shift psllv32hi and the immediate-controlled 32 x short word shuffles
// pshufhw512 / pshuflw512.
let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def psllv32hi : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">;
def pshufhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;
def pshuflw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;
}

let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
Expand All @@ -2016,21 +2024,35 @@ let Features = "avx512f",

// AVX512BW 512-bit Constexpr builtins: psrlv32hi plus the masked word
// shuffles. The `_mask` forms take (source, immediate, second 32 x short
// vector, unsigned int) and the `_maskz` forms take (source, immediate,
// unsigned int); the unsigned int provides one bit per each of the 32
// elements.
// NOTE(review): the second vector is presumably the pass-through operand and
// the integer the write mask -- confirm against the constant evaluator.
let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def psrlv32hi : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">;
def pshuflw512_mask : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int, _Vector<32, short>, unsigned int)">;
def pshuflw512_maskz : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int, unsigned int)">;
def pshufhw512_mask : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int, _Vector<32, short>, unsigned int)">;
def pshufhw512_maskz : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int, unsigned int)">;
}

// AVX512BW+AVX512VL 256-bit Constexpr builtins: psrlv16hi plus the masked
// word shuffles on 16 x short vectors. The `_mask` forms take (source,
// immediate, second 16 x short vector, unsigned short) and the `_maskz` forms
// take (source, immediate, unsigned short).
// NOTE(review): the second vector is presumably the pass-through operand and
// the integer the write mask -- confirm against the constant evaluator.
let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def psrlv16hi : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
def pshuflw256_mask : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int, _Vector<16, short>, unsigned short)">;
def pshuflw256_maskz : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int, unsigned short)">;
def pshufhw256_mask : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int, _Vector<16, short>, unsigned short)">;
def pshufhw256_maskz : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int, unsigned short)">;
}

// AVX512BW+AVX512VL 128-bit Constexpr builtins: psrlv8hi plus the masked
// word shuffles on 8 x short vectors. The `_mask` forms take (source,
// immediate, second 8 x short vector, unsigned char) and the `_maskz` forms
// take (source, immediate, unsigned char).
// NOTE(review): the second vector is presumably the pass-through operand and
// the integer the write mask -- confirm against the constant evaluator.
let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def psrlv8hi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
def pshuflw128_mask : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int, _Vector<8, short>, unsigned char)">;
def pshuflw128_maskz : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int, unsigned char)">;
def pshufhw128_mask : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int, _Vector<8, short>, unsigned char)">;
def pshufhw128_maskz : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int, unsigned char)">;
}

let Features = "avx512f",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def psrlwi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, int)">;
def psrldi512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, int)">;
def psrlqi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, int)">;
def pshufd512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int, _Vector<16, int>, unsigned short)">;
def pshufd512_maskz : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int, unsigned short)">;
def pshufd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">;
}

let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
Expand All @@ -2047,10 +2069,14 @@ let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, Req

// AVX512VL 128-bit Constexpr builtins: psravq128 plus the masked doubleword
// shuffles on 4 x int vectors. `pshufd128_mask` takes (source, immediate,
// second 4 x int vector, unsigned char); `pshufd128_maskz` takes (source,
// immediate, unsigned char).
// NOTE(review): the second vector is presumably the pass-through operand and
// the integer the write mask -- confirm against the constant evaluator.
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def psravq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
def pshufd128_mask : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int, _Vector<4, int>, unsigned char)">;
def pshufd128_maskz : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int, unsigned char)">;
}

// AVX512VL 256-bit Constexpr builtins: psravq256 plus the masked doubleword
// shuffles on 8 x int vectors. `pshufd256_mask` takes (source, immediate,
// second 8 x int vector, unsigned char); `pshufd256_maskz` takes (source,
// immediate, unsigned char).
// NOTE(review): the second vector is presumably the pass-through operand and
// the integer the write mask -- confirm against the constant evaluator.
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def psravq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;
def pshufd256_mask : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int, _Vector<8, int>, unsigned char)">;
def pshufd256_maskz : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int, unsigned char)">;
}

let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
Expand Down Expand Up @@ -3266,7 +3292,6 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>
}

let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
def pshufd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">;
def expanddf512_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, unsigned char)">;
def expanddi512_mask : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, unsigned char)">;
}
Expand Down
245 changes: 245 additions & 0 deletions clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2862,6 +2862,218 @@ static bool interp__builtin_blend(InterpState &S, CodePtr OpPC,
return true;
}

/// Constant-evaluates the PSHUFLW family of builtins.
///
/// Three call shapes are accepted, distinguished by argument count:
///   2 args: (source, immediate)                     - unmasked
///   3 args: (source, immediate, mask)               - masked-off elements
///                                                     become zero
///   4 args: (source, immediate, pass-through, mask) - masked-off elements
///                                                     come from pass-through
///
/// The vector is processed in groups of eight 16-bit elements (128 bits).
/// The low four elements of each group are selected from the group's low four
/// source elements using consecutive 2-bit fields of the low 8 bits of the
/// immediate; the high four elements are copied straight through.
///
/// The result is written into the vector pointer left on top of the stack,
/// which is then marked fully initialized. Always returns true.
static bool interp__builtin_ia32_pshuflw_common(InterpState &S, CodePtr OpPC,
                                                const CallExpr *Call) {
  const unsigned ArgCount = Call->getNumArgs();
  assert(ArgCount == 2 || ArgCount == 3 || ArgCount == 4);

  // Arguments come off the stack last-to-first: mask, then (for the
  // 4-argument form only) the pass-through vector.
  const bool ZeroMasking = (ArgCount == 3);
  const bool Masked = ZeroMasking || (ArgCount == 4);
  APSInt WriteMask;
  Pointer PassThru;
  if (ArgCount == 4) {
    WriteMask = popToAPSInt(S, Call->getArg(3));
    PassThru = S.Stk.pop<Pointer>();
  } else if (ZeroMasking) {
    WriteMask = popToAPSInt(S, Call->getArg(2));
  }

  APSInt Imm = popToAPSInt(S, Call->getArg(1));
  const Pointer &Src = S.Stk.pop<Pointer>();
  const Pointer &Dst = S.Stk.peek<Pointer>();

  const unsigned NumElems = Dst.getNumElems();
  const PrimType ElemT = Dst.getFieldDesc()->getPrimType();

  const unsigned BitsPerElem = 16;
  const unsigned ElemsPerLane = 128u / BitsPerElem;
  const unsigned LowCount = 4;
  assert(NumElems % ElemsPerLane == 0 && "pshuflw expects 128-bit lanes");

  // Only the low 8 bits of the immediate carry selectors.
  const uint8_t Ctl = static_cast<uint8_t>(Imm.getZExtValue());

  for (unsigned Idx = 0; Idx != NumElems; ++Idx) {
    const unsigned InLane = Idx % ElemsPerLane;
    const unsigned LaneStart = Idx - InLane;

    // Low half: pick via the 2-bit selector for this position.
    // High half: identity.
    const unsigned From =
        InLane < LowCount ? LaneStart + ((Ctl >> (2 * InLane)) & 0x3) : Idx;

    APSInt Result;
    INT_TYPE_SWITCH(ElemT, { Result = Src.elem<T>(From).toAPSInt(); });

    // Mask bits beyond the mask's width count as clear.
    const bool Selected =
        !Masked || (Idx < static_cast<unsigned>(WriteMask.getBitWidth()) &&
                    WriteMask[Idx]);

    if (!Selected) {
      if (ZeroMasking) {
        // Zero with the same width and signedness as the shuffled value.
        Result = APSInt(APInt(Result.getBitWidth(), 0),
                        /*isUnsigned=*/!Result.isSigned());
      } else {
        INT_TYPE_SWITCH(ElemT, { Result = PassThru.elem<T>(Idx).toAPSInt(); });
      }
    }

    INT_TYPE_SWITCH_NO_BOOL(ElemT,
                            { Dst.elem<T>(Idx) = static_cast<T>(Result); });
  }

  Dst.initializeAllElements();
  return true;
}

/// Constant-evaluates the PSHUFHW family of builtins.
///
/// Three call shapes are accepted, distinguished by argument count:
///   2 args: (source, immediate)                     - unmasked
///   3 args: (source, immediate, mask)               - masked-off elements
///                                                     become zero
///   4 args: (source, immediate, pass-through, mask) - masked-off elements
///                                                     come from pass-through
///
/// The vector is processed in groups of eight 16-bit elements (128 bits).
/// The high four elements of each group are selected from the group's high
/// four source elements using consecutive 2-bit fields of the low 8 bits of
/// the immediate; the low four elements are copied straight through.
///
/// The result is written into the vector pointer left on top of the stack,
/// which is then marked fully initialized. Always returns true.
static bool interp__builtin_ia32_pshufhw_common(InterpState &S, CodePtr OpPC,
                                                const CallExpr *Call) {
  (void)OpPC;
  const unsigned ArgCount = Call->getNumArgs();
  assert(ArgCount == 2 || ArgCount == 3 || ArgCount == 4);

  // Arguments come off the stack last-to-first: mask, then (for the
  // 4-argument form only) the pass-through vector.
  const bool ZeroMasking = (ArgCount == 3);
  const bool Masked = ZeroMasking || (ArgCount == 4);
  APSInt WriteMask;
  Pointer PassThru;
  if (ArgCount == 4) {
    WriteMask = popToAPSInt(S, Call->getArg(3));
    PassThru = S.Stk.pop<Pointer>();
  } else if (ZeroMasking) {
    WriteMask = popToAPSInt(S, Call->getArg(2));
  }

  APSInt Imm = popToAPSInt(S, Call->getArg(1));
  const Pointer &Src = S.Stk.pop<Pointer>();
  const Pointer &Dst = S.Stk.peek<Pointer>();

  const unsigned NumElems = Dst.getNumElems();
  const PrimType ElemT = Dst.getFieldDesc()->getPrimType();

  const unsigned BitsPerElem = 16;
  const unsigned ElemsPerLane = 128u / BitsPerElem;
  const unsigned HighStart = 4;
  assert(NumElems % ElemsPerLane == 0);

  // Only the low 8 bits of the immediate carry selectors.
  const uint8_t Ctl = static_cast<uint8_t>(Imm.getZExtValue());

  for (unsigned Idx = 0; Idx != NumElems; ++Idx) {
    const unsigned InLane = Idx % ElemsPerLane;
    const unsigned LaneStart = Idx - InLane;

    // High half: pick via the 2-bit selector for this position, relative to
    // the start of the lane's high half. Low half: identity.
    const unsigned From =
        InLane >= HighStart
            ? LaneStart + HighStart +
                  ((Ctl >> (2 * (InLane - HighStart))) & 0x3)
            : Idx;

    APSInt Result;
    INT_TYPE_SWITCH(ElemT, { Result = Src.elem<T>(From).toAPSInt(); });

    // Mask bits beyond the mask's width count as clear.
    const bool Selected =
        !Masked || (Idx < static_cast<unsigned>(WriteMask.getBitWidth()) &&
                    WriteMask[Idx]);

    if (!Selected) {
      if (ZeroMasking) {
        // Zero with the same width and signedness as the shuffled value.
        Result = APSInt(APInt(Result.getBitWidth(), 0),
                        /*isUnsigned=*/!Result.isSigned());
      } else {
        INT_TYPE_SWITCH(ElemT, { Result = PassThru.elem<T>(Idx).toAPSInt(); });
      }
    }

    INT_TYPE_SWITCH_NO_BOOL(ElemT,
                            { Dst.elem<T>(Idx) = static_cast<T>(Result); });
  }

  Dst.initializeAllElements();
  return true;
}

/// Constant-evaluates the PSHUFD family of builtins.
///
/// Three call shapes are accepted, distinguished by argument count:
///   2 args: (source, immediate)                     - unmasked
///   3 args: (source, immediate, mask)               - masked-off elements
///                                                     become zero
///   4 args: (source, immediate, pass-through, mask) - masked-off elements
///                                                     come from pass-through
///
/// The vector is processed in groups of four 32-bit elements (128 bits).
/// Every destination element is selected from its own group by the 2-bit
/// field of the immediate's low 8 bits that corresponds to its position in
/// the group.
///
/// The result is written into the vector pointer left on top of the stack,
/// which is then marked fully initialized. Always returns true.
static bool interp__builtin_ia32_pshufd_common(InterpState &S, CodePtr OpPC,
                                               const CallExpr *Call) {
  (void)OpPC;
  const unsigned ArgCount = Call->getNumArgs();
  assert(ArgCount == 2 || ArgCount == 3 || ArgCount == 4);

  // Arguments come off the stack last-to-first: mask, then (for the
  // 4-argument form only) the pass-through vector.
  const bool ZeroMasking = (ArgCount == 3);
  const bool Masked = ZeroMasking || (ArgCount == 4);
  APSInt WriteMask;
  Pointer PassThru;
  if (ArgCount == 4) {
    WriteMask = popToAPSInt(S, Call->getArg(3));
    PassThru = S.Stk.pop<Pointer>();
  } else if (ZeroMasking) {
    WriteMask = popToAPSInt(S, Call->getArg(2));
  }

  APSInt Imm = popToAPSInt(S, Call->getArg(1));
  const Pointer &Src = S.Stk.pop<Pointer>();
  const Pointer &Dst = S.Stk.peek<Pointer>();

  const unsigned NumElems = Dst.getNumElems();
  const PrimType ElemT = Dst.getFieldDesc()->getPrimType();

  const unsigned BitsPerElem = 32;
  const unsigned ElemsPerLane = 128u / BitsPerElem;
  assert(NumElems % ElemsPerLane == 0);

  // Only the low 8 bits of the immediate carry selectors.
  const uint8_t Ctl = static_cast<uint8_t>(Imm.getZExtValue());

  for (unsigned Idx = 0; Idx != NumElems; ++Idx) {
    const unsigned InLane = Idx % ElemsPerLane;
    const unsigned LaneStart = Idx - InLane;

    // Each position has its own 2-bit selector into the same lane.
    const unsigned From = LaneStart + ((Ctl >> (2 * InLane)) & 0x3);

    APSInt Result;
    INT_TYPE_SWITCH(ElemT, { Result = Src.elem<T>(From).toAPSInt(); });

    // Mask bits beyond the mask's width count as clear.
    const bool Selected =
        !Masked || (Idx < static_cast<unsigned>(WriteMask.getBitWidth()) &&
                    WriteMask[Idx]);

    if (!Selected) {
      if (ZeroMasking) {
        // Zero with the same width and signedness as the shuffled value.
        Result = APSInt(APInt(Result.getBitWidth(), 0),
                        /*isUnsigned=*/!Result.isSigned());
      } else {
        INT_TYPE_SWITCH(ElemT, { Result = PassThru.elem<T>(Idx).toAPSInt(); });
      }
    }

    INT_TYPE_SWITCH_NO_BOOL(ElemT,
                            { Dst.elem<T>(Idx) = static_cast<T>(Result); });
  }

  Dst.initializeAllElements();
  return true;
}

static bool interp__builtin_elementwise_triop(
InterpState &S, CodePtr OpPC, const CallExpr *Call,
llvm::function_ref<APInt(const APSInt &, const APSInt &, const APSInt &)>
Expand Down Expand Up @@ -3417,6 +3629,39 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return interp__builtin_elementwise_int_binop(S, OpPC, Call,
llvm::APIntOps::mulhs);

case clang::X86::BI__builtin_ia32_pshuflw:
case clang::X86::BI__builtin_ia32_pshuflw256:
case clang::X86::BI__builtin_ia32_pshuflw512:
case clang::X86::BI__builtin_ia32_pshuflw128_mask:
case clang::X86::BI__builtin_ia32_pshuflw256_mask:
case clang::X86::BI__builtin_ia32_pshuflw512_mask:
case clang::X86::BI__builtin_ia32_pshuflw128_maskz:
case clang::X86::BI__builtin_ia32_pshuflw256_maskz:
case clang::X86::BI__builtin_ia32_pshuflw512_maskz:
return interp__builtin_ia32_pshuflw_common(S, OpPC, Call);

case clang::X86::BI__builtin_ia32_pshufhw:
case clang::X86::BI__builtin_ia32_pshufhw256:
case clang::X86::BI__builtin_ia32_pshufhw512:
case clang::X86::BI__builtin_ia32_pshufhw128_mask:
case clang::X86::BI__builtin_ia32_pshufhw256_mask:
case clang::X86::BI__builtin_ia32_pshufhw512_mask:
case clang::X86::BI__builtin_ia32_pshufhw128_maskz:
case clang::X86::BI__builtin_ia32_pshufhw256_maskz:
case clang::X86::BI__builtin_ia32_pshufhw512_maskz:
return interp__builtin_ia32_pshufhw_common(S, OpPC, Call);

case clang::X86::BI__builtin_ia32_pshufd:
case clang::X86::BI__builtin_ia32_pshufd256:
case clang::X86::BI__builtin_ia32_pshufd512:
case clang::X86::BI__builtin_ia32_pshufd128_mask:
case clang::X86::BI__builtin_ia32_pshufd256_mask:
case clang::X86::BI__builtin_ia32_pshufd512_mask:
case clang::X86::BI__builtin_ia32_pshufd128_maskz:
case clang::X86::BI__builtin_ia32_pshufd256_maskz:
case clang::X86::BI__builtin_ia32_pshufd512_maskz:
return interp__builtin_ia32_pshufd_common(S, OpPC, Call);

case clang::X86::BI__builtin_ia32_psllv2di:
case clang::X86::BI__builtin_ia32_psllv4di:
case clang::X86::BI__builtin_ia32_psllv4si:
Expand Down
Loading