Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 57 additions & 7 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,10 @@ let Features = "mmx", Header = "mmintrin.h", Attributes = [NoThrow, Const] in {
def _m_prefetch : X86LibBuiltin<"void(void *)">;
}

// PSHUFW (MMX): takes a 4 x short vector plus a constant int and returns a
// 4 x short vector. Declared with the Constexpr attribute.
// NOTE(review): Constexpr presumably promises constant evaluation; confirm the
// constant evaluators actually implement this builtin.
let Features = "mmx", Attributes = [NoThrow, Const, Constexpr] in {
def pshufw : X86Builtin<"_Vector<4, short>(_Vector<4, short>, _Constant int)">;
}

// PRFCHW
let Features = "prfchw", Header = "intrin.h", Attributes = [NoThrow, Const] in {
def _m_prefetchw : X86LibBuiltin<"void(void volatile const *)">;
Expand Down Expand Up @@ -217,10 +221,13 @@ let Features = "sse2", Attributes = [NoThrow] in {
def movnti : X86Builtin<"void(int *, int)">;
}

let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def pshufd : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">;
// PSHUFLW / PSHUFD / PSHUFHW (128-bit): each takes a source vector and a
// constant int control and returns a vector of the same type. These are the
// sse2 shuffles declared with the Constexpr attribute.
let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def pshuflw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int)">;
def pshufd : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">;
def pshufhw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int)">;
}

let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>, _Vector<16, char>)">;
def sqrtpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
def sqrtsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
Expand Down Expand Up @@ -569,6 +576,12 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in
def vec_set_v8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int, _Constant int)">;
}

// PSHUFLW / PSHUFHW / PSHUFD (256-bit, avx2): source vector plus constant int
// control, result has the same vector type. Declared with the Constexpr
// attribute, separately from the non-Constexpr avx2 builtins.
let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def pshuflw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">;
def pshufhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">;
def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;
}

let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def mpsadbw256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant char)">;
def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant int)">;
Expand All @@ -584,9 +597,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
def pmulhrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
def pshufb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;
def pshuflw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">;
def pshufhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">;
def psignb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
def psignw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
def psignd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
Expand Down Expand Up @@ -1989,9 +1999,28 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVect
def prorq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
}

let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
// PSHUFHW / PSHUFLW (512-bit, avx512bw): 32 x short source plus constant int
// control, returning 32 x short. Declared with the Constexpr attribute.
let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def pshufhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;
def pshuflw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;
}

// PSHUFD (512-bit, avx512f), in three forms:
//   pshufd512       (src, imm)
//   pshufd512_mask  (src, imm, pass-through vector, 16-bit mask)
//   pshufd512_maskz (src, imm, 16-bit mask)
// All are declared with the Constexpr attribute.
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def pshufd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">;
def pshufd512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int, _Vector<16, int>, unsigned short)">;
def pshufd512_maskz : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int, unsigned short)">;
}

// Masked PSHUFD (256-bit, avx512vl):
//   pshufd256_mask  (src, imm, pass-through vector, mask)
//   pshufd256_maskz (src, imm, mask)
// The mask is an unsigned char; the result is 8 x int.
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def pshufd256_mask : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int, _Vector<8, int>, unsigned char)">;
def pshufd256_maskz : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int, unsigned char)">;
}

// Masked PSHUFD (128-bit, avx512vl):
//   pshufd128_mask  (src, imm, pass-through vector, mask)
//   pshufd128_maskz (src, imm, mask)
// The mask is an unsigned char although the vector has only 4 elements.
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def pshufd128_mask : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int, _Vector<4, int>, unsigned char)">;
def pshufd128_maskz : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int, unsigned char)">;
}

// psllw512 (avx512bw, 512-bit): takes a 32 x short vector and an 8 x short
// vector and returns 32 x short. Not marked Constexpr.
let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
def psllw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">;
}

Expand Down Expand Up @@ -3266,7 +3295,6 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>
}

let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
def pshufd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">;
def expanddf512_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, unsigned char)">;
def expanddi512_mask : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, unsigned char)">;
}
Expand Down Expand Up @@ -5114,3 +5142,25 @@ let Features = "avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>
// vsqrtbf16512 (avx10.2, 512-bit): 32 x __bf16 in, 32 x __bf16 out.
let Features = "avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
def vsqrtbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>)">;
}

// Masked PSHUFLW / PSHUFHW (512-bit, avx512bw):
//   *_mask  (src, imm, pass-through vector, mask)
//   *_maskz (src, imm, mask)
// The mask is an unsigned int; vectors are 32 x short.
let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def pshuflw512_mask : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int, _Vector<32, short>, unsigned int)">;
def pshuflw512_maskz : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int, unsigned int)">;
def pshufhw512_mask : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int, _Vector<32, short>, unsigned int)">;
def pshufhw512_maskz : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int, unsigned int)">;
}


// Masked PSHUFLW / PSHUFHW (256-bit, requires both avx512vl and avx512bw):
//   *_mask  (src, imm, pass-through vector, mask)
//   *_maskz (src, imm, mask)
// The mask is an unsigned short; vectors are 16 x short.
let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def pshuflw256_mask : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int, _Vector<16, short>, unsigned short)">;
def pshuflw256_maskz : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int, unsigned short)">;
def pshufhw256_mask : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int, _Vector<16, short>, unsigned short)">;
def pshufhw256_maskz : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int, unsigned short)">;
}

// Masked PSHUFLW / PSHUFHW (128-bit, requires both avx512vl and avx512bw):
//   *_mask  (src, imm, pass-through vector, mask)
//   *_maskz (src, imm, mask)
// The mask is an unsigned char; vectors are 8 x short.
let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def pshuflw128_mask : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int, _Vector<8, short>, unsigned char)">;
def pshuflw128_maskz : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int, unsigned char)">;
def pshufhw128_mask : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int, _Vector<8, short>, unsigned char)">;
def pshufhw128_maskz : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int, unsigned char)">;
}
246 changes: 246 additions & 0 deletions clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2862,6 +2862,218 @@ static bool interp__builtin_blend(InterpState &S, CodePtr OpPC,
return true;
}

/// Evaluates the pshuflw builtins in their plain, merge-masked and zero-masked
/// forms.
///
/// Elements are processed in groups of eight. The first four elements of each
/// group are picked from that group's first four source elements, using
/// consecutive 2-bit fields of the immediate; the last four are copied through
/// unchanged. The argument count selects the form:
///   2 args: (src, imm)                  - unmasked
///   3 args: (src, imm, mask)            - cleared mask bits produce zero
///   4 args: (src, imm, passthru, mask)  - cleared mask bits take passthru
///
/// Operands are popped in reverse argument order; the result is written to the
/// vector that remains on top of the stack. Always returns true.
static bool interp__builtin_ia32_pshuflw_common(InterpState &S, CodePtr OpPC,
                                                const CallExpr *Call) {
  const unsigned NumArgs = Call->getNumArgs();
  assert(NumArgs == 2 || NumArgs == 3 || NumArgs == 4);

  const bool HasPassThru = NumArgs == 4;
  const bool ZeroMasked = NumArgs == 3;
  const bool Masked = HasPassThru || ZeroMasked;

  // The mask is always the last argument, so it is popped first; the
  // pass-through vector (merge form only) sits directly below it.
  APSInt Mask;
  Pointer PassThru;
  if (Masked)
    Mask = popToAPSInt(S, Call->getArg(NumArgs - 1));
  if (HasPassThru)
    PassThru = S.Stk.pop<Pointer>();

  const APSInt Imm = popToAPSInt(S, Call->getArg(1));
  const Pointer &Src = S.Stk.pop<Pointer>();
  const Pointer &Dst = S.Stk.peek<Pointer>();

  const unsigned NumElems = Dst.getNumElems();
  const PrimType ElemT = Dst.getFieldDesc()->getPrimType();
  constexpr unsigned LaneElems = 128u / 16u;
  constexpr unsigned LowHalf = 4;
  assert(NumElems % LaneElems == 0 && "pshuflw expects 128-bit lanes");

  // Only the low eight bits of the immediate are used as selectors.
  const uint8_t Control = static_cast<uint8_t>(Imm.getZExtValue());

  for (unsigned Idx = 0; Idx != NumElems; ++Idx) {
    const unsigned InLane = Idx % LaneElems;
    const unsigned LaneStart = Idx - InLane;
    // Low-half positions are redirected by their 2-bit selector; high-half
    // positions map to themselves.
    const unsigned SrcIdx =
        InLane < LowHalf ? LaneStart + ((Control >> (2 * InLane)) & 0x3u)
                         : Idx;

    APSInt Picked;
    INT_TYPE_SWITCH(ElemT, { Picked = Src.elem<T>(SrcIdx).toAPSInt(); });

    // Mask bits beyond the mask's width count as cleared.
    APSInt Result;
    if (!Masked ||
        (Idx < static_cast<unsigned>(Mask.getBitWidth()) && Mask[Idx])) {
      Result = Picked;
    } else if (ZeroMasked) {
      Result = APSInt(APInt(Picked.getBitWidth(), 0));
      Result.setIsSigned(Picked.isSigned());
    } else {
      INT_TYPE_SWITCH(ElemT, { Result = PassThru.elem<T>(Idx).toAPSInt(); });
    }

    INT_TYPE_SWITCH_NO_BOOL(ElemT,
                            { Dst.elem<T>(Idx) = static_cast<T>(Result); });
  }

  Dst.initializeAllElements();
  return true;
}

/// Evaluates the pshufhw builtins in their plain, merge-masked and zero-masked
/// forms.
///
/// Elements are processed in groups of eight. The last four elements of each
/// group are picked from that group's last four source elements, using
/// consecutive 2-bit fields of the immediate; the first four are copied
/// through unchanged. The argument count selects the form:
///   2 args: (src, imm)                  - unmasked
///   3 args: (src, imm, mask)            - cleared mask bits produce zero
///   4 args: (src, imm, passthru, mask)  - cleared mask bits take passthru
///
/// Operands are popped in reverse argument order; the result is written to the
/// vector that remains on top of the stack. Always returns true.
static bool interp__builtin_ia32_pshufhw_common(InterpState &S, CodePtr OpPC,
                                                const CallExpr *Call) {
  (void)OpPC;
  const unsigned NumArgs = Call->getNumArgs();
  assert(NumArgs == 2 || NumArgs == 3 || NumArgs == 4);

  const bool HasPassThru = NumArgs == 4;
  const bool ZeroMasked = NumArgs == 3;
  const bool Masked = HasPassThru || ZeroMasked;

  // The mask is always the last argument, so it is popped first; the
  // pass-through vector (merge form only) sits directly below it.
  APSInt Mask;
  Pointer PassThru;
  if (Masked)
    Mask = popToAPSInt(S, Call->getArg(NumArgs - 1));
  if (HasPassThru)
    PassThru = S.Stk.pop<Pointer>();

  const APSInt Imm = popToAPSInt(S, Call->getArg(1));
  const Pointer &Src = S.Stk.pop<Pointer>();
  const Pointer &Dst = S.Stk.peek<Pointer>();

  const unsigned NumElems = Dst.getNumElems();
  const PrimType ElemT = Dst.getFieldDesc()->getPrimType();
  constexpr unsigned LaneElems = 128u / 16u;
  constexpr unsigned HighStart = 4;
  assert(NumElems % LaneElems == 0);

  // Only the low eight bits of the immediate are used as selectors.
  const uint8_t Control = static_cast<uint8_t>(Imm.getZExtValue());

  for (unsigned Idx = 0; Idx != NumElems; ++Idx) {
    const unsigned InLane = Idx % LaneElems;
    const unsigned LaneStart = Idx - InLane;
    // High-half positions are redirected by their 2-bit selector (relative to
    // the start of the high half); low-half positions map to themselves.
    const unsigned SrcIdx =
        InLane >= HighStart
            ? LaneStart + HighStart +
                  ((Control >> (2 * (InLane - HighStart))) & 0x3u)
            : Idx;

    APSInt Picked;
    INT_TYPE_SWITCH(ElemT, { Picked = Src.elem<T>(SrcIdx).toAPSInt(); });

    // Mask bits beyond the mask's width count as cleared.
    APSInt Result;
    if (!Masked ||
        (Idx < static_cast<unsigned>(Mask.getBitWidth()) && Mask[Idx])) {
      Result = Picked;
    } else if (ZeroMasked) {
      Result = APSInt(APInt(Picked.getBitWidth(), 0));
      Result.setIsSigned(Picked.isSigned());
    } else {
      INT_TYPE_SWITCH(ElemT, { Result = PassThru.elem<T>(Idx).toAPSInt(); });
    }

    INT_TYPE_SWITCH_NO_BOOL(ElemT,
                            { Dst.elem<T>(Idx) = static_cast<T>(Result); });
  }

  Dst.initializeAllElements();
  return true;
}

/// Evaluates the pshufd builtins in their plain, merge-masked and zero-masked
/// forms.
///
/// Elements are processed in groups of four. Every element is picked from its
/// own group's source elements, using the 2-bit field of the immediate that
/// corresponds to its position in the group. The argument count selects the
/// form:
///   2 args: (src, imm)                  - unmasked
///   3 args: (src, imm, mask)            - cleared mask bits produce zero
///   4 args: (src, imm, passthru, mask)  - cleared mask bits take passthru
///
/// Operands are popped in reverse argument order; the result is written to the
/// vector that remains on top of the stack. Always returns true.
static bool interp__builtin_ia32_pshufd_common(InterpState &S, CodePtr OpPC,
                                               const CallExpr *Call) {
  (void)OpPC;
  const unsigned NumArgs = Call->getNumArgs();
  assert(NumArgs == 2 || NumArgs == 3 || NumArgs == 4);

  const bool HasPassThru = NumArgs == 4;
  const bool ZeroMasked = NumArgs == 3;
  const bool Masked = HasPassThru || ZeroMasked;

  // The mask is always the last argument, so it is popped first; the
  // pass-through vector (merge form only) sits directly below it.
  APSInt Mask;
  Pointer PassThru;
  if (Masked)
    Mask = popToAPSInt(S, Call->getArg(NumArgs - 1));
  if (HasPassThru)
    PassThru = S.Stk.pop<Pointer>();

  const APSInt Imm = popToAPSInt(S, Call->getArg(1));
  const Pointer &Src = S.Stk.pop<Pointer>();
  const Pointer &Dst = S.Stk.peek<Pointer>();

  const unsigned NumElems = Dst.getNumElems();
  const PrimType ElemT = Dst.getFieldDesc()->getPrimType();
  constexpr unsigned LaneElems = 128u / 32u;
  assert(NumElems % LaneElems == 0);

  // Only the low eight bits of the immediate are used as selectors.
  const uint8_t Control = static_cast<uint8_t>(Imm.getZExtValue());

  for (unsigned Idx = 0; Idx != NumElems; ++Idx) {
    const unsigned InLane = Idx % LaneElems;
    const unsigned LaneStart = Idx - InLane;
    const unsigned SrcIdx = LaneStart + ((Control >> (2 * InLane)) & 0x3u);

    APSInt Picked;
    INT_TYPE_SWITCH(ElemT, { Picked = Src.elem<T>(SrcIdx).toAPSInt(); });

    // Mask bits beyond the mask's width count as cleared.
    APSInt Result;
    if (!Masked ||
        (Idx < static_cast<unsigned>(Mask.getBitWidth()) && Mask[Idx])) {
      Result = Picked;
    } else if (ZeroMasked) {
      Result = APSInt(APInt(Picked.getBitWidth(), 0));
      Result.setIsSigned(Picked.isSigned());
    } else {
      INT_TYPE_SWITCH(ElemT, { Result = PassThru.elem<T>(Idx).toAPSInt(); });
    }

    INT_TYPE_SWITCH_NO_BOOL(ElemT,
                            { Dst.elem<T>(Idx) = static_cast<T>(Result); });
  }

  Dst.initializeAllElements();
  return true;
}

static bool interp__builtin_elementwise_triop(
InterpState &S, CodePtr OpPC, const CallExpr *Call,
llvm::function_ref<APInt(const APSInt &, const APSInt &, const APSInt &)>
Expand Down Expand Up @@ -2967,6 +3179,7 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC,
return true;
}


bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
uint32_t BuiltinID) {
if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
Expand Down Expand Up @@ -3417,6 +3630,39 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return interp__builtin_elementwise_int_binop(S, OpPC, Call,
llvm::APIntOps::mulhs);

case clang::X86::BI__builtin_ia32_pshuflw:
case clang::X86::BI__builtin_ia32_pshuflw256:
case clang::X86::BI__builtin_ia32_pshuflw512:
case clang::X86::BI__builtin_ia32_pshuflw128_mask:
case clang::X86::BI__builtin_ia32_pshuflw256_mask:
case clang::X86::BI__builtin_ia32_pshuflw512_mask:
case clang::X86::BI__builtin_ia32_pshuflw128_maskz:
case clang::X86::BI__builtin_ia32_pshuflw256_maskz:
case clang::X86::BI__builtin_ia32_pshuflw512_maskz:
return interp__builtin_ia32_pshuflw_common(S, OpPC, Call);

case clang::X86::BI__builtin_ia32_pshufhw:
case clang::X86::BI__builtin_ia32_pshufhw256:
case clang::X86::BI__builtin_ia32_pshufhw512:
case clang::X86::BI__builtin_ia32_pshufhw128_mask:
case clang::X86::BI__builtin_ia32_pshufhw256_mask:
case clang::X86::BI__builtin_ia32_pshufhw512_mask:
case clang::X86::BI__builtin_ia32_pshufhw128_maskz:
case clang::X86::BI__builtin_ia32_pshufhw256_maskz:
case clang::X86::BI__builtin_ia32_pshufhw512_maskz:
return interp__builtin_ia32_pshufhw_common(S, OpPC, Call);

case clang::X86::BI__builtin_ia32_pshufd:
case clang::X86::BI__builtin_ia32_pshufd256:
case clang::X86::BI__builtin_ia32_pshufd512:
case clang::X86::BI__builtin_ia32_pshufd128_mask:
case clang::X86::BI__builtin_ia32_pshufd256_mask:
case clang::X86::BI__builtin_ia32_pshufd512_mask:
case clang::X86::BI__builtin_ia32_pshufd128_maskz:
case clang::X86::BI__builtin_ia32_pshufd256_maskz:
case clang::X86::BI__builtin_ia32_pshufd512_maskz:
return interp__builtin_ia32_pshufd_common(S, OpPC, Call);

case clang::X86::BI__builtin_ia32_psllv2di:
case clang::X86::BI__builtin_ia32_psllv4di:
case clang::X86::BI__builtin_ia32_psllv4si:
Expand Down
Loading
Loading