Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 13 additions & 10 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -217,10 +217,13 @@ let Features = "sse2", Attributes = [NoThrow] in {
def movnti : X86Builtin<"void(int *, int)">;
}

let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def pshufd : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">;
let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def pshuflw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int)">;
def pshufd : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">;
def pshufhw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int)">;
}

let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>, _Vector<16, char>)">;
def sqrtpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
def sqrtsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
Expand Down Expand Up @@ -584,9 +587,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
def pmulhrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
def pshufb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;
def pshuflw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">;
def pshufhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">;
def psignb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
def psignw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
def psignd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
Expand Down Expand Up @@ -647,6 +647,10 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
def packsswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, _Vector<16, short>)">;
def packssdw256 : X86Builtin<"_Vector<16, short>(_Vector<8, int>, _Vector<8, int>)">;
def packuswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, _Vector<16, short>)">;

def pshuflw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">;
def pshufhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">;
def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;
}

let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
Expand Down Expand Up @@ -1017,6 +1021,7 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def pmuldq512 : X86Builtin<"_Vector<8, long long int>(_Vector<16, int>, _Vector<16, int>)">;
def pmuludq512 : X86Builtin<"_Vector<8, long long int>(_Vector<16, int>, _Vector<16, int>)">;
def pshufd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">;
}

let Features = "avx512f", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
Expand Down Expand Up @@ -1990,13 +1995,13 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVect
}

let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
def pshufhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;
def pshuflw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;
def psllw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">;
}

let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def psllv32hi : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">;
def pshufhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;
def pshuflw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;
}

let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
Expand Down Expand Up @@ -2026,8 +2031,7 @@ let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, Req
def psrlv8hi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
}

let Features = "avx512f",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def psrlwi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, int)">;
def psrldi512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, int)">;
def psrlqi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, int)">;
Expand Down Expand Up @@ -3266,7 +3270,6 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>
}

let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
def pshufd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">;
def expanddf512_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, unsigned char)">;
def expanddi512_mask : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, unsigned char)">;
}
Expand Down
73 changes: 73 additions & 0 deletions clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2773,6 +2773,64 @@ static bool interp__builtin_blend(InterpState &S, CodePtr OpPC,
return true;
}

enum class Half { None, Low, High };

static bool interp__builtin_ia32_pshuf(InterpState &S, CodePtr OpPC,
const CallExpr *Call, Half whichHalf) {
assert(Call->getNumArgs() == 2 && "masked forms handled via select*");
APSInt ControlImm = popToAPSInt(S, Call->getArg(1));
const Pointer &Src = S.Stk.pop<Pointer>();
const Pointer &Dst = S.Stk.peek<Pointer>();

unsigned NumElems = Dst.getNumElems();
PrimType ElemT = Dst.getFieldDesc()->getPrimType();

unsigned ElemBits = static_cast<unsigned>(primSize(ElemT) * 8);
if (ElemBits != 16 && ElemBits != 32)
return false;

unsigned TotalBits = NumElems * ElemBits;
unsigned LaneBits = (TotalBits == 64) ? 64u : 128u;
unsigned LaneElts = LaneBits / ElemBits;
assert(LaneElts && (NumElems % LaneElts == 0));

uint8_t ctl = static_cast<uint8_t>(ControlImm.getZExtValue());

for (unsigned idx = 0; idx != NumElems; idx++) {
unsigned LaneBase = (idx / LaneElts) * LaneElts;
unsigned LaneIdx = idx % LaneElts;

unsigned SrcIdx = idx;

if (ElemBits == 32) {
unsigned sel = (ctl >> (2 * LaneIdx)) & 0x3;
SrcIdx = LaneBase + sel;
} else {
if (LaneElts == 4) {
unsigned sel = (ctl >> (2 * LaneIdx)) & 0x3;
SrcIdx = LaneBase + sel;
} else {
constexpr unsigned HalfSize = 4;
if (whichHalf == Half::Low && LaneIdx < HalfSize) {
unsigned sel = (ctl >> (2 * LaneIdx)) & 0x3;
SrcIdx = LaneBase + sel;
} else if (whichHalf == Half::High && LaneIdx >= HalfSize) {
unsigned rel = LaneIdx - HalfSize;
unsigned sel = (ctl >> (2 * rel)) & 0x3;
SrcIdx = LaneBase + HalfSize + sel;
} else if (whichHalf == Half::None) {
unsigned sel = (ctl >> (2 * LaneIdx)) & 0x3;
SrcIdx = LaneBase + sel;
}
}
}

INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem<T>(idx) = Src.elem<T>(SrcIdx); });
}
Dst.initializeAllElements();
return true;
}

static bool interp__builtin_elementwise_triop(
InterpState &S, CodePtr OpPC, const CallExpr *Call,
llvm::function_ref<APInt(const APSInt &, const APSInt &, const APSInt &)>
Expand Down Expand Up @@ -3606,6 +3664,21 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_selectpd_512:
return interp__builtin_select(S, OpPC, Call);

case X86::BI__builtin_ia32_pshuflw:
case X86::BI__builtin_ia32_pshuflw256:
case X86::BI__builtin_ia32_pshuflw512:
return interp__builtin_ia32_pshuf(S, OpPC, Call, Half::Low);

case X86::BI__builtin_ia32_pshufhw:
case X86::BI__builtin_ia32_pshufhw256:
case X86::BI__builtin_ia32_pshufhw512:
return interp__builtin_ia32_pshuf(S, OpPC, Call, Half::High);

case X86::BI__builtin_ia32_pshufd:
case X86::BI__builtin_ia32_pshufd256:
case X86::BI__builtin_ia32_pshufd512:
return interp__builtin_ia32_pshuf(S, OpPC, Call, Half::None);

case X86::BI__builtin_ia32_kandqi:
case X86::BI__builtin_ia32_kandhi:
case X86::BI__builtin_ia32_kandsi:
Expand Down
95 changes: 94 additions & 1 deletion clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11615,6 +11615,72 @@ static bool evalPackBuiltin(const CallExpr *E, EvalInfo &Info, APValue &Result,
return true;
}

static bool evalPshufBuiltin(EvalInfo &Info, const CallExpr *Call,
unsigned ElemBits, unsigned HalfBase,
APValue &Out) {
APValue Vec;
APSInt Imm;
if (!EvaluateAsRValue(Info, Call->getArg(0), Vec))
return false;
if (!EvaluateInteger(Call->getArg(1), Imm, Info))
return false;

const auto *VT = Call->getType()->getAs<VectorType>();
if (!VT)
return false;
unsigned NumElts = VT->getNumElements();

unsigned TotalBits = NumElts * ElemBits;
unsigned LaneBits = (TotalBits == 64) ? 64u : 128u;
unsigned LaneElts = LaneBits / ElemBits;
if (!LaneElts || (NumElts % LaneElts) != 0)
return false;

uint8_t ctl = static_cast<uint8_t>(Imm.getZExtValue());

SmallVector<APValue, 32> ResultElements;
ResultElements.reserve(NumElts);

for (unsigned idx = 0; idx != NumElts; idx++) {
unsigned LaneBase = (idx / LaneElts) * LaneElts;
unsigned LaneIdx = idx % LaneElts;

unsigned SrcIdx = idx;

if (ElemBits == 32) {
unsigned sel = (ctl >> (2 * LaneIdx)) & 0x3;
SrcIdx = LaneBase + sel;
} else {
if (LaneElts == 4) {
unsigned sel = (ctl >> (2 * LaneIdx)) & 0x3;
SrcIdx = LaneBase + sel;
} else {
constexpr unsigned HalfSize = 4;
if (HalfBase == 0) {
if (LaneIdx < HalfSize) {
unsigned sel = (ctl >> (2 * LaneIdx)) & 0x3;
SrcIdx = LaneBase + sel;
}
} else if (HalfBase == HalfSize) {
if (LaneIdx >= HalfSize) {
unsigned rel = LaneIdx - HalfSize;
unsigned sel = (ctl >> (2 * rel)) & 0x3;
SrcIdx = LaneBase + HalfBase + sel;
}
} else {
unsigned sel = (ctl >> (2 * LaneIdx)) & 0x3;
SrcIdx = LaneBase + sel;
}
}
}

ResultElements.push_back(Vec.getVectorElt(SrcIdx));
}

Out = APValue(ResultElements.data(), ResultElements.size());
return true;
}

bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
if (!IsConstantEvaluatedBuiltinCall(E))
return ExprEvaluatorBaseTy::VisitCallExpr(E);
Expand Down Expand Up @@ -11868,7 +11934,6 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {

return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}

case clang::X86::BI__builtin_ia32_vprotbi:
case clang::X86::BI__builtin_ia32_vprotdi:
case clang::X86::BI__builtin_ia32_vprotqi:
Expand Down Expand Up @@ -12087,6 +12152,34 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {

return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}

case X86::BI__builtin_ia32_pshuflw:
case X86::BI__builtin_ia32_pshuflw256:
case X86::BI__builtin_ia32_pshuflw512: {
APValue R;
if (!evalPshufBuiltin(Info, E, /*ElemBits=*/16, /*HalfBaseElems=*/0, R))
return false;
return Success(R, E);
}

case X86::BI__builtin_ia32_pshufhw:
case X86::BI__builtin_ia32_pshufhw256:
case X86::BI__builtin_ia32_pshufhw512: {
APValue R;
if (!evalPshufBuiltin(Info, E, /*ElemBits=*/16, /*HalfBaseElems=*/4, R))
return false;
return Success(R, E);
}

case X86::BI__builtin_ia32_pshufd:
case X86::BI__builtin_ia32_pshufd256:
case X86::BI__builtin_ia32_pshufd512: {
APValue R;
if (!evalPshufBuiltin(Info, E, /*ElemBits=*/32, /*HalfBaseElems=*/~0u, R))
return false;
return Success(R, E);
}

case Builtin::BI__builtin_elementwise_clzg:
case Builtin::BI__builtin_elementwise_ctzg: {
APValue SourceLHS;
Expand Down
6 changes: 3 additions & 3 deletions clang/test/CodeGen/X86/avx2-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -1109,19 +1109,19 @@ __m256i test_mm256_shuffle_epi32(__m256i a) {
// CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> poison, <8 x i32> <i32 3, i32 3, i32 0, i32 0, i32 7, i32 7, i32 4, i32 4>
return _mm256_shuffle_epi32(a, 15);
}

TEST_CONSTEXPR(match_v8si(_mm256_shuffle_epi32((((__m256i)(__v8si){0,1,2,3,4,5,6,7})), 15), 3,3,0,0, 7,7,4,4));
__m256i test_mm256_shufflehi_epi16(__m256i a) {
// CHECK-LABEL: test_mm256_shufflehi_epi16
// CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 15, i32 14, i32 14, i32 13>
return _mm256_shufflehi_epi16(a, 107);
}

TEST_CONSTEXPR(match_v16hi(_mm256_shufflehi_epi16((((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15})), 107), 0,1,2,3, 7,6,6,5, 8,9,10,11, 15,14,14,13));
__m256i test_mm256_shufflelo_epi16(__m256i a) {
// CHECK-LABEL: test_mm256_shufflelo_epi16
// CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> <i32 3, i32 0, i32 1, i32 1, i32 4, i32 5, i32 6, i32 7, i32 11, i32 8, i32 9, i32 9, i32 12, i32 13, i32 14, i32 15>
return _mm256_shufflelo_epi16(a, 83);
}

TEST_CONSTEXPR(match_v16hi(_mm256_shufflelo_epi16(((__m256i)(__v16hi){ 0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15}), 83), 3,0,1,1, 4,5,6,7, 11,8,9,9, 12,13,14,15) );
__m256i test_mm256_sign_epi8(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_sign_epi8
// CHECK: call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %{{.*}}, <32 x i8> %{{.*}})
Expand Down
Loading