Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
a81c406
deal this issues 155395
Sep 4, 2025
2fadf3f
deal issues 15595
Sep 4, 2025
f8362b4
Merge branch 'llvm:main' into main
whytolearn Sep 4, 2025
ed4a09f
constexpr deal
Sep 11, 2025
df6242e
adjust unit test #146940
Sep 13, 2025
9f2fb43
Merge remote-tracking branch 'upstream/main'
Sep 13, 2025
929d7c0
Merge branch 'main' into main
whytolearn Sep 13, 2025
f91aa21
adjust test case and function
Sep 26, 2025
4f5fb87
undo the unintentional formatting of the code
Sep 26, 2025
2422cd4
Merge branch 'main' into main
whytolearn Sep 26, 2025
a3575c5
Merge branch 'main' into main
whytolearn Sep 26, 2025
b2cac3e
adjust code
Sep 26, 2025
197123a
adjust code for mm256
Sep 28, 2025
b733157
format code
Sep 28, 2025
1ce4883
Merge branch 'main' into main
whytolearn Sep 29, 2025
9a7c138
deal all 256 double pane ins
Oct 2, 2025
a65f4fc
deal all 256 double pane ins
Oct 2, 2025
9877317
adjust for 128 and 256 oprand
Oct 7, 2025
404d261
Merge branch 'main' into main
whytolearn Oct 7, 2025
1d61bf2
undo some bad format for .td file
Oct 7, 2025
b25aa5e
Merge branch 'main' into main
whytolearn Oct 9, 2025
4bc2341
merge disperse operation
Oct 10, 2025
242165a
Merge remote-tracking branch 'upstream/main'
Oct 10, 2025
d2e5d43
Merge remote-tracking branch 'upstream/main'
Oct 10, 2025
6d57df0
Merge branch 'main' into main
whytolearn Oct 11, 2025
03e4db0
Merge branch 'main' into main
RKSimon Oct 13, 2025
c2117f6
Update clang/lib/AST/ByteCode/InterpBuiltin.cpp
whytolearn Oct 13, 2025
5c7412f
Update clang/lib/AST/ByteCode/InterpBuiltin.cpp
whytolearn Oct 13, 2025
90200be
Merge branch 'main' into main
whytolearn Oct 13, 2025
5df6aff
Update clang/lib/AST/ExprConstant.cpp
whytolearn Oct 13, 2025
202c165
bad merger delate and code format
Oct 13, 2025
34ee8ed
Merge branch 'main' into main
whytolearn Oct 13, 2025
9ec2672
Merge branch 'main' into main
whytolearn Oct 13, 2025
7e15580
Merge branch 'main' into main
RKSimon Oct 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 29 additions & 13 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -110,19 +110,20 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
}

let Features = "sse3" in {
foreach Op = ["addsub", "hadd", "hsub"] in {
foreach Op = ["addsub"] in {
def Op#ps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">;
def Op#pd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">;
}
}

let Features = "ssse3" in {
foreach Op = ["phadd", "phsub"] in {
def Op#w128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
def Op#sw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
def Op#d128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
// SSE3 floating-point horizontal add/subtract builtins. The foreach expands
// to haddps, haddpd, hsubps and hsubpd; all four are declared with the
// Constexpr attribute in addition to NoThrow/Const.
let Features = "sse3", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
foreach Op = ["hadd", "hsub"] in {
def Op#ps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">;
def Op#pd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">;
}
}

let Features = "ssse3" in {
def pmulhrsw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
def pshufb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
def psignb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
Expand All @@ -137,7 +138,7 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {

// AVX
let Attributes = [Const, NoThrow, RequiredVectorWidth<256>], Features = "avx" in {
foreach Op = ["addsub", "hadd", "hsub", "max", "min"] in {
foreach Op = ["addsub", "max", "min"] in {
def Op#pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>)">;
def Op#ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>)">;
}
Expand Down Expand Up @@ -316,6 +317,14 @@ let Features = "ssse3", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
def palignr128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant int)">;
}

// SSSE3 integer horizontal add/subtract builtins. For each of phadd/phsub
// this defines the w128, sw128 and d128 forms (8 x short, 8 x short and
// 4 x int respectively), all declared with the Constexpr attribute.
let Features = "ssse3", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
foreach Op = ["phadd", "phsub"] in {
def Op#w128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
def Op#sw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
def Op#d128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
}
}

let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def insertps128 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">;
def roundps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Constant int)">;
Expand Down Expand Up @@ -515,6 +524,11 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
def vinsertf128_pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<2, double>, _Constant int)">;
def vinsertf128_ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<4, float>, _Constant int)">;
def vinsertf128_si256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>, _Constant int)">;

// 256-bit floating-point horizontal add/subtract: expands to haddpd256,
// haddps256, hsubpd256 and hsubps256.
foreach Op = ["hadd", "hsub"] in {
def Op#pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>)">;
def Op#ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>)">;
}
}

let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
Expand Down Expand Up @@ -592,12 +606,7 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def mpsadbw256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant char)">;
def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant int)">;
def phaddw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
def phaddd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
def phaddsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
def phsubw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
def phsubd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
def phsubsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;

def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">;
def pmulhrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
Expand Down Expand Up @@ -666,6 +675,13 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
def packssdw256 : X86Builtin<"_Vector<16, short>(_Vector<8, int>, _Vector<8, int>)">;
def packuswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, _Vector<16, short>)">;

// 256-bit integer horizontal add/subtract builtins: w = 16 x short,
// d = 8 x int, sw = 16 x short (the variant evaluated with saturating
// arithmetic by the constant evaluators).
def phaddw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
def phaddd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
def phaddsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
def phsubw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
def phsubd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
def phsubsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;

def pshuflw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">;
def pshufhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">;
def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;
Expand Down
123 changes: 123 additions & 0 deletions clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2587,6 +2587,82 @@ static bool interp__builtin_ia32_pmul(
return true;
}

/// Evaluates the x86 integer horizontal add/subtract builtins
/// (phadd*/phsub*) in the bytecode interpreter.
///
/// The operands are processed one 128-bit lane at a time. Within a lane,
/// \p Fn is applied to every pair of adjacent elements of the first operand,
/// then to every pair of adjacent elements of the second operand; the results
/// are written to consecutive destination elements in that order.
///
/// \param S    interpreter state; the two operands are popped from its stack
///             and the destination vector is peeked.
/// \param OpPC unused.
/// \param Call the builtin call; argument 0 supplies the vector type.
/// \param Fn   combines the two elements of one adjacent pair.
/// \returns true, after marking every destination element initialized.
static bool interp_builtin_horizontal_int_binop(
    InterpState &S, CodePtr OpPC, const CallExpr *Call,
    llvm::function_ref<APInt(const APSInt &, const APSInt &)> Fn) {
  const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>();
  PrimType ElemT = *S.getContext().classify(VT->getElementType());
  // Signedness must be read from the result's *element* type: the call's type
  // is the vector itself, so querying it directly says nothing about the
  // elements. This mirrors the ExprConstant.cpp implementation.
  bool DestUnsigned = Call->getType()
                          ->castAs<VectorType>()
                          ->getElementType()
                          ->isUnsignedIntegerOrEnumerationType();

  // Operands were pushed left-to-right, so the right-hand side pops first.
  const Pointer &RHS = S.Stk.pop<Pointer>();
  const Pointer &LHS = S.Stk.pop<Pointer>();
  const Pointer &Dst = S.Stk.peek<Pointer>();
  unsigned NumElts = VT->getNumElements();
  unsigned EltBits = S.getASTContext().getIntWidth(VT->getElementType());
  unsigned EltsPerLane = 128 / EltBits;
  unsigned Lanes = NumElts * EltBits / 128;
  unsigned DestIndex = 0;

  // Combines each adjacent pair in one lane of Src and appends the results to
  // the destination at the running DestIndex.
  auto FoldLane = [&](const Pointer &Src, unsigned LaneStart) {
    for (unsigned I = 0; I < EltsPerLane; I += 2) {
      INT_TYPE_SWITCH_NO_BOOL(ElemT, {
        APSInt Elem1 = Src.elem<T>(LaneStart + I).toAPSInt();
        APSInt Elem2 = Src.elem<T>(LaneStart + I + 1).toAPSInt();
        APSInt Res = APSInt(Fn(Elem1, Elem2), DestUnsigned);
        Dst.elem<T>(DestIndex++) = static_cast<T>(Res);
      });
    }
  };

  for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
    unsigned LaneStart = Lane * EltsPerLane;
    // Lower half of the destination lane comes from LHS, upper half from RHS.
    FoldLane(LHS, LaneStart);
    FoldLane(RHS, LaneStart);
  }
  Dst.initializeAllElements();
  return true;
}

/// Evaluates the x86 floating-point horizontal add/subtract builtins
/// (hadd*/hsub*) in the bytecode interpreter.
///
/// Each 128-bit lane of the destination receives, in its lower half, \p Fn
/// applied to the adjacent element pairs of the same lane of the first
/// operand and, in its upper half, \p Fn applied to the adjacent element
/// pairs of the same lane of the second operand.
///
/// \param S    interpreter state; the two operands are popped from its stack
///             and the destination vector is peeked.
/// \param OpPC unused.
/// \param Call the builtin call; supplies the vector type and FP options.
/// \param Fn   combines one adjacent pair under the given rounding mode.
/// \returns true, after marking every destination element initialized.
static bool interp_builtin_horizontal_fp_binop(
    InterpState &S, CodePtr OpPC, const CallExpr *Call,
    llvm::function_ref<APFloat(const APFloat &, const APFloat &,
                               llvm::RoundingMode)>
        Fn) {
  using FloatT = PrimConv<PT_Float>::T;

  // The right-hand operand sits on top of the stack, so it pops first.
  const Pointer &RHS = S.Stk.pop<Pointer>();
  const Pointer &LHS = S.Stk.pop<Pointer>();
  const Pointer &Dst = S.Stk.peek<Pointer>();

  llvm::RoundingMode RM =
      getRoundingMode(Call->getFPFeaturesInEffect(S.Ctx.getLangOpts()));

  const auto *VecTy = Call->getArg(0)->getType()->castAs<VectorType>();
  unsigned TotalElts = VecTy->getNumElements();
  unsigned BitsPerElt = S.getASTContext().getTypeSize(VecTy->getElementType());
  unsigned LaneCount = TotalElts * BitsPerElt / 128;
  unsigned LaneWidth = TotalElts / LaneCount;
  unsigned PairsPerLane = LaneWidth / 2;

  // Combines the adjacent pairs of one source lane starting at SrcBase and
  // stores the results at consecutive destination slots starting at DstBase.
  auto FoldPairs = [&](const Pointer &Src, unsigned SrcBase,
                       unsigned DstBase) {
    for (unsigned Pair = 0; Pair != PairsPerLane; ++Pair) {
      APFloat First = Src.elem<FloatT>(SrcBase + (2 * Pair) + 0).getAPFloat();
      APFloat Second = Src.elem<FloatT>(SrcBase + (2 * Pair) + 1).getAPFloat();
      Dst.elem<FloatT>(DstBase + Pair) =
          static_cast<FloatT>(Fn(First, Second, RM));
    }
  };

  for (unsigned LaneBase = 0; LaneBase != TotalElts; LaneBase += LaneWidth) {
    FoldPairs(LHS, LaneBase, LaneBase);
    FoldPairs(RHS, LaneBase, LaneBase + PairsPerLane);
  }

  Dst.initializeAllElements();
  return true;
}

static bool interp__builtin_elementwise_triop_fp(
InterpState &S, CodePtr OpPC, const CallExpr *Call,
llvm::function_ref<APFloat(const APFloat &, const APFloat &,
Expand Down Expand Up @@ -3665,6 +3741,53 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case Builtin::BI__builtin_elementwise_min:
return interp__builtin_elementwise_maxmin(S, OpPC, Call, BuiltinID);

// Integer horizontal add: plain addition of each adjacent pair.
case clang::X86::BI__builtin_ia32_phaddw128:
case clang::X86::BI__builtin_ia32_phaddw256:
case clang::X86::BI__builtin_ia32_phaddd128:
case clang::X86::BI__builtin_ia32_phaddd256:
  return interp_builtin_horizontal_int_binop(
      S, OpPC, Call,
      [](const APSInt &A, const APSInt &B) { return A + B; });

// Integer horizontal add with signed saturation.
case clang::X86::BI__builtin_ia32_phaddsw128:
case clang::X86::BI__builtin_ia32_phaddsw256:
  return interp_builtin_horizontal_int_binop(
      S, OpPC, Call,
      [](const APSInt &A, const APSInt &B) { return A.sadd_sat(B); });

// Integer horizontal subtract: first element of the pair minus the second.
case clang::X86::BI__builtin_ia32_phsubw128:
case clang::X86::BI__builtin_ia32_phsubw256:
case clang::X86::BI__builtin_ia32_phsubd128:
case clang::X86::BI__builtin_ia32_phsubd256:
  return interp_builtin_horizontal_int_binop(
      S, OpPC, Call,
      [](const APSInt &A, const APSInt &B) { return A - B; });

// Integer horizontal subtract with signed saturation.
case clang::X86::BI__builtin_ia32_phsubsw128:
case clang::X86::BI__builtin_ia32_phsubsw256:
  return interp_builtin_horizontal_int_binop(
      S, OpPC, Call,
      [](const APSInt &A, const APSInt &B) { return A.ssub_sat(B); });

// Floating-point horizontal add under the call's rounding mode.
case clang::X86::BI__builtin_ia32_haddpd:
case clang::X86::BI__builtin_ia32_haddps:
case clang::X86::BI__builtin_ia32_haddpd256:
case clang::X86::BI__builtin_ia32_haddps256:
  return interp_builtin_horizontal_fp_binop(
      S, OpPC, Call,
      [](const APFloat &A, const APFloat &B, llvm::RoundingMode RM) {
        APFloat Sum = A;
        Sum.add(B, RM);
        return Sum;
      });

// Floating-point horizontal subtract under the call's rounding mode.
case clang::X86::BI__builtin_ia32_hsubpd:
case clang::X86::BI__builtin_ia32_hsubps:
case clang::X86::BI__builtin_ia32_hsubpd256:
case clang::X86::BI__builtin_ia32_hsubps256:
  return interp_builtin_horizontal_fp_binop(
      S, OpPC, Call,
      [](const APFloat &A, const APFloat &B, llvm::RoundingMode RM) {
        APFloat Diff = A;
        Diff.subtract(B, RM);
        return Diff;
      });

case clang::X86::BI__builtin_ia32_pmuldq128:
case clang::X86::BI__builtin_ia32_pmuldq256:
case clang::X86::BI__builtin_ia32_pmuldq512:
Expand Down
163 changes: 163 additions & 0 deletions clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12381,6 +12381,169 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}

case clang::X86::BI__builtin_ia32_phaddw128:
case clang::X86::BI__builtin_ia32_phaddw256:
case clang::X86::BI__builtin_ia32_phaddd128:
case clang::X86::BI__builtin_ia32_phaddd256:
case clang::X86::BI__builtin_ia32_phaddsw128:
case clang::X86::BI__builtin_ia32_phaddsw256:

case clang::X86::BI__builtin_ia32_phsubw128:
case clang::X86::BI__builtin_ia32_phsubw256:
case clang::X86::BI__builtin_ia32_phsubd128:
case clang::X86::BI__builtin_ia32_phsubd256:
case clang::X86::BI__builtin_ia32_phsubsw128:
case clang::X86::BI__builtin_ia32_phsubsw256: {
  // Integer horizontal add/subtract. For every 128-bit lane, the adjacent
  // element pairs of the first operand are combined and appended to the
  // result, followed by the adjacent element pairs of the second operand.
  APValue SourceLHS, SourceRHS;
  if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
      !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
    return false;
  QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
  bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType();

  unsigned NumElts = SourceLHS.getVectorLength();
  unsigned EltBits = Info.Ctx.getIntWidth(DestEltTy);
  unsigned EltsPerLane = 128 / EltBits;
  SmallVector<APValue, 4> ResultElements;
  ResultElements.reserve(NumElts);

  // Combines each adjacent pair in one lane of Src and appends the results.
  // Every branch passes DestUnsigned explicitly: APSInt's signedness
  // parameter defaults to unsigned, which would mislabel the saturating
  // results for the signed element types these builtins use.
  auto AppendLanePairs = [&](const APValue &Src, unsigned LaneStart) {
    for (unsigned I = 0; I != EltsPerLane; I += 2) {
      APSInt A = Src.getVectorElt(LaneStart + I).getInt();
      APSInt B = Src.getVectorElt(LaneStart + I + 1).getInt();
      switch (E->getBuiltinCallee()) {
      case clang::X86::BI__builtin_ia32_phaddw128:
      case clang::X86::BI__builtin_ia32_phaddw256:
      case clang::X86::BI__builtin_ia32_phaddd128:
      case clang::X86::BI__builtin_ia32_phaddd256: {
        APSInt Res(A + B, DestUnsigned);
        ResultElements.push_back(APValue(Res));
        break;
      }
      case clang::X86::BI__builtin_ia32_phaddsw128:
      case clang::X86::BI__builtin_ia32_phaddsw256: {
        APSInt Res(A.sadd_sat(B), DestUnsigned);
        ResultElements.push_back(APValue(Res));
        break;
      }
      case clang::X86::BI__builtin_ia32_phsubw128:
      case clang::X86::BI__builtin_ia32_phsubw256:
      case clang::X86::BI__builtin_ia32_phsubd128:
      case clang::X86::BI__builtin_ia32_phsubd256: {
        APSInt Res(A - B, DestUnsigned);
        ResultElements.push_back(APValue(Res));
        break;
      }
      case clang::X86::BI__builtin_ia32_phsubsw128:
      case clang::X86::BI__builtin_ia32_phsubsw256: {
        APSInt Res(A.ssub_sat(B), DestUnsigned);
        ResultElements.push_back(APValue(Res));
        break;
      }
      }
    }
  };

  for (unsigned LaneStart = 0; LaneStart != NumElts;
       LaneStart += EltsPerLane) {
    // Lower half of the result lane comes from LHS, upper half from RHS.
    AppendLanePairs(SourceLHS, LaneStart);
    AppendLanePairs(SourceRHS, LaneStart);
  }
  return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
case clang::X86::BI__builtin_ia32_haddpd:
case clang::X86::BI__builtin_ia32_haddps:
case clang::X86::BI__builtin_ia32_haddps256:
case clang::X86::BI__builtin_ia32_haddpd256:
case clang::X86::BI__builtin_ia32_hsubpd:
case clang::X86::BI__builtin_ia32_hsubps:
case clang::X86::BI__builtin_ia32_hsubps256:
case clang::X86::BI__builtin_ia32_hsubpd256: {
  // Floating-point horizontal add/subtract. For every 128-bit lane, the
  // adjacent element pairs of the first operand are combined and appended to
  // the result, followed by the adjacent element pairs of the second operand.
  APValue SourceLHS, SourceRHS;
  if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS))
    return false;
  if (!EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
    return false;

  unsigned TotalElts = SourceLHS.getVectorLength();
  SmallVector<APValue, 4> ResultElements;
  ResultElements.reserve(TotalElts);
  llvm::RoundingMode RM = getActiveRoundingMode(getEvalInfo(), E);

  QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
  unsigned BitsPerElt = Info.Ctx.getTypeSize(DestEltTy);
  unsigned LaneCount = TotalElts * BitsPerElt / 128;
  unsigned LaneWidth = TotalElts / LaneCount;
  unsigned PairsPerLane = LaneWidth / 2;

  // Combines each adjacent pair in one lane of Src (accumulating into the
  // first element of the pair) and appends the results.
  auto AppendLanePairs = [&](const APValue &Src, unsigned LaneBase) {
    for (unsigned Pair = 0; Pair != PairsPerLane; ++Pair) {
      APFloat Acc = Src.getVectorElt(LaneBase + (2 * Pair) + 0).getFloat();
      APFloat Other = Src.getVectorElt(LaneBase + (2 * Pair) + 1).getFloat();
      switch (E->getBuiltinCallee()) {
      case clang::X86::BI__builtin_ia32_haddpd:
      case clang::X86::BI__builtin_ia32_haddps:
      case clang::X86::BI__builtin_ia32_haddps256:
      case clang::X86::BI__builtin_ia32_haddpd256:
        Acc.add(Other, RM);
        break;
      case clang::X86::BI__builtin_ia32_hsubpd:
      case clang::X86::BI__builtin_ia32_hsubps:
      case clang::X86::BI__builtin_ia32_hsubps256:
      case clang::X86::BI__builtin_ia32_hsubpd256:
        Acc.subtract(Other, RM);
        break;
      }
      ResultElements.push_back(APValue(Acc));
    }
  };

  for (unsigned LaneBase = 0; LaneBase != TotalElts; LaneBase += LaneWidth) {
    AppendLanePairs(SourceLHS, LaneBase);
    AppendLanePairs(SourceRHS, LaneBase);
  }
  return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
case Builtin::BI__builtin_elementwise_fshl:
case Builtin::BI__builtin_elementwise_fshr: {
APValue SourceHi, SourceLo, SourceShift;
Expand Down
Loading
Loading