@@ -12420,9 +12420,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1242012420 }
1242112421 case clang::X86::BI__builtin_ia32_phaddsw128:
1242212422 case clang::X86::BI__builtin_ia32_phaddsw256: {
12423- APSInt Res(LHSA.isSigned() ? LHSA.sadd_sat(LHSB)
12424- : LHSA.uadd_sat(LHSB),
12425- DestUnsigned);
12423+ APSInt Res(LHSA.sadd_sat(LHSB));
1242612424 ResultElements.push_back(APValue(Res));
1242712425 break;
1242812426 }
@@ -12436,9 +12434,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1243612434 }
1243712435 case clang::X86::BI__builtin_ia32_phsubsw128:
1243812436 case clang::X86::BI__builtin_ia32_phsubsw256: {
12439- APSInt Res(LHSA.isSigned() ? LHSA.ssub_sat(LHSB)
12440- : LHSA.usub_sat(LHSB),
12441- DestUnsigned);
12437+ APSInt Res(LHSA.ssub_sat(LHSB));
1244212438 ResultElements.push_back(APValue(Res));
1244312439 break;
1244412440 }
@@ -12458,9 +12454,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1245812454 }
1245912455 case clang::X86::BI__builtin_ia32_phaddsw128:
1246012456 case clang::X86::BI__builtin_ia32_phaddsw256: {
12461- APSInt Res(RHSA.isSigned() ? RHSA.sadd_sat(RHSB)
12462- : RHSA.uadd_sat(RHSB),
12463- DestUnsigned);
12457+ APSInt Res(RHSA.sadd_sat(RHSB));
1246412458 ResultElements.push_back(APValue(Res));
1246512459 break;
1246612460 }
@@ -12474,9 +12468,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1247412468 }
1247512469 case clang::X86::BI__builtin_ia32_phsubsw128:
1247612470 case clang::X86::BI__builtin_ia32_phsubsw256: {
12477- APSInt Res(RHSA.isSigned() ? RHSA.ssub_sat(RHSB)
12478- : RHSA.usub_sat(RHSB),
12479- DestUnsigned);
12471+ APSInt Res(RHSA.ssub_sat(RHSB));
1248012472 ResultElements.push_back(APValue(Res));
1248112473 break;
1248212474 }
@@ -12486,110 +12478,66 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1248612478 return Success(APValue(ResultElements.data(), ResultElements.size()), E);
1248712479 }
1248812480 case clang::X86::BI__builtin_ia32_haddpd:
12489- case clang::X86::BI__builtin_ia32_haddps: {
12490- APValue SourceLHS, SourceRHS;
12491- if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
12492- !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
12493- return false;
12494- unsigned SourceLen = SourceLHS.getVectorLength();
12495- SmallVector<APValue, 4> ResultElements;
12496- ResultElements.reserve(SourceLen);
12497- llvm::RoundingMode RM = getActiveRoundingMode(getEvalInfo(), E);
12498- for (unsigned EltNum = 0; EltNum < SourceLen; EltNum += 2) {
12499- APFloat LHSA = SourceLHS.getVectorElt(EltNum).getFloat();
12500- APFloat LHSB = SourceLHS.getVectorElt(EltNum + 1).getFloat();
12501- LHSA.add(LHSB, RM);
12502- ResultElements.push_back(APValue(LHSA));
12503- }
12504- for (unsigned EltNum = 0; EltNum < SourceLen; EltNum += 2) {
12505- APFloat RHSA = SourceRHS.getVectorElt(EltNum).getFloat();
12506- APFloat RHSB = SourceRHS.getVectorElt(EltNum + 1).getFloat();
12507- RHSA.add(RHSB, RM);
12508- ResultElements.push_back(APValue(RHSA));
12509- }
12510- return Success(APValue(ResultElements.data(), ResultElements.size()), E);
12511- }
12512- case clang::X86::BI__builtin_ia32_hsubpd:
12513- case clang::X86::BI__builtin_ia32_hsubps: {
12514- APValue SourceLHS, SourceRHS;
12515- if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
12516- !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
12517- return false;
12518- unsigned SourceLen = SourceLHS.getVectorLength();
12519- SmallVector<APValue, 4> ResultElements;
12520- ResultElements.reserve(SourceLen);
12521- llvm::RoundingMode RM = getActiveRoundingMode(getEvalInfo(), E);
12522- for (unsigned EltNum = 0; EltNum < SourceLen; EltNum += 2) {
12523- APFloat LHSA = SourceLHS.getVectorElt(EltNum).getFloat();
12524- APFloat LHSB = SourceLHS.getVectorElt(EltNum + 1).getFloat();
12525- LHSA.subtract(LHSB, RM);
12526- ResultElements.push_back(APValue(LHSA));
12527- }
12528- for (unsigned EltNum = 0; EltNum < SourceLen; EltNum += 2) {
12529- APFloat RHSA = SourceRHS.getVectorElt(EltNum).getFloat();
12530- APFloat RHSB = SourceRHS.getVectorElt(EltNum + 1).getFloat();
12531- RHSA.subtract(RHSB, RM);
12532- ResultElements.push_back(APValue(RHSA));
12533- }
12534- return Success(APValue(ResultElements.data(), ResultElements.size()), E);
12535- }
12481+ case clang::X86::BI__builtin_ia32_haddps:
12482+ case clang::X86::BI__builtin_ia32_haddps256:
1253612483 case clang::X86::BI__builtin_ia32_haddpd256:
12484+ case clang::X86::BI__builtin_ia32_hsubpd:
12485+ case clang::X86::BI__builtin_ia32_hsubps:
12486+ case clang::X86::BI__builtin_ia32_hsubps256:
1253712487 case clang::X86::BI__builtin_ia32_hsubpd256: {
1253812488 APValue SourceLHS, SourceRHS;
1253912489 if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
1254012490 !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
1254112491 return false;
12542- SmallVector<APValue, 4> ResultElements(4);
12543- llvm::RoundingMode RM = getActiveRoundingMode(getEvalInfo(), E);
12544- for (unsigned i = 0; i < 2; ++i) {
12545- APFloat A = SourceLHS.getVectorElt(2 * i).getFloat();
12546- APFloat B = SourceLHS.getVectorElt(2 * i + 1).getFloat();
12547- if (E->getBuiltinCallee() == clang::X86::BI__builtin_ia32_haddpd256)
12548- A.add(B, RM);
12549- else
12550- A.subtract(B, RM);
12551- ResultElements[2 * i] = APValue(A);
12552- }
12553- for (unsigned i = 0; i < 2; ++i) {
12554- APFloat A = SourceRHS.getVectorElt(2 * i).getFloat();
12555- APFloat B = SourceRHS.getVectorElt(2 * i + 1).getFloat();
12556- if (E->getBuiltinCallee() == clang::X86::BI__builtin_ia32_haddpd256)
12557- A.add(B, RM);
12558- else
12559- A.subtract(B, RM);
12560- ResultElements[2 * i + 1] = APValue(A);
12561- }
12562- return Success(APValue(ResultElements.data(), ResultElements.size()), E);
12563- }
12564- case clang::X86::BI__builtin_ia32_haddps256:
12565- case clang::X86::BI__builtin_ia32_hsubps256: {
12566- APValue SourceLHS, SourceRHS;
12567- if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
12568- !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
12569- return false;
12570- SmallVector<APValue, 4> ResultElements(8);
12492+ unsigned NumElts = SourceLHS.getVectorLength();
12493+ SmallVector<APValue, 4> ResultElements;
12494+ ResultElements.reserve(NumElts);
1257112495 llvm::RoundingMode RM = getActiveRoundingMode(getEvalInfo(), E);
12572- for (unsigned i = 0; i < 4; ++i) {
12573- unsigned SrcIdx = 2 * i;
12574- unsigned DestIdx = (i < 2) ? i : (i + 2);
12575- APFloat A = SourceLHS.getVectorElt(SrcIdx).getFloat();
12576- APFloat B = SourceLHS.getVectorElt(SrcIdx + 1).getFloat();
12577- if (E->getBuiltinCallee() == clang::X86::BI__builtin_ia32_haddps256)
12578- A.add(B, RM);
12579- else
12580- A.subtract(B, RM);
12581- ResultElements[DestIdx] = APValue(A);
12582- }
12583- for (unsigned i = 0; i < 4; ++i) {
12584- unsigned SrcIdx = 2 * i;
12585- unsigned DestIdx = (i < 2) ? (i + 2) : (i + 4);
12586- APFloat A = SourceRHS.getVectorElt(SrcIdx).getFloat();
12587- APFloat B = SourceRHS.getVectorElt(SrcIdx + 1).getFloat();
12588- if (E->getBuiltinCallee() == clang::X86::BI__builtin_ia32_haddps256)
12589- A.add(B, RM);
12590- else
12591- A.subtract(B, RM);
12592- ResultElements[DestIdx] = APValue(A);
12496+ QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
12497+ unsigned EltBits = Info.Ctx.getTypeSize(DestEltTy);
12498+ unsigned NumLanes = NumElts * EltBits / 128;
12499+ unsigned NumElemsPerLane = NumElts / NumLanes;
12500+ unsigned HalfElemsPerLane = NumElemsPerLane / 2;
12501+
12502+ for (unsigned L = 0; L != NumElts; L += NumElemsPerLane) {
12503+ for (unsigned I = 0; I != HalfElemsPerLane; ++I) {
12504+ APFloat LHSA = SourceLHS.getVectorElt(L + (2 * I) + 0).getFloat();
12505+ APFloat LHSB = SourceLHS.getVectorElt(L + (2 * I) + 1).getFloat();
12506+ switch (E->getBuiltinCallee()) {
12507+ case clang::X86::BI__builtin_ia32_haddpd:
12508+ case clang::X86::BI__builtin_ia32_haddps:
12509+ case clang::X86::BI__builtin_ia32_haddps256:
12510+ case clang::X86::BI__builtin_ia32_haddpd256:
12511+ LHSA.add(LHSB, RM);
12512+ break;
12513+ case clang::X86::BI__builtin_ia32_hsubpd:
12514+ case clang::X86::BI__builtin_ia32_hsubps:
12515+ case clang::X86::BI__builtin_ia32_hsubps256:
12516+ case clang::X86::BI__builtin_ia32_hsubpd256:
12517+ LHSA.subtract(LHSB, RM);
12518+ break;
12519+ }
12520+ ResultElements.push_back(APValue(LHSA));
12521+ }
12522+ for (unsigned I = 0; I != HalfElemsPerLane; ++I) {
12523+ APFloat RHSA = SourceRHS.getVectorElt(L + (2 * I) + 0).getFloat();
12524+ APFloat RHSB = SourceRHS.getVectorElt(L + (2 * I) + 1).getFloat();
12525+ switch (E->getBuiltinCallee()) {
12526+ case clang::X86::BI__builtin_ia32_haddpd:
12527+ case clang::X86::BI__builtin_ia32_haddps:
12528+ case clang::X86::BI__builtin_ia32_haddps256:
12529+ case clang::X86::BI__builtin_ia32_haddpd256:
12530+ RHSA.add(RHSB, RM);
12531+ break;
12532+ case clang::X86::BI__builtin_ia32_hsubpd:
12533+ case clang::X86::BI__builtin_ia32_hsubps:
12534+ case clang::X86::BI__builtin_ia32_hsubps256:
12535+ case clang::X86::BI__builtin_ia32_hsubpd256:
12536+ RHSA.subtract(RHSB, RM);
12537+ break;
12538+ }
12539+ ResultElements.push_back(APValue(RHSA));
12540+ }
1259312541 }
1259412542 return Success(APValue(ResultElements.data(), ResultElements.size()), E);
1259512543 }
0 commit comments