Skip to content

Commit 9ebf1af

Browse files
committed
Modified InterpBuiltin.cpp and ExprConstant.cpp
1 parent 6095d8f commit 9ebf1af

File tree

2 files changed

+88
-15
lines changed

2 files changed

+88
-15
lines changed

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 46 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2583,10 +2583,35 @@ static bool interp__builtin_elementwise_maxmin(InterpState &S, CodePtr OpPC,
25832583
return true;
25842584
}
25852585

2586-
static bool interp__builtin_ia32_pmadd(InterpState &S, CodePtr OpPC,
2587-
const CallExpr *Call,
2588-
unsigned BuiltinID) {
2589-
return true; // TODO: Implement the builtin.
2586+
/// Shared evaluator for the x86 pmadd-style vector builtins.
///
/// Pops the two source vectors from the interpreter stack, walks them in
/// adjacent element pairs and stores one result per pair into the destination
/// vector left on top of the stack.
///
/// \param S    Interpreter state; supplies the stack and the type classifier.
/// \param OpPC Current opcode position (unused here).
/// \param Call The builtin call; its argument and result types determine the
///             source and destination element types.
/// \param Fn   Combines one pair, invoked as Fn(LoLHS, HiLHS, LoRHS, HiRHS).
///             Its result is reinterpreted with the destination element's
///             signedness and converted to the destination element type.
/// \returns true once every destination element has been written.
static bool interp__builtin_ia32_pmadd(
    InterpState &S, CodePtr OpPC, const CallExpr *Call,
    llvm::function_ref<APInt(const APSInt &, const APSInt &, const APSInt &,
                             const APSInt &)>
        Fn) {
  assert(Call->getArg(0)->getType()->isVectorType() &&
         Call->getArg(1)->getType()->isVectorType());
  // Operands were pushed left-to-right, so the right-hand side comes off
  // first. The destination stays on the stack as the call's result.
  const Pointer &RHS = S.Stk.pop<Pointer>();
  const Pointer &LHS = S.Stk.pop<Pointer>();
  const Pointer &Dst = S.Stk.peek<Pointer>();

  const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>();
  PrimType ElemT = *S.getContext().classify(VT->getElementType());
  unsigned NumElems = VT->getNumElements();
  // The loop below consumes two source elements per iteration and terminates
  // on `I != NumElems`, so an odd count would run past the end.
  assert(NumElems % 2 == 0 && "pmadd expects an even number of elements");

  // The destination has its own element type (and element count); it must be
  // classified separately rather than reusing the source element type.
  const auto *DestVT = Call->getType()->castAs<VectorType>();
  QualType DestEltTy = DestVT->getElementType();
  PrimType DestElemT = *S.getContext().classify(DestEltTy);
  bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType();
  assert(DestVT->getNumElements() == NumElems / 2 &&
         "pmadd produces one result per pair of source elements");

  unsigned DstElem = 0;
  for (unsigned I = 0; I != NumElems; I += 2, ++DstElem) {
    APSInt Result;
    // Read the pair using the *source* element type.
    INT_TYPE_SWITCH_NO_BOOL(ElemT, {
      APSInt LoLHS = LHS.elem<T>(I).toAPSInt();
      APSInt HiLHS = LHS.elem<T>(I + 1).toAPSInt();
      APSInt LoRHS = RHS.elem<T>(I).toAPSInt();
      APSInt HiRHS = RHS.elem<T>(I + 1).toAPSInt();
      Result = APSInt(Fn(LoLHS, HiLHS, LoRHS, HiRHS), DestUnsigned);
    });

    // Write using the *destination* element type and a densely packed index;
    // storing through the source type/index would truncate the value and
    // address the wrong (eventually out-of-range) destination slot.
    INT_TYPE_SWITCH_NO_BOOL(DestElemT,
                            { Dst.elem<T>(DstElem) = static_cast<T>(Result); });
  }

  Dst.initializeAllElements();
  return true;
}
25912616

25922617
static bool interp__builtin_ia32_pmul(InterpState &S, CodePtr OpPC,
@@ -3503,12 +3528,26 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
35033528
case clang::X86::BI__builtin_ia32_pmaddubsw128:
35043529
case clang::X86::BI__builtin_ia32_pmaddubsw256:
35053530
case clang::X86::BI__builtin_ia32_pmaddubsw512:
3506-
return true; // TODO: Use interp__builtin_i32_pmadd.
3507-
3531+
return interp__builtin_ia32_pmadd(
3532+
S, OpPC, Call,
3533+
[](const APSInt &LoLHS, const APSInt &HiLHS, const APSInt &LoRHS,
3534+
const APSInt &HiRHS) {
3535+
unsigned BitWidth = 2 * LoLHS.getBitWidth();
3536+
return (LoLHS.zext(BitWidth) * LoRHS.sext(BitWidth))
3537+
.sadd_sat((HiLHS.zext(BitWidth) * HiRHS.sext(BitWidth)));
3538+
});
3539+
35083540
case clang::X86::BI__builtin_ia32_pmaddwd128:
35093541
case clang::X86::BI__builtin_ia32_pmaddwd256:
35103542
case clang::X86::BI__builtin_ia32_pmaddwd512:
3511-
return true; // TODO: Use interp__builtin_i32_pmadd.
3543+
return interp__builtin_ia32_pmadd(
3544+
S, OpPC, Call,
3545+
[](const APSInt &LoLHS, const APSInt &HiLHS, const APSInt &LoRHS,
3546+
const APSInt &HiRHS) {
3547+
unsigned BitWidth = 2 * LoLHS.getBitWidth();
3548+
return (LoLHS.sext(BitWidth) * LoRHS.sext(BitWidth)) +
3549+
(HiLHS.sext(BitWidth) * HiRHS.sext(BitWidth));
3550+
});
35123551

35133552
case clang::X86::BI__builtin_ia32_pmulhuw128:
35143553
case clang::X86::BI__builtin_ia32_pmulhuw256:

clang/lib/AST/ExprConstant.cpp

Lines changed: 42 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11779,18 +11779,52 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1177911779
return EvaluateBinOpExpr(llvm::APIntOps::avgCeilU);
1178011780

1178111781
case clang::X86::BI__builtin_ia32_pmaddubsw128:
11782-
case clang::X86::BI__builtin_ia32_pmaddwd128:
1178311782
case clang::X86::BI__builtin_ia32_pmaddubsw256:
11784-
case clang::X86::BI__builtin_ia32_pmaddwd256:
1178511783
case clang::X86::BI__builtin_ia32_pmaddubsw512:
11786-
case clang::X86::BI__builtin_ia32_pmaddwd512:
11787-
return true; // TODO: Handle __builtin_ia32_pmaddub
11788-
1178911784
case clang::X86::BI__builtin_ia32_pmaddwd128:
1179011785
case clang::X86::BI__builtin_ia32_pmaddwd256:
11791-
case clang::X86::BI__builtin_ia32_pmaddwd512:
11792-
return true; // TODO: Handle __builtin_ia32_pmadd
11793-
});
11786+
case clang::X86::BI__builtin_ia32_pmaddwd512: {
11787+
APValue SourceLHS, SourceRHS;
11788+
if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
11789+
!EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
11790+
return false;
11791+
11792+
auto *DestTy = E->getType()->castAs<VectorType>();
11793+
QualType DestEltTy = DestTy->getElementType();
11794+
unsigned SourceLen = SourceLHS.getVectorLength();
11795+
bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType();
11796+
SmallVector<APValue, 4> ResultElements;
11797+
ResultElements.reserve(SourceLen / 2);
11798+
11799+
for (unsigned EltNum = 0; EltNum < SourceLen; EltNum += 2) {
11800+
const APSInt &LoLHS = SourceLHS.getVectorElt(EltNum).getInt();
11801+
const APSInt &HiLHS = SourceLHS.getVectorElt(EltNum + 1).getInt();
11802+
const APSInt &LoRHS = SourceRHS.getVectorElt(EltNum).getInt();
11803+
const APSInt &HiRHS = SourceRHS.getVectorElt(EltNum + 1).getInt();
11804+
unsigned BitWidth = 2 * LoLHS.getBitWidth();
11805+
11806+
switch (E->getBuiltinCallee()) {
11807+
case clang::X86::BI__builtin_ia32_pmaddubsw128:
11808+
case clang::X86::BI__builtin_ia32_pmaddubsw256:
11809+
case clang::X86::BI__builtin_ia32_pmaddubsw512:
11810+
ResultElements.push_back(APValue(
11811+
APSInt((LoLHS.zext(BitWidth) * LoRHS.sext(BitWidth))
11812+
.sadd_sat((HiLHS.zext(BitWidth) * HiRHS.sext(BitWidth))),
11813+
DestUnsigned)));
11814+
break;
11815+
case clang::X86::BI__builtin_ia32_pmaddwd128:
11816+
case clang::X86::BI__builtin_ia32_pmaddwd256:
11817+
case clang::X86::BI__builtin_ia32_pmaddwd512:
11818+
ResultElements.push_back(
11819+
APValue(APSInt((LoLHS.sext(BitWidth) * LoRHS.sext(BitWidth)) +
11820+
(HiLHS.sext(BitWidth) * HiRHS.sext(BitWidth)),
11821+
DestUnsigned)));
11822+
break;
11823+
}
11824+
}
11825+
11826+
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
11827+
}
1179411828

1179511829
case clang::X86::BI__builtin_ia32_pmulhuw128:
1179611830
case clang::X86::BI__builtin_ia32_pmulhuw256:

0 commit comments

Comments
 (0)