Skip to content

Commit 19fad2f

Browse files
committed
Modified InterpBuiltin.cpp and ExprConstant.cpp
1 parent 9ada542 commit 19fad2f

File tree

2 files changed

+101
-11
lines changed

2 files changed

+101
-11
lines changed

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 58 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2601,7 +2601,62 @@ static bool interp__builtin_elementwise_maxmin(InterpState &S, CodePtr OpPC,
26012601
static bool interp__builtin_ia32_pmadd(InterpState &S, CodePtr OpPC,
26022602
const CallExpr *Call,
26032603
unsigned BuiltinID) {
2604-
return true; // TODO: Implement the builtin.
2604+
assert(Call->getArg(0)->getType()->isVectorType() &&
2605+
Call->getArg(1)->getType()->isVectorType());
2606+
const Pointer &RHS = S.Stk.pop<Pointer>();
2607+
const Pointer &LHS = S.Stk.pop<Pointer>();
2608+
const Pointer &Dst = S.Stk.peek<Pointer>();
2609+
2610+
const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>();
2611+
PrimType ElemT = *S.getContext().classify(VT->getElementType());
2612+
unsigned NumElems = VT->getNumElements();
2613+
2614+
PrimType DstElemT = *S.getContext().classify(
2615+
Call->getType()->castAs<VectorType>()->getElementType());
2616+
unsigned DstElem = 0;
2617+
for (unsigned I = 0; I < NumElems; I += 2) {
2618+
APInt U_LHS0;
2619+
APInt U_LHS1;
2620+
APSInt LHS0;
2621+
APSInt LHS1;
2622+
APSInt RHS0;
2623+
APSInt RHS1;
2624+
INT_TYPE_SWITCH_NO_BOOL(ElemT, {
2625+
U_LHS0 = LHS.elem<T>(I).toAPSInt();
2626+
U_LHS1 = LHS.elem<T>(I+1).toAPSInt();
2627+
LHS0 = LHS.elem<T>(I).toAPSInt();
2628+
LHS1 = LHS.elem<T>(I+1).toAPSInt();
2629+
RHS0 = RHS.elem<T>(I).toAPSInt();
2630+
RHS1 = RHS.elem<T>(I+1).toAPSInt();
2631+
});
2632+
2633+
APSInt Mul0;
2634+
APSInt Mul1;
2635+
APSInt Result;
2636+
unsigned BitWidth = LHS0.getBitWidth();
2637+
switch (BuiltinID) {
2638+
case clang::X86::BI__builtin_ia32_pmaddubsw128:
2639+
case clang::X86::BI__builtin_ia32_pmaddubsw256:
2640+
case clang::X86::BI__builtin_ia32_pmaddubsw512:
2641+
Mul0 = APSInt(U_LHS0.zext(BitWidth) * RHS0.sext(BitWidth));
2642+
Mul1 = APSInt(U_LHS1.zext(BitWidth) * RHS1.sext(BitWidth));
2643+
Result = APSInt(Mul0.sadd_sat(Mul1));
2644+
break;
2645+
case clang::X86::BI__builtin_ia32_pmaddwd128:
2646+
case clang::X86::BI__builtin_ia32_pmaddwd256:
2647+
case clang::X86::BI__builtin_ia32_pmaddwd512:
2648+
Mul0 = APSInt(LHS0.sext(BitWidth) * RHS0.sext(BitWidth));
2649+
Mul1 = APSInt(LHS1.sext(BitWidth) * RHS1.sext(BitWidth));
2650+
Result = APSInt(Mul0 + Mul1);
2651+
break;
2652+
}
2653+
INT_TYPE_SWITCH_NO_BOOL(DstElemT,
2654+
{ Dst.elem<T>(DstElem) = static_cast<T>(Result); });
2655+
++DstElem;
2656+
}
2657+
2658+
Dst.initializeAllElements();
2659+
return true;
26052660
}
26062661

26072662
static bool interp__builtin_ia32_pmul(InterpState &S, CodePtr OpPC,
@@ -3373,12 +3428,12 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
33733428
case clang::X86::BI__builtin_ia32_pmaddubsw128:
33743429
case clang::X86::BI__builtin_ia32_pmaddubsw256:
33753430
case clang::X86::BI__builtin_ia32_pmaddubsw512:
3376-
return true; // TODO: Use interp__builtin_i32_pmadd.
3431+
return interp__builtin_ia32_pmadd(S, OpPC, Call, BuiltinID);
33773432

33783433
case clang::X86::BI__builtin_ia32_pmaddwd128:
33793434
case clang::X86::BI__builtin_ia32_pmaddwd256:
33803435
case clang::X86::BI__builtin_ia32_pmaddwd512:
3381-
return true; // TODO: Use interp__builtin_i32_pmadd.
3436+
return interp__builtin_ia32_pmadd(S, OpPC, Call, BuiltinID);
33823437

33833438
case clang::X86::BI__builtin_ia32_pmulhuw128:
33843439
case clang::X86::BI__builtin_ia32_pmulhuw256:

clang/lib/AST/ExprConstant.cpp

Lines changed: 43 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11725,18 +11725,53 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1172511725
return EvaluateBinOpExpr(llvm::APIntOps::avgCeilU);
1172611726

1172711727
case clang::X86::BI__builtin_ia32_pmaddubsw128:
case clang::X86::BI__builtin_ia32_pmaddubsw256:
case clang::X86::BI__builtin_ia32_pmaddubsw512:
case clang::X86::BI__builtin_ia32_pmaddwd128:
case clang::X86::BI__builtin_ia32_pmaddwd256:
case clang::X86::BI__builtin_ia32_pmaddwd512: {
  // Pairwise multiply-add: each pair of adjacent source elements
  // (2*K, 2*K+1) produces destination element K.
  APValue SourceLHS, SourceRHS;
  if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
      !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
    return false;

  const unsigned SourceLen = SourceLHS.getVectorLength();
  // Give the result elements the signedness of the destination element type
  // rather than APSInt's default (unsigned).
  const bool DestUnsigned = E->getType()
                                ->castAs<VectorType>()
                                ->getElementType()
                                ->isUnsignedIntegerOrEnumerationType();
  SmallVector<APValue, 4> ResultElements;
  ResultElements.reserve(SourceLen / 2);

  for (unsigned EltNum = 0; EltNum < SourceLen; EltNum += 2) {
    const APSInt &LHS0 = SourceLHS.getVectorElt(EltNum).getInt();
    const APSInt &LHS1 = SourceLHS.getVectorElt(EltNum + 1).getInt();
    const APSInt &RHS0 = SourceRHS.getVectorElt(EltNum).getInt();
    const APSInt &RHS1 = SourceRHS.getVectorElt(EltNum + 1).getInt();
    // Products are formed at twice the source element width so they are not
    // truncated before being added.
    const unsigned BitWidth = 2 * LHS0.getBitWidth();

    APInt Sum;
    switch (E->getBuiltinCallee()) {
    case clang::X86::BI__builtin_ia32_pmaddubsw128:
    case clang::X86::BI__builtin_ia32_pmaddubsw256:
    case clang::X86::BI__builtin_ia32_pmaddubsw512:
      // LHS is treated as unsigned (zext), RHS as signed (sext). The
      // parentheses matter: the saturating add applies to the two products,
      // not to the RHS operand of the first multiplication.
      Sum = (LHS0.zext(BitWidth) * RHS0.sext(BitWidth))
                .sadd_sat(LHS1.zext(BitWidth) * RHS1.sext(BitWidth));
      break;
    case clang::X86::BI__builtin_ia32_pmaddwd128:
    case clang::X86::BI__builtin_ia32_pmaddwd256:
    case clang::X86::BI__builtin_ia32_pmaddwd512:
      // Both operands are signed; the sum wraps on overflow.
      Sum = LHS0.sext(BitWidth) * RHS0.sext(BitWidth) +
            LHS1.sext(BitWidth) * RHS1.sext(BitWidth);
      break;
    }
    ResultElements.push_back(APValue(APSInt(Sum, DestUnsigned)));
  }

  return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
1174011775

1174111776
case clang::X86::BI__builtin_ia32_pmulhuw128:
1174211777
case clang::X86::BI__builtin_ia32_pmulhuw256:

0 commit comments

Comments
 (0)