@@ -2601,7 +2601,62 @@ static bool interp__builtin_elementwise_maxmin(InterpState &S, CodePtr OpPC,
26012601static bool interp__builtin_ia32_pmadd (InterpState &S, CodePtr OpPC,
26022602 const CallExpr *Call,
26032603 unsigned BuiltinID) {
2604- return true ; // TODO: Implement the builtin.
2604+ assert (Call->getArg (0 )->getType ()->isVectorType () &&
2605+ Call->getArg (1 )->getType ()->isVectorType ());
2606+ const Pointer &RHS = S.Stk .pop <Pointer>();
2607+ const Pointer &LHS = S.Stk .pop <Pointer>();
2608+ const Pointer &Dst = S.Stk .peek <Pointer>();
2609+
2610+ const auto *VT = Call->getArg (0 )->getType ()->castAs <VectorType>();
2611+ PrimType ElemT = *S.getContext ().classify (VT->getElementType ());
2612+ unsigned NumElems = VT->getNumElements ();
2613+
2614+ PrimType DstElemT = *S.getContext ().classify (
2615+ Call->getType ()->castAs <VectorType>()->getElementType ());
2616+ unsigned DstElem = 0 ;
2617+ for (unsigned I = 0 ; I < NumElems; I += 2 ) {
2618+ APInt U_LHS0;
2619+ APInt U_LHS1;
2620+ APSInt LHS0;
2621+ APSInt LHS1;
2622+ APSInt RHS0;
2623+ APSInt RHS1;
2624+ INT_TYPE_SWITCH_NO_BOOL (ElemT, {
2625+ U_LHS0 = LHS.elem <T>(I).toAPSInt ();
2626+ U_LHS1 = LHS.elem <T>(I+1 ).toAPSInt ();
2627+ LHS0 = LHS.elem <T>(I).toAPSInt ();
2628+ LHS1 = LHS.elem <T>(I+1 ).toAPSInt ();
2629+ RHS0 = RHS.elem <T>(I).toAPSInt ();
2630+ RHS1 = RHS.elem <T>(I+1 ).toAPSInt ();
2631+ });
2632+
2633+ APSInt Mul0;
2634+ APSInt Mul1;
2635+ APSInt Result;
2636+ unsigned BitWidth = LHS0.getBitWidth ();
2637+ switch (BuiltinID) {
2638+ case clang::X86::BI__builtin_ia32_pmaddubsw128:
2639+ case clang::X86::BI__builtin_ia32_pmaddubsw256:
2640+ case clang::X86::BI__builtin_ia32_pmaddubsw512:
2641+ Mul0 = APSInt (U_LHS0.zext (BitWidth) * RHS0.sext (BitWidth));
2642+ Mul1 = APSInt (U_LHS1.zext (BitWidth) * RHS1.sext (BitWidth));
2643+ Result = APSInt (Mul0.sadd_sat (Mul1));
2644+ break ;
2645+ case clang::X86::BI__builtin_ia32_pmaddwd128:
2646+ case clang::X86::BI__builtin_ia32_pmaddwd256:
2647+ case clang::X86::BI__builtin_ia32_pmaddwd512:
2648+ Mul0 = APSInt (LHS0.sext (BitWidth) * RHS0.sext (BitWidth));
2649+ Mul1 = APSInt (LHS1.sext (BitWidth) * RHS1.sext (BitWidth));
2650+ Result = APSInt (Mul0 + Mul1);
2651+ break ;
2652+ }
2653+ INT_TYPE_SWITCH_NO_BOOL (DstElemT,
2654+ { Dst.elem <T>(DstElem) = static_cast <T>(Result); });
2655+ ++DstElem;
2656+ }
2657+
2658+ Dst.initializeAllElements ();
2659+ return true ;
26052660}
26062661
26072662static bool interp__builtin_ia32_pmul (InterpState &S, CodePtr OpPC,
@@ -3373,12 +3428,12 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
33733428 case clang::X86::BI__builtin_ia32_pmaddubsw128:
33743429 case clang::X86::BI__builtin_ia32_pmaddubsw256:
33753430 case clang::X86::BI__builtin_ia32_pmaddubsw512:
3376- return true ; // TODO: Use interp__builtin_i32_pmadd.
3431+ return interp__builtin_ia32_pmadd (S, OpPC, Call, BuiltinID);
33773432
33783433 case clang::X86::BI__builtin_ia32_pmaddwd128:
33793434 case clang::X86::BI__builtin_ia32_pmaddwd256:
33803435 case clang::X86::BI__builtin_ia32_pmaddwd512:
3381- return true ; // TODO: Use interp__builtin_i32_pmadd.
3436+ return interp__builtin_ia32_pmadd (S, OpPC, Call, BuiltinID);
33823437
33833438 case clang::X86::BI__builtin_ia32_pmulhuw128:
33843439 case clang::X86::BI__builtin_ia32_pmulhuw256:
0 commit comments