@@ -2583,10 +2583,35 @@ static bool interp__builtin_elementwise_maxmin(InterpState &S, CodePtr OpPC,
2583
2583
return true ;
2584
2584
}
2585
2585
2586
- static bool interp__builtin_ia32_pmadd (InterpState &S, CodePtr OpPC,
2587
- const CallExpr *Call,
2588
- unsigned BuiltinID) {
2589
- return true ; // TODO: Implement the builtin.
2586
+ static bool interp__builtin_ia32_pmadd (
2587
+ InterpState &S, CodePtr OpPC, const CallExpr *Call,
2588
+ llvm::function_ref<APInt(const APSInt &, const APSInt &, const APSInt &,
2589
+ const APSInt &)>
2590
+ Fn) {
2591
+ assert (Call->getArg (0 )->getType ()->isVectorType () &&
2592
+ Call->getArg (1 )->getType ()->isVectorType ());
2593
+ const Pointer &RHS = S.Stk .pop <Pointer>();
2594
+ const Pointer &LHS = S.Stk .pop <Pointer>();
2595
+ const Pointer &Dst = S.Stk .peek <Pointer>();
2596
+
2597
+ const auto *VT = Call->getArg (0 )->getType ()->castAs <VectorType>();
2598
+ PrimType ElemT = *S.getContext ().classify (VT->getElementType ());
2599
+ unsigned NumElems = VT->getNumElements ();
2600
+ bool DestUnsigned = Call->getType ()->isUnsignedIntegerOrEnumerationType ();
2601
+
2602
+ for (unsigned I = 0 ; I != NumElems; I += 2 ) {
2603
+ INT_TYPE_SWITCH_NO_BOOL (ElemT, {
2604
+ APSInt LoLHS = LHS.elem <T>(I).toAPSInt ();
2605
+ APSInt HiLHS = LHS.elem <T>(I + 1 ).toAPSInt ();
2606
+ APSInt LoRHS = RHS.elem <T>(I).toAPSInt ();
2607
+ APSInt HiRHS = RHS.elem <T>(I + 1 ).toAPSInt ();
2608
+ Dst.elem <T>(I) =
2609
+ static_cast <T>(APSInt (Fn (LoLHS, HiLHS, LoRHS, HiRHS), DestUnsigned));
2610
+ });
2611
+ }
2612
+
2613
+ Dst.initializeAllElements ();
2614
+ return true ;
2590
2615
}
2591
2616
2592
2617
static bool interp__builtin_ia32_pmul (InterpState &S, CodePtr OpPC,
@@ -3503,12 +3528,26 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
3503
3528
case clang::X86::BI__builtin_ia32_pmaddubsw128:
3504
3529
case clang::X86::BI__builtin_ia32_pmaddubsw256:
3505
3530
case clang::X86::BI__builtin_ia32_pmaddubsw512:
3506
- return true ; // TODO: Use interp__builtin_i32_pmadd.
3507
-
3531
+ return interp__builtin_ia32_pmadd (
3532
+ S, OpPC, Call,
3533
+ [](const APSInt &LoLHS, const APSInt &HiLHS, const APSInt &LoRHS,
3534
+ const APSInt &HiRHS) {
3535
+ unsigned BitWidth = 2 * LoLHS.getBitWidth ();
3536
+ return (LoLHS.zext (BitWidth) * LoRHS.sext (BitWidth))
3537
+ .sadd_sat ((HiLHS.zext (BitWidth) * HiRHS.sext (BitWidth)));
3538
+ });
3539
+
3508
3540
case clang::X86::BI__builtin_ia32_pmaddwd128:
3509
3541
case clang::X86::BI__builtin_ia32_pmaddwd256:
3510
3542
case clang::X86::BI__builtin_ia32_pmaddwd512:
3511
- return true ; // TODO: Use interp__builtin_i32_pmadd.
3543
+ return interp__builtin_ia32_pmadd (
3544
+ S, OpPC, Call,
3545
+ [](const APSInt &LoLHS, const APSInt &HiLHS, const APSInt &LoRHS,
3546
+ const APSInt &HiRHS) {
3547
+ unsigned BitWidth = 2 * LoLHS.getBitWidth ();
3548
+ return (LoLHS.sext (BitWidth) * LoRHS.sext (BitWidth)) +
3549
+ (HiLHS.sext (BitWidth) * HiRHS.sext (BitWidth));
3550
+ });
3512
3551
3513
3552
case clang::X86::BI__builtin_ia32_pmulhuw128:
3514
3553
case clang::X86::BI__builtin_ia32_pmulhuw256:
0 commit comments