@@ -2579,10 +2579,30 @@ static bool interp__builtin_elementwise_maxmin(InterpState &S, CodePtr OpPC,
2579
2579
return true ;
2580
2580
}
2581
2581
2582
- static bool interp__builtin_ia32_pmadd (InterpState &S, CodePtr OpPC,
2583
- const CallExpr *Call,
2584
- unsigned BuiltinID) {
2585
- return true ; // TODO: Implement the builtin.
2582
+ static bool interp__builtin_ia32_pmadd (
2583
+ InterpState &S, CodePtr OpPC, const CallExpr *Call,
2584
+ llvm::function_ref<APInt(const APSInt &, const APSInt &)> Fn) {
2585
+ assert (Call->getArg (0 )->getType ()->isVectorType () &&
2586
+ Call->getArg (1 )->getType ()->isVectorType ());
2587
+ const Pointer &RHS = S.Stk .pop <Pointer>();
2588
+ const Pointer &LHS = S.Stk .pop <Pointer>();
2589
+ const Pointer &Dst = S.Stk .peek <Pointer>();
2590
+
2591
+ const auto *VT = Call->getArg (0 )->getType ()->castAs <VectorType>();
2592
+ PrimType ElemT = *S.getContext ().classify (VT->getElementType ());
2593
+ unsigned NumElems = VT->getNumElements ();
2594
+ bool DestUnsigned = Call->getType ()->isUnsignedIntegerOrEnumerationType ();
2595
+
2596
+ for (unsigned I = 0 ; I != NumElems; ++I) {
2597
+ INT_TYPE_SWITCH_NO_BOOL (ElemT, {
2598
+ APSInt Elem1 = LHS.elem <T>(I).toAPSInt ();
2599
+ APSInt Elem2 = RHS.elem <T>(I).toAPSInt ();
2600
+ Dst.elem <T>(I) = static_cast <T>(APSInt (Fn (Elem1, Elem2), DestUnsigned));
2601
+ });
2602
+ }
2603
+
2604
+ Dst.initializeAllElements ();
2605
+ return true ;
2586
2606
}
2587
2607
2588
2608
static bool interp__builtin_ia32_pmul (InterpState &S, CodePtr OpPC,
@@ -3457,12 +3477,20 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
3457
3477
case clang::X86::BI__builtin_ia32_pmaddubsw128:
3458
3478
case clang::X86::BI__builtin_ia32_pmaddubsw256:
3459
3479
case clang::X86::BI__builtin_ia32_pmaddubsw512:
3460
- return true ; // TODO: Use interp__builtin_i32_pmadd.
3480
+ return interp__builtin_ia32_pmadd (S, OpPC, Call,
3481
+ [](const APSInt &LoLHS, const APSInt &HiLHS, const APSInt &LoRHS, const APSInt &HiRHS) {
3482
+ unsigned BitWidth = 2 * LHS.getBitWidth ();
3483
+ return (LoLHS.zext (BitWidth) * LoRHS.sext (BitWidth)).sadd_sat ((HiLHS.zext (BitWidth) * HiRHS.sext (BitWidth)));
3484
+ });
3461
3485
3462
3486
case clang::X86::BI__builtin_ia32_pmaddwd128:
3463
3487
case clang::X86::BI__builtin_ia32_pmaddwd256:
3464
3488
case clang::X86::BI__builtin_ia32_pmaddwd512:
3465
- return true ; // TODO: Use interp__builtin_i32_pmadd.
3489
+ return interp__builtin_ia32_pmadd (S, OpPC, Call,
3490
+ [](const APSInt &LoLHS, const APSInt &HiLHS, const APSInt &LoRHS, const APSInt &HiRHS) {
3491
+ unsigned BitWidth = 2 * LHS.getBitWidth ();
3492
+ return (LoLHS.sext (BitWidth) * LoRHS.sext (BitWidth)) + (HiLHS.sext (BitWidth) * HiRHS.sext (BitWidth));
3493
+ });
3466
3494
3467
3495
case clang::X86::BI__builtin_ia32_pmulhuw128:
3468
3496
case clang::X86::BI__builtin_ia32_pmulhuw256:
0 commit comments