@@ -2601,7 +2601,62 @@ static bool interp__builtin_elementwise_maxmin(InterpState &S, CodePtr OpPC,
2601
2601
static bool interp__builtin_ia32_pmadd (InterpState &S, CodePtr OpPC,
2602
2602
const CallExpr *Call,
2603
2603
unsigned BuiltinID) {
2604
- return true ; // TODO: Implement the builtin.
2604
+ assert (Call->getArg (0 )->getType ()->isVectorType () &&
2605
+ Call->getArg (1 )->getType ()->isVectorType ());
2606
+ const Pointer &RHS = S.Stk .pop <Pointer>();
2607
+ const Pointer &LHS = S.Stk .pop <Pointer>();
2608
+ const Pointer &Dst = S.Stk .peek <Pointer>();
2609
+
2610
+ const auto *VT = Call->getArg (0 )->getType ()->castAs <VectorType>();
2611
+ PrimType ElemT = *S.getContext ().classify (VT->getElementType ());
2612
+ unsigned NumElems = VT->getNumElements ();
2613
+
2614
+ PrimType DstElemT = *S.getContext ().classify (
2615
+ Call->getType ()->castAs <VectorType>()->getElementType ());
2616
+ unsigned DstElem = 0 ;
2617
+ for (unsigned I = 0 ; I < NumElems; I += 2 ) {
2618
+ APInt U_LHS0;
2619
+ APInt U_LHS1;
2620
+ APSInt LHS0;
2621
+ APSInt LHS1;
2622
+ APSInt RHS0;
2623
+ APSInt RHS1;
2624
+ INT_TYPE_SWITCH_NO_BOOL (ElemT, {
2625
+ U_LHS0 = LHS.elem <T>(I).toAPSInt ();
2626
+ U_LHS1 = LHS.elem <T>(I+1 ).toAPSInt ();
2627
+ LHS0 = LHS.elem <T>(I).toAPSInt ();
2628
+ LHS1 = LHS.elem <T>(I+1 ).toAPSInt ();
2629
+ RHS0 = RHS.elem <T>(I).toAPSInt ();
2630
+ RHS1 = RHS.elem <T>(I+1 ).toAPSInt ();
2631
+ });
2632
+
2633
+ APSInt Mul0;
2634
+ APSInt Mul1;
2635
+ APSInt Result;
2636
+ unsigned BitWidth = LHS0.getBitWidth ();
2637
+ switch (BuiltinID) {
2638
+ case clang::X86::BI__builtin_ia32_pmaddubsw128:
2639
+ case clang::X86::BI__builtin_ia32_pmaddubsw256:
2640
+ case clang::X86::BI__builtin_ia32_pmaddubsw512:
2641
+ Mul0 = APSInt (U_LHS0.zext (BitWidth) * RHS0.sext (BitWidth));
2642
+ Mul1 = APSInt (U_LHS1.zext (BitWidth) * RHS1.sext (BitWidth));
2643
+ Result = APSInt (Mul0.sadd_sat (Mul1));
2644
+ break ;
2645
+ case clang::X86::BI__builtin_ia32_pmaddwd128:
2646
+ case clang::X86::BI__builtin_ia32_pmaddwd256:
2647
+ case clang::X86::BI__builtin_ia32_pmaddwd512:
2648
+ Mul0 = APSInt (LHS0.sext (BitWidth) * RHS0.sext (BitWidth));
2649
+ Mul1 = APSInt (LHS1.sext (BitWidth) * RHS1.sext (BitWidth));
2650
+ Result = APSInt (Mul0 + Mul1);
2651
+ break ;
2652
+ }
2653
+ INT_TYPE_SWITCH_NO_BOOL (DstElemT,
2654
+ { Dst.elem <T>(DstElem) = static_cast <T>(Result); });
2655
+ ++DstElem;
2656
+ }
2657
+
2658
+ Dst.initializeAllElements ();
2659
+ return true ;
2605
2660
}
2606
2661
2607
2662
static bool interp__builtin_ia32_pmul (InterpState &S, CodePtr OpPC,
@@ -3373,12 +3428,12 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
3373
3428
case clang::X86::BI__builtin_ia32_pmaddubsw128:
3374
3429
case clang::X86::BI__builtin_ia32_pmaddubsw256:
3375
3430
case clang::X86::BI__builtin_ia32_pmaddubsw512:
3376
- return true ; // TODO: Use interp__builtin_i32_pmadd.
3431
+ return interp__builtin_ia32_pmadd (S, OpPC, Call, BuiltinID);
3377
3432
3378
3433
case clang::X86::BI__builtin_ia32_pmaddwd128:
3379
3434
case clang::X86::BI__builtin_ia32_pmaddwd256:
3380
3435
case clang::X86::BI__builtin_ia32_pmaddwd512:
3381
- return true ; // TODO: Use interp__builtin_i32_pmadd.
3436
+ return interp__builtin_ia32_pmadd (S, OpPC, Call, BuiltinID);
3382
3437
3383
3438
case clang::X86::BI__builtin_ia32_pmulhuw128:
3384
3439
case clang::X86::BI__builtin_ia32_pmulhuw256:
0 commit comments