@@ -2714,6 +2714,62 @@ static bool interp__builtin_ia32_pmul(InterpState &S, CodePtr OpPC,
27142714 return true ;
27152715}
27162716
2717+ static bool interp__builtin_elementwise_fma (InterpState &S, CodePtr OpPC,
2718+ const CallExpr *Call) {
2719+ assert (Call->getNumArgs () == 3 );
2720+
2721+ FPOptions FPO = Call->getFPFeaturesInEffect (S.Ctx .getLangOpts ());
2722+ llvm::RoundingMode RM = getRoundingMode (FPO);
2723+ const QualType Arg1Type = Call->getArg (0 )->getType ();
2724+ const QualType Arg2Type = Call->getArg (1 )->getType ();
2725+ const QualType Arg3Type = Call->getArg (2 )->getType ();
2726+
2727+ // Non-vector floating point types.
2728+ if (!Arg1Type->isVectorType ()) {
2729+ assert (!Arg2Type->isVectorType ());
2730+ assert (!Arg3Type->isVectorType ());
2731+
2732+ const Floating &Z = S.Stk .pop <Floating>();
2733+ const Floating &Y = S.Stk .pop <Floating>();
2734+ const Floating &X = S.Stk .pop <Floating>();
2735+ APFloat F = X.getAPFloat ();
2736+ F.fusedMultiplyAdd (Y.getAPFloat (), Z.getAPFloat (), RM);
2737+ Floating Result = S.allocFloat (X.getSemantics ());
2738+ Result.copy (F);
2739+ S.Stk .push <Floating>(Result);
2740+ return true ;
2741+ }
2742+
2743+ // Vector type.
2744+ assert (Arg1Type->isVectorType () && Arg2Type->isVectorType () &&
2745+ Arg3Type->isVectorType ());
2746+
2747+ const VectorType *VecT = Arg1Type->castAs <VectorType>();
2748+ const QualType ElemT = VecT->getElementType ();
2749+ unsigned NumElems = VecT->getNumElements ();
2750+
2751+ assert (ElemT == Arg2Type->castAs <VectorType>()->getElementType () &&
2752+ ElemT == Arg3Type->castAs <VectorType>()->getElementType ());
2753+ assert (NumElems == Arg2Type->castAs <VectorType>()->getNumElements () &&
2754+ NumElems == Arg3Type->castAs <VectorType>()->getNumElements ());
2755+ assert (ElemT->isRealFloatingType ());
2756+
2757+ const Pointer &VZ = S.Stk .pop <Pointer>();
2758+ const Pointer &VY = S.Stk .pop <Pointer>();
2759+ const Pointer &VX = S.Stk .pop <Pointer>();
2760+ const Pointer &Dst = S.Stk .peek <Pointer>();
2761+ for (unsigned I = 0 ; I != NumElems; ++I) {
2762+ using T = PrimConv<PT_Float>::T;
2763+ APFloat X = VX.elem <T>(I).getAPFloat ();
2764+ APFloat Y = VY.elem <T>(I).getAPFloat ();
2765+ APFloat Z = VZ.elem <T>(I).getAPFloat ();
2766+ (void )X.fusedMultiplyAdd (Y, Z, RM);
2767+ Dst.elem <Floating>(I) = Floating (X);
2768+ }
2769+ Dst.initializeAllElements ();
2770+ return true ;
2771+ }
2772+
27172773bool InterpretBuiltin (InterpState &S, CodePtr OpPC, const CallExpr *Call,
27182774 uint32_t BuiltinID) {
27192775 if (!S.getASTContext ().BuiltinInfo .isConstantEvaluated (BuiltinID))
@@ -3145,6 +3201,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
31453201 case clang::X86::BI__builtin_ia32_pmuludq128:
31463202 case clang::X86::BI__builtin_ia32_pmuludq256:
31473203 return interp__builtin_ia32_pmul (S, OpPC, Call, BuiltinID);
3204+ case Builtin::BI__builtin_elementwise_fma:
3205+ return interp__builtin_elementwise_fma (S, OpPC, Call);
31483206
31493207 default :
31503208 S.FFDiag (S.Current ->getLocation (OpPC),
0 commit comments