@@ -2683,9 +2683,10 @@ static bool interp__builtin_ia32_pmul(InterpState &S, CodePtr OpPC,
26832683 const auto *VT = Call->getArg (0 )->getType ()->castAs <VectorType>();
26842684 PrimType ElemT = *S.getContext ().classify (VT->getElementType ());
26852685 unsigned SourceLen = VT->getNumElements ();
2686- SmallVector<APValue, 4 > ResultElements;
2687- ResultElements.reserve (SourceLen / 2 );
26882686
2687+ PrimType DstElemT = *S.getContext ().classify (
2688+ Call->getType ()->castAs <VectorType>()->getElementType ());
2689+ unsigned DstElem = 0 ;
26892690 for (unsigned I = 0 ; I != SourceLen; I += 2 ) {
26902691 APSInt Elem1;
26912692 APSInt Elem2;
@@ -2699,16 +2700,19 @@ static bool interp__builtin_ia32_pmul(InterpState &S, CodePtr OpPC,
26992700 case clang::X86::BI__builtin_ia32_pmuludq128:
27002701 case clang::X86::BI__builtin_ia32_pmuludq256:
27012702 case clang::X86::BI__builtin_ia32_pmuludq512:
2702- Result = APSInt (llvm::APIntOps::muluExtended (Elem1, Elem2), true );
2703+ Result = APSInt (llvm::APIntOps::muluExtended (Elem1, Elem2),
2704+ /* IsUnsigned=*/ true );
27032705 break ;
27042706 case clang::X86::BI__builtin_ia32_pmuldq128:
27052707 case clang::X86::BI__builtin_ia32_pmuldq256:
27062708 case clang::X86::BI__builtin_ia32_pmuldq512:
2707- Result = APSInt (llvm::APIntOps::mulsExtended (Elem1, Elem2), false );
2709+ Result = APSInt (llvm::APIntOps::mulsExtended (Elem1, Elem2),
2710+ /* IsUnsigned=*/ false );
27082711 break ;
27092712 }
2710- INT_TYPE_SWITCH_NO_BOOL (ElemT,
2711- { Dst.elem <T>(I) = static_cast <T>(Result); });
2713+ INT_TYPE_SWITCH_NO_BOOL (DstElemT,
2714+ { Dst.elem <T>(DstElem) = static_cast <T>(Result); });
2715+ ++DstElem;
27122716 }
27132717
27142718 Dst.initializeAllElements ();
@@ -2774,6 +2778,40 @@ static bool interp__builtin_elementwise_fma(InterpState &S, CodePtr OpPC,
27742778 return true ;
27752779}
27762780
2781+ // / AVX512 predicated move: "Result = Mask[] ? LHS[] : RHS[]".
2782+ static bool interp__builtin_select (InterpState &S, CodePtr OpPC,
2783+ const CallExpr *Call) {
2784+ const Pointer &RHS = S.Stk .pop <Pointer>();
2785+ const Pointer &LHS = S.Stk .pop <Pointer>();
2786+ PrimType MaskT = *S.getContext ().classify (Call->getArg (0 ));
2787+ APSInt Mask = popToAPSInt (S.Stk , MaskT);
2788+ const Pointer &Dst = S.Stk .peek <Pointer>();
2789+
2790+ assert (LHS.getNumElems () == RHS.getNumElems ());
2791+ assert (LHS.getNumElems () == Dst.getNumElems ());
2792+ unsigned NumElems = LHS.getNumElems ();
2793+ PrimType ElemT = LHS.getFieldDesc ()->getPrimType ();
2794+ PrimType DstElemT = Dst.getFieldDesc ()->getPrimType ();
2795+
2796+ for (unsigned I = 0 ; I != NumElems; ++I) {
2797+ if (ElemT == PT_Float) {
2798+ assert (DstElemT == PT_Float);
2799+ Dst.elem <Floating>(I) =
2800+ Mask[I] ? LHS.elem <Floating>(I) : RHS.elem <Floating>(I);
2801+ } else {
2802+ APSInt Elem;
2803+ INT_TYPE_SWITCH (ElemT, {
2804+ Elem = Mask[I] ? LHS.elem <T>(I).toAPSInt () : RHS.elem <T>(I).toAPSInt ();
2805+ });
2806+ INT_TYPE_SWITCH_NO_BOOL (DstElemT,
2807+ { Dst.elem <T>(I) = static_cast <T>(Elem); });
2808+ }
2809+ }
2810+ Dst.initializeAllElements ();
2811+
2812+ return true ;
2813+ }
2814+
27772815bool InterpretBuiltin (InterpState &S, CodePtr OpPC, const CallExpr *Call,
27782816 uint32_t BuiltinID) {
27792817 if (!S.getASTContext ().BuiltinInfo .isConstantEvaluated (BuiltinID))
@@ -3204,10 +3242,38 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
32043242 case clang::X86::BI__builtin_ia32_pmuldq512:
32053243 case clang::X86::BI__builtin_ia32_pmuludq128:
32063244 case clang::X86::BI__builtin_ia32_pmuludq256:
3245+ case clang::X86::BI__builtin_ia32_pmuludq512:
32073246 return interp__builtin_ia32_pmul (S, OpPC, Call, BuiltinID);
3247+
32083248 case Builtin::BI__builtin_elementwise_fma:
32093249 return interp__builtin_elementwise_fma (S, OpPC, Call);
32103250
3251+ case X86::BI__builtin_ia32_selectb_128:
3252+ case X86::BI__builtin_ia32_selectb_256:
3253+ case X86::BI__builtin_ia32_selectb_512:
3254+ case X86::BI__builtin_ia32_selectw_128:
3255+ case X86::BI__builtin_ia32_selectw_256:
3256+ case X86::BI__builtin_ia32_selectw_512:
3257+ case X86::BI__builtin_ia32_selectd_128:
3258+ case X86::BI__builtin_ia32_selectd_256:
3259+ case X86::BI__builtin_ia32_selectd_512:
3260+ case X86::BI__builtin_ia32_selectq_128:
3261+ case X86::BI__builtin_ia32_selectq_256:
3262+ case X86::BI__builtin_ia32_selectq_512:
3263+ case X86::BI__builtin_ia32_selectph_128:
3264+ case X86::BI__builtin_ia32_selectph_256:
3265+ case X86::BI__builtin_ia32_selectph_512:
3266+ case X86::BI__builtin_ia32_selectpbf_128:
3267+ case X86::BI__builtin_ia32_selectpbf_256:
3268+ case X86::BI__builtin_ia32_selectpbf_512:
3269+ case X86::BI__builtin_ia32_selectps_128:
3270+ case X86::BI__builtin_ia32_selectps_256:
3271+ case X86::BI__builtin_ia32_selectps_512:
3272+ case X86::BI__builtin_ia32_selectpd_128:
3273+ case X86::BI__builtin_ia32_selectpd_256:
3274+ case X86::BI__builtin_ia32_selectpd_512:
3275+ return interp__builtin_select (S, OpPC, Call);
3276+
32113277 default :
32123278 S.FFDiag (S.Current ->getLocation (OpPC),
32133279 diag::note_invalid_subexpr_in_const_expr)
0 commit comments