|
8 | 8 | #include "../ExprConstShared.h" |
9 | 9 | #include "Boolean.h" |
10 | 10 | #include "EvalEmitter.h" |
| 11 | +#include "Floating.h" |
11 | 12 | #include "Interp.h" |
12 | 13 | #include "InterpBuiltinBitCast.h" |
13 | 14 | #include "PrimType.h" |
|
19 | 20 | #include "llvm/ADT/StringExtras.h" |
20 | 21 | #include "llvm/Support/ErrorHandling.h" |
21 | 22 | #include "llvm/Support/SipHash.h" |
| 23 | +#include <cassert> |
22 | 24 |
|
23 | 25 | namespace clang { |
24 | 26 | namespace interp { |
@@ -2736,6 +2738,141 @@ static bool interp__builtin_ia32_pmul(InterpState &S, CodePtr OpPC, |
2736 | 2738 | return true; |
2737 | 2739 | } |
2738 | 2740 |
|
| 2741 | +static bool interp_builtin_ia32ph_add_sub(InterpState &S, CodePtr OpPC, |
| 2742 | + const InterpFrame *Frame, |
| 2743 | + const CallExpr *Call, |
| 2744 | + uint32_t BuiltinID) { |
| 2745 | + assert(Call->getArg(0)->getType()->isVectorType() && |
| 2746 | + Call->getArg(1)->getType()->isVectorType()); |
| 2747 | + const Pointer &RHS = S.Stk.pop<Pointer>(); |
| 2748 | + const Pointer &LHS = S.Stk.pop<Pointer>(); |
| 2749 | + const Pointer &Dst = S.Stk.peek<Pointer>(); |
| 2750 | + |
| 2751 | + const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>(); |
| 2752 | + PrimType ElemT = *S.getContext().classify(VT->getElementType()); |
| 2753 | + unsigned SourceLen = VT->getNumElements(); |
| 2754 | + assert(SourceLen % 2 == 0 && |
| 2755 | + Call->getArg(1)->getType()->castAs<VectorType>()->getNumElements() == SourceLen); |
| 2756 | + PrimType DstElemT = *S.getContext().classify( |
| 2757 | + Call->getType()->castAs<VectorType>()->getElementType()); |
| 2758 | + unsigned DstElem = 0; |
| 2759 | + |
| 2760 | + bool IsAdd = (BuiltinID == clang::X86::BI__builtin_ia32_phaddw128 || |
| 2761 | + BuiltinID == clang::X86::BI__builtin_ia32_phaddw256 || |
| 2762 | + BuiltinID == clang::X86::BI__builtin_ia32_phaddd128 || |
| 2763 | + BuiltinID == clang::X86::BI__builtin_ia32_phaddd256 || |
| 2764 | + BuiltinID == clang::X86::BI__builtin_ia32_phaddsw128 || |
| 2765 | + BuiltinID == clang::X86::BI__builtin_ia32_phaddsw256); |
| 2766 | + |
| 2767 | + bool IsSaturating = (BuiltinID == clang::X86::BI__builtin_ia32_phaddsw128 || |
| 2768 | + BuiltinID == clang::X86::BI__builtin_ia32_phaddsw256 || |
| 2769 | + BuiltinID == clang::X86::BI__builtin_ia32_phsubsw128 || |
| 2770 | + BuiltinID == clang::X86::BI__builtin_ia32_phsubsw256); |
| 2771 | + |
| 2772 | + for (unsigned I = 0; I != SourceLen; I += 2) { |
| 2773 | + APSInt Elem1; |
| 2774 | + APSInt Elem2; |
| 2775 | + INT_TYPE_SWITCH_NO_BOOL(ElemT, { |
| 2776 | + Elem1 = LHS.elem<T>(I).toAPSInt(); |
| 2777 | + Elem2 = LHS.elem<T>(I+1).toAPSInt(); |
| 2778 | + }); |
| 2779 | + APSInt Result; |
| 2780 | + if (IsAdd) { |
| 2781 | + if (IsSaturating) { |
| 2782 | + Result = APSInt(Elem1.sadd_sat(Elem2), /*IsUnsigned=*/Elem1.isUnsigned()); |
| 2783 | + }else{ |
| 2784 | + Result = APSInt(Elem1 + Elem2, /*IsUnsigned=*/Elem1.isUnsigned()); |
| 2785 | + } |
| 2786 | + }else{ |
| 2787 | + if (IsSaturating) { |
| 2788 | + Result = |
| 2789 | + APSInt(Elem1.ssub_sat(Elem2), /*IsUnsigned=*/Elem1.isUnsigned()); |
| 2790 | + } else { |
| 2791 | + Result = APSInt(Elem1 - Elem2, /*IsUnsigned=*/Elem1.isUnsigned()); |
| 2792 | + } |
| 2793 | + } |
| 2794 | + INT_TYPE_SWITCH_NO_BOOL(DstElemT, |
| 2795 | + { Dst.elem<T>(DstElem) = static_cast<T>(Result); }); |
| 2796 | + ++DstElem; |
| 2797 | + } |
| 2798 | + for (unsigned I = 0; I != SourceLen; I += 2) { |
| 2799 | + APSInt Elem1; |
| 2800 | + APSInt Elem2; |
| 2801 | + INT_TYPE_SWITCH_NO_BOOL(ElemT, { |
| 2802 | + Elem1 = RHS.elem<T>(I).toAPSInt(); |
| 2803 | + Elem2 = RHS.elem<T>(I + 1).toAPSInt(); |
| 2804 | + }); |
| 2805 | + APSInt Result; |
| 2806 | + if (IsAdd) { |
| 2807 | + if (IsSaturating) { |
| 2808 | + Result = |
| 2809 | + APSInt(Elem1.sadd_sat(Elem2), /*IsUnsigned=*/Elem1.isUnsigned()); |
| 2810 | + } else { |
| 2811 | + Result = APSInt(Elem1 + Elem2, /*IsUnsigned=*/Elem1.isUnsigned()); |
| 2812 | + } |
| 2813 | + } else { |
| 2814 | + if (IsSaturating) { |
| 2815 | + Result = APSInt(Elem1.ssub_sat(Elem2), /*IsUnsigned=*/Elem1.isUnsigned()); |
| 2816 | + } else { |
| 2817 | + Result = APSInt(Elem1 - Elem2, /*IsUnsigned=*/Elem1.isUnsigned()); |
| 2818 | + } |
| 2819 | + } |
| 2820 | + INT_TYPE_SWITCH_NO_BOOL(DstElemT, |
| 2821 | + { Dst.elem<T>(DstElem) = static_cast<T>(Result); }); |
| 2822 | + ++DstElem; |
| 2823 | + } |
| 2824 | + Dst.initializeAllElements(); |
| 2825 | + return true; |
| 2826 | +} |
| 2827 | + |
| 2828 | +static bool interp_builtin_floatph_add_sub(InterpState &S, CodePtr OpPC, |
| 2829 | + const InterpFrame *Frame, |
| 2830 | + const CallExpr *Call, |
| 2831 | + uint32_t BuiltinID) { |
| 2832 | + assert(Call->getArg(0)->getType()->isVectorType() && |
| 2833 | + Call->getArg(1)->getType()->isVectorType()); |
| 2834 | + const Pointer &RHS = S.Stk.pop<Pointer>(); |
| 2835 | + const Pointer &LHS = S.Stk.pop<Pointer>(); |
| 2836 | + const Pointer &Dst = S.Stk.peek<Pointer>(); |
| 2837 | + |
| 2838 | + FPOptions FPO = Call->getFPFeaturesInEffect(S.Ctx.getLangOpts()); |
| 2839 | + llvm::RoundingMode RM = getRoundingMode(FPO); |
| 2840 | + const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>(); |
| 2841 | + unsigned SourceLen = VT->getNumElements(); |
| 2842 | + assert(SourceLen % 2 == 0 && |
| 2843 | + Call->getArg(1)->getType()->castAs<VectorType>()->getNumElements() == |
| 2844 | + SourceLen); |
| 2845 | + unsigned DstElem = 0; |
| 2846 | + bool IsAdd = (BuiltinID == clang::X86::BI__builtin_ia32_haddpd || |
| 2847 | + BuiltinID == clang::X86::BI__builtin_ia32_haddpd256 || |
| 2848 | + BuiltinID == clang::X86::BI__builtin_ia32_haddps || |
| 2849 | + BuiltinID == clang::X86::BI__builtin_ia32_haddps256); |
| 2850 | + using T = Floating; |
| 2851 | + for (unsigned I = 0; I != SourceLen; I += 2) { |
| 2852 | + APFloat Elem1 = LHS.elem<T>(I).getAPFloat(); |
| 2853 | + APFloat Elem2 = LHS.elem<T>(I + 1).getAPFloat(); |
| 2854 | + |
| 2855 | + if (IsAdd) { |
| 2856 | + Elem1.add(Elem2, RM); |
| 2857 | + } else { |
| 2858 | + Elem1.subtract(Elem2, RM); |
| 2859 | + } |
| 2860 | + Dst.elem<T>(DstElem++) = Elem1; |
| 2861 | + } |
| 2862 | + for (unsigned I = 0; I != SourceLen; I += 2) { |
| 2863 | + APFloat Elem1 = RHS.elem<T>(I).getAPFloat(); |
| 2864 | + APFloat Elem2 = RHS.elem<T>(I + 1).getAPFloat(); |
| 2865 | + if (IsAdd) { |
| 2866 | + Elem1.add(Elem2, RM); |
| 2867 | + } else { |
| 2868 | + Elem1.subtract(Elem2, RM); |
| 2869 | + } |
| 2870 | + Dst.elem<T>(DstElem++) = Elem1; |
| 2871 | + } |
| 2872 | + Dst.initializeAllElements(); |
| 2873 | + return true; |
| 2874 | +} |
| 2875 | + |
2739 | 2876 | static bool interp__builtin_elementwise_fma(InterpState &S, CodePtr OpPC, |
2740 | 2877 | const CallExpr *Call) { |
2741 | 2878 | assert(Call->getNumArgs() == 3); |
@@ -3356,49 +3493,73 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, |
3356 | 3493 | case Builtin::BI__builtin_elementwise_min: |
3357 | 3494 | return interp__builtin_elementwise_maxmin(S, OpPC, Call, BuiltinID); |
3358 | 3495 |
|
3359 | | - case clang::X86::BI__builtin_ia32_pmuldq128: |
3360 | | - case clang::X86::BI__builtin_ia32_pmuldq256: |
3361 | | - case clang::X86::BI__builtin_ia32_pmuldq512: |
3362 | | - case clang::X86::BI__builtin_ia32_pmuludq128: |
3363 | | - case clang::X86::BI__builtin_ia32_pmuludq256: |
3364 | | - case clang::X86::BI__builtin_ia32_pmuludq512: |
3365 | | - return interp__builtin_ia32_pmul(S, OpPC, Call, BuiltinID); |
3366 | | - |
3367 | | - case Builtin::BI__builtin_elementwise_fma: |
3368 | | - return interp__builtin_elementwise_fma(S, OpPC, Call); |
3369 | | - |
3370 | | - case X86::BI__builtin_ia32_selectb_128: |
3371 | | - case X86::BI__builtin_ia32_selectb_256: |
3372 | | - case X86::BI__builtin_ia32_selectb_512: |
3373 | | - case X86::BI__builtin_ia32_selectw_128: |
3374 | | - case X86::BI__builtin_ia32_selectw_256: |
3375 | | - case X86::BI__builtin_ia32_selectw_512: |
3376 | | - case X86::BI__builtin_ia32_selectd_128: |
3377 | | - case X86::BI__builtin_ia32_selectd_256: |
3378 | | - case X86::BI__builtin_ia32_selectd_512: |
3379 | | - case X86::BI__builtin_ia32_selectq_128: |
3380 | | - case X86::BI__builtin_ia32_selectq_256: |
3381 | | - case X86::BI__builtin_ia32_selectq_512: |
3382 | | - case X86::BI__builtin_ia32_selectph_128: |
3383 | | - case X86::BI__builtin_ia32_selectph_256: |
3384 | | - case X86::BI__builtin_ia32_selectph_512: |
3385 | | - case X86::BI__builtin_ia32_selectpbf_128: |
3386 | | - case X86::BI__builtin_ia32_selectpbf_256: |
3387 | | - case X86::BI__builtin_ia32_selectpbf_512: |
3388 | | - case X86::BI__builtin_ia32_selectps_128: |
3389 | | - case X86::BI__builtin_ia32_selectps_256: |
3390 | | - case X86::BI__builtin_ia32_selectps_512: |
3391 | | - case X86::BI__builtin_ia32_selectpd_128: |
3392 | | - case X86::BI__builtin_ia32_selectpd_256: |
3393 | | - case X86::BI__builtin_ia32_selectpd_512: |
3394 | | - return interp__builtin_select(S, OpPC, Call); |
| 3496 | + case clang::X86::BI__builtin_ia32_phaddw128: |
| 3497 | + case clang::X86::BI__builtin_ia32_phaddw256: |
| 3498 | + case clang::X86::BI__builtin_ia32_phaddd128: |
| 3499 | + case clang::X86::BI__builtin_ia32_phaddd256: |
| 3500 | + case clang::X86::BI__builtin_ia32_phaddsw128: |
| 3501 | + case clang::X86::BI__builtin_ia32_phaddsw256: |
| 3502 | + case clang::X86::BI__builtin_ia32_phsubw128: |
| 3503 | + case clang::X86::BI__builtin_ia32_phsubw256: |
| 3504 | + case clang::X86::BI__builtin_ia32_phsubd128: |
| 3505 | + case clang::X86::BI__builtin_ia32_phsubd256: |
| 3506 | + case clang::X86::BI__builtin_ia32_phsubsw128: |
| 3507 | + case clang::X86::BI__builtin_ia32_phsubsw256: |
| 3508 | + |
| 3509 | + return interp_builtin_ia32ph_add_sub(S, OpPC, Frame, Call, BuiltinID); |
| 3510 | + case clang::X86::BI__builtin_ia32_haddpd: |
| 3511 | + case clang::X86::BI__builtin_ia32_haddpd256: |
| 3512 | + case clang::X86::BI__builtin_ia32_haddps: |
| 3513 | + case clang::X86::BI__builtin_ia32_haddps256: |
| 3514 | + case clang::X86::BI__builtin_ia32_hsubpd: |
| 3515 | + case clang::X86::BI__builtin_ia32_hsubpd256: |
| 3516 | + case clang::X86::BI__builtin_ia32_hsubps: |
| 3517 | + case clang::X86::BI__builtin_ia32_hsubps256: |
| 3518 | + return interp_builtin_floatph_add_sub(S, OpPC, Frame, Call, BuiltinID); |
3395 | 3519 |
|
3396 | | - default: |
3397 | | - S.FFDiag(S.Current->getLocation(OpPC), |
3398 | | - diag::note_invalid_subexpr_in_const_expr) |
3399 | | - << S.Current->getRange(OpPC); |
| 3520 | + case clang::X86::BI__builtin_ia32_pmuldq128: |
| 3521 | + case clang::X86::BI__builtin_ia32_pmuldq256: |
| 3522 | + case clang::X86::BI__builtin_ia32_pmuldq512: |
| 3523 | + case clang::X86::BI__builtin_ia32_pmuludq128: |
| 3524 | + case clang::X86::BI__builtin_ia32_pmuludq256: |
| 3525 | + case clang::X86::BI__builtin_ia32_pmuludq512: |
| 3526 | + return interp__builtin_ia32_pmul(S, OpPC, Call, BuiltinID); |
| 3527 | + |
| 3528 | + case Builtin::BI__builtin_elementwise_fma: |
| 3529 | + return interp__builtin_elementwise_fma(S, OpPC, Call); |
| 3530 | + |
| 3531 | + case X86::BI__builtin_ia32_selectb_128: |
| 3532 | + case X86::BI__builtin_ia32_selectb_256: |
| 3533 | + case X86::BI__builtin_ia32_selectb_512: |
| 3534 | + case X86::BI__builtin_ia32_selectw_128: |
| 3535 | + case X86::BI__builtin_ia32_selectw_256: |
| 3536 | + case X86::BI__builtin_ia32_selectw_512: |
| 3537 | + case X86::BI__builtin_ia32_selectd_128: |
| 3538 | + case X86::BI__builtin_ia32_selectd_256: |
| 3539 | + case X86::BI__builtin_ia32_selectd_512: |
| 3540 | + case X86::BI__builtin_ia32_selectq_128: |
| 3541 | + case X86::BI__builtin_ia32_selectq_256: |
| 3542 | + case X86::BI__builtin_ia32_selectq_512: |
| 3543 | + case X86::BI__builtin_ia32_selectph_128: |
| 3544 | + case X86::BI__builtin_ia32_selectph_256: |
| 3545 | + case X86::BI__builtin_ia32_selectph_512: |
| 3546 | + case X86::BI__builtin_ia32_selectpbf_128: |
| 3547 | + case X86::BI__builtin_ia32_selectpbf_256: |
| 3548 | + case X86::BI__builtin_ia32_selectpbf_512: |
| 3549 | + case X86::BI__builtin_ia32_selectps_128: |
| 3550 | + case X86::BI__builtin_ia32_selectps_256: |
| 3551 | + case X86::BI__builtin_ia32_selectps_512: |
| 3552 | + case X86::BI__builtin_ia32_selectpd_128: |
| 3553 | + case X86::BI__builtin_ia32_selectpd_256: |
| 3554 | + case X86::BI__builtin_ia32_selectpd_512: |
| 3555 | + return interp__builtin_select(S, OpPC, Call); |
3400 | 3556 |
|
3401 | | - return false; |
| 3557 | + default: |
| 3558 | + S.FFDiag(S.Current->getLocation(OpPC), |
| 3559 | + diag::note_invalid_subexpr_in_const_expr) |
| 3560 | + << S.Current->getRange(OpPC); |
| 3561 | + |
| 3562 | + return false; |
3402 | 3563 | } |
3403 | 3564 |
|
3404 | 3565 | llvm_unreachable("Unhandled builtin ID"); |
|
0 commit comments