Skip to content

Commit ed4a09f

Browse files
author
whyuuwang
committed
constexpr deal
1 parent f8362b4 commit ed4a09f

File tree

2 files changed

+407
-102
lines changed

2 files changed

+407
-102
lines changed

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 202 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
#include "../ExprConstShared.h"
99
#include "Boolean.h"
1010
#include "EvalEmitter.h"
11+
#include "Floating.h"
1112
#include "Interp.h"
1213
#include "InterpBuiltinBitCast.h"
1314
#include "PrimType.h"
@@ -19,6 +20,7 @@
1920
#include "llvm/ADT/StringExtras.h"
2021
#include "llvm/Support/ErrorHandling.h"
2122
#include "llvm/Support/SipHash.h"
23+
#include <cassert>
2224

2325
namespace clang {
2426
namespace interp {
@@ -2736,6 +2738,141 @@ static bool interp__builtin_ia32_pmul(InterpState &S, CodePtr OpPC,
27362738
return true;
27372739
}
27382740

2741+
/// Constant-evaluates the x86 integer horizontal add/subtract builtins
/// (phaddw/phaddd/phaddsw and phsubw/phsubd/phsubsw, 128- and 256-bit forms).
///
/// Pops the two source vectors from the stack (RHS first, then LHS) and fills
/// the destination vector that is left on top of the stack. Adjacent element
/// pairs (2*i, 2*i+1) of a source are combined into a single result element:
/// `a + b` / `a - b` for the plain forms, signed-saturating for the *s* forms.
///
/// The hardware instructions work independently on every 128-bit lane, so for
/// each lane the destination receives the reduced LHS pairs of that lane
/// followed by the reduced RHS pairs of that lane:
///   128-bit: [ LHS pairs | RHS pairs ]
///   256-bit: [ LHS lane0 | RHS lane0 | LHS lane1 | RHS lane1 ]
///
/// \param S         interpreter state; provides the stack and type contexts.
/// \param OpPC      unused here; kept for a uniform builtin-handler signature.
/// \param Frame     unused here; kept for a uniform builtin-handler signature.
/// \param Call      the builtin call; supplies argument and result types.
/// \param BuiltinID selects add vs. subtract and wrapping vs. saturating.
/// \returns true always (all destination elements are marked initialized).
static bool interp_builtin_ia32ph_add_sub(InterpState &S, CodePtr OpPC,
                                          const InterpFrame *Frame,
                                          const CallExpr *Call,
                                          uint32_t BuiltinID) {
  assert(Call->getArg(0)->getType()->isVectorType() &&
         Call->getArg(1)->getType()->isVectorType());
  const Pointer &RHS = S.Stk.pop<Pointer>();
  const Pointer &LHS = S.Stk.pop<Pointer>();
  const Pointer &Dst = S.Stk.peek<Pointer>();

  const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>();
  PrimType ElemT = *S.getContext().classify(VT->getElementType());
  unsigned SourceLen = VT->getNumElements();
  assert(SourceLen % 2 == 0 &&
         Call->getArg(1)->getType()->castAs<VectorType>()->getNumElements() ==
             SourceLen);
  PrimType DstElemT = *S.getContext().classify(
      Call->getType()->castAs<VectorType>()->getElementType());

  // Number of source elements that make up one 128-bit lane. Vectors that are
  // not wider than 128 bits consist of a single lane.
  unsigned EltBits = S.getASTContext().getTypeSize(VT->getElementType());
  assert(EltBits != 0 && "vector element type must have a size");
  unsigned MaxEltsPerLane = 128 / EltBits;
  unsigned EltsPerLane =
      SourceLen < MaxEltsPerLane ? SourceLen : MaxEltsPerLane;
  assert(EltsPerLane >= 2 && EltsPerLane % 2 == 0 &&
         SourceLen % EltsPerLane == 0);

  bool IsAdd = (BuiltinID == clang::X86::BI__builtin_ia32_phaddw128 ||
                BuiltinID == clang::X86::BI__builtin_ia32_phaddw256 ||
                BuiltinID == clang::X86::BI__builtin_ia32_phaddd128 ||
                BuiltinID == clang::X86::BI__builtin_ia32_phaddd256 ||
                BuiltinID == clang::X86::BI__builtin_ia32_phaddsw128 ||
                BuiltinID == clang::X86::BI__builtin_ia32_phaddsw256);

  bool IsSaturating = (BuiltinID == clang::X86::BI__builtin_ia32_phaddsw128 ||
                       BuiltinID == clang::X86::BI__builtin_ia32_phaddsw256 ||
                       BuiltinID == clang::X86::BI__builtin_ia32_phsubsw128 ||
                       BuiltinID == clang::X86::BI__builtin_ia32_phsubsw256);

  // Combines one adjacent pair; the result keeps the signedness of the inputs.
  auto Combine = [&](const APSInt &A, const APSInt &B) -> APSInt {
    const bool IsUnsigned = A.isUnsigned();
    if (IsSaturating)
      return APSInt(IsAdd ? A.sadd_sat(B) : A.ssub_sat(B), IsUnsigned);
    return APSInt(IsAdd ? A + B : A - B, IsUnsigned);
  };

  // Next destination slot to write; advances once per reduced pair.
  unsigned DstElem = 0;

  // Reduces the pairs of one 128-bit lane of Src (starting at LaneStart) and
  // appends the results to the destination.
  auto ReduceLane = [&](const Pointer &Src, unsigned LaneStart) {
    for (unsigned I = 0; I != EltsPerLane; I += 2) {
      APSInt Elem1;
      APSInt Elem2;
      INT_TYPE_SWITCH_NO_BOOL(ElemT, {
        Elem1 = Src.elem<T>(LaneStart + I).toAPSInt();
        Elem2 = Src.elem<T>(LaneStart + I + 1).toAPSInt();
      });
      APSInt Result = Combine(Elem1, Elem2);
      INT_TYPE_SWITCH_NO_BOOL(DstElemT,
                              { Dst.elem<T>(DstElem) = static_cast<T>(Result); });
      ++DstElem;
    }
  };

  // Per lane: first the LHS half, then the RHS half.
  for (unsigned LaneStart = 0; LaneStart < SourceLen;
       LaneStart += EltsPerLane) {
    ReduceLane(LHS, LaneStart);
    ReduceLane(RHS, LaneStart);
  }

  Dst.initializeAllElements();
  return true;
}
2827+
2828+
/// Constant-evaluates the x86 floating-point horizontal add/subtract builtins
/// (haddps/haddpd/hsubps/hsubpd, 128- and 256-bit forms).
///
/// Pops the two source vectors from the stack (RHS first, then LHS) and fills
/// the destination vector that is left on top of the stack. Adjacent element
/// pairs (2*i, 2*i+1) of a source are combined into one result element, using
/// the rounding mode derived from the floating-point options in effect at the
/// call.
///
/// The hardware instructions work independently on every 128-bit lane, so for
/// each lane the destination receives the reduced LHS pairs of that lane
/// followed by the reduced RHS pairs of that lane:
///   128-bit: [ LHS pairs | RHS pairs ]
///   256-bit: [ LHS lane0 | RHS lane0 | LHS lane1 | RHS lane1 ]
///
/// \param S         interpreter state; provides the stack and type contexts.
/// \param OpPC      unused here; kept for a uniform builtin-handler signature.
/// \param Frame     unused here; kept for a uniform builtin-handler signature.
/// \param Call      the builtin call; supplies argument types and FP options.
/// \param BuiltinID selects horizontal add vs. horizontal subtract.
/// \returns true always (all destination elements are marked initialized).
static bool interp_builtin_floatph_add_sub(InterpState &S, CodePtr OpPC,
                                           const InterpFrame *Frame,
                                           const CallExpr *Call,
                                           uint32_t BuiltinID) {
  assert(Call->getArg(0)->getType()->isVectorType() &&
         Call->getArg(1)->getType()->isVectorType());
  const Pointer &RHS = S.Stk.pop<Pointer>();
  const Pointer &LHS = S.Stk.pop<Pointer>();
  const Pointer &Dst = S.Stk.peek<Pointer>();

  FPOptions FPO = Call->getFPFeaturesInEffect(S.Ctx.getLangOpts());
  llvm::RoundingMode RM = getRoundingMode(FPO);
  const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>();
  unsigned SourceLen = VT->getNumElements();
  assert(SourceLen % 2 == 0 &&
         Call->getArg(1)->getType()->castAs<VectorType>()->getNumElements() ==
             SourceLen);

  // Number of source elements that make up one 128-bit lane. Vectors that are
  // not wider than 128 bits consist of a single lane.
  unsigned EltBits = S.getASTContext().getTypeSize(VT->getElementType());
  assert(EltBits != 0 && "vector element type must have a size");
  unsigned MaxEltsPerLane = 128 / EltBits;
  unsigned EltsPerLane =
      SourceLen < MaxEltsPerLane ? SourceLen : MaxEltsPerLane;
  assert(EltsPerLane >= 2 && EltsPerLane % 2 == 0 &&
         SourceLen % EltsPerLane == 0);

  bool IsAdd = (BuiltinID == clang::X86::BI__builtin_ia32_haddpd ||
                BuiltinID == clang::X86::BI__builtin_ia32_haddpd256 ||
                BuiltinID == clang::X86::BI__builtin_ia32_haddps ||
                BuiltinID == clang::X86::BI__builtin_ia32_haddps256);

  using T = Floating;

  // Next destination slot to write; advances once per reduced pair.
  unsigned DstElem = 0;

  // Reduces the pairs of one 128-bit lane of Src (starting at LaneStart) and
  // appends the results to the destination.
  auto ReduceLane = [&](const Pointer &Src, unsigned LaneStart) {
    for (unsigned I = 0; I != EltsPerLane; I += 2) {
      APFloat Elem1 = Src.elem<T>(LaneStart + I).getAPFloat();
      APFloat Elem2 = Src.elem<T>(LaneStart + I + 1).getAPFloat();
      // The status returned by add/subtract is not inspected here.
      if (IsAdd)
        Elem1.add(Elem2, RM);
      else
        Elem1.subtract(Elem2, RM);
      Dst.elem<T>(DstElem++) = Elem1;
    }
  };

  // Per lane: first the LHS half, then the RHS half.
  for (unsigned LaneStart = 0; LaneStart < SourceLen;
       LaneStart += EltsPerLane) {
    ReduceLane(LHS, LaneStart);
    ReduceLane(RHS, LaneStart);
  }

  Dst.initializeAllElements();
  return true;
}
2875+
27392876
static bool interp__builtin_elementwise_fma(InterpState &S, CodePtr OpPC,
27402877
const CallExpr *Call) {
27412878
assert(Call->getNumArgs() == 3);
@@ -3356,49 +3493,73 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
33563493
case Builtin::BI__builtin_elementwise_min:
33573494
return interp__builtin_elementwise_maxmin(S, OpPC, Call, BuiltinID);
33583495

3359-
case clang::X86::BI__builtin_ia32_pmuldq128:
3360-
case clang::X86::BI__builtin_ia32_pmuldq256:
3361-
case clang::X86::BI__builtin_ia32_pmuldq512:
3362-
case clang::X86::BI__builtin_ia32_pmuludq128:
3363-
case clang::X86::BI__builtin_ia32_pmuludq256:
3364-
case clang::X86::BI__builtin_ia32_pmuludq512:
3365-
return interp__builtin_ia32_pmul(S, OpPC, Call, BuiltinID);
3366-
3367-
case Builtin::BI__builtin_elementwise_fma:
3368-
return interp__builtin_elementwise_fma(S, OpPC, Call);
3369-
3370-
case X86::BI__builtin_ia32_selectb_128:
3371-
case X86::BI__builtin_ia32_selectb_256:
3372-
case X86::BI__builtin_ia32_selectb_512:
3373-
case X86::BI__builtin_ia32_selectw_128:
3374-
case X86::BI__builtin_ia32_selectw_256:
3375-
case X86::BI__builtin_ia32_selectw_512:
3376-
case X86::BI__builtin_ia32_selectd_128:
3377-
case X86::BI__builtin_ia32_selectd_256:
3378-
case X86::BI__builtin_ia32_selectd_512:
3379-
case X86::BI__builtin_ia32_selectq_128:
3380-
case X86::BI__builtin_ia32_selectq_256:
3381-
case X86::BI__builtin_ia32_selectq_512:
3382-
case X86::BI__builtin_ia32_selectph_128:
3383-
case X86::BI__builtin_ia32_selectph_256:
3384-
case X86::BI__builtin_ia32_selectph_512:
3385-
case X86::BI__builtin_ia32_selectpbf_128:
3386-
case X86::BI__builtin_ia32_selectpbf_256:
3387-
case X86::BI__builtin_ia32_selectpbf_512:
3388-
case X86::BI__builtin_ia32_selectps_128:
3389-
case X86::BI__builtin_ia32_selectps_256:
3390-
case X86::BI__builtin_ia32_selectps_512:
3391-
case X86::BI__builtin_ia32_selectpd_128:
3392-
case X86::BI__builtin_ia32_selectpd_256:
3393-
case X86::BI__builtin_ia32_selectpd_512:
3394-
return interp__builtin_select(S, OpPC, Call);
3496+
case clang::X86::BI__builtin_ia32_phaddw128:
3497+
case clang::X86::BI__builtin_ia32_phaddw256:
3498+
case clang::X86::BI__builtin_ia32_phaddd128:
3499+
case clang::X86::BI__builtin_ia32_phaddd256:
3500+
case clang::X86::BI__builtin_ia32_phaddsw128:
3501+
case clang::X86::BI__builtin_ia32_phaddsw256:
3502+
case clang::X86::BI__builtin_ia32_phsubw128:
3503+
case clang::X86::BI__builtin_ia32_phsubw256:
3504+
case clang::X86::BI__builtin_ia32_phsubd128:
3505+
case clang::X86::BI__builtin_ia32_phsubd256:
3506+
case clang::X86::BI__builtin_ia32_phsubsw128:
3507+
case clang::X86::BI__builtin_ia32_phsubsw256:
3508+
3509+
return interp_builtin_ia32ph_add_sub(S, OpPC, Frame, Call, BuiltinID);
3510+
case clang::X86::BI__builtin_ia32_haddpd:
3511+
case clang::X86::BI__builtin_ia32_haddpd256:
3512+
case clang::X86::BI__builtin_ia32_haddps:
3513+
case clang::X86::BI__builtin_ia32_haddps256:
3514+
case clang::X86::BI__builtin_ia32_hsubpd:
3515+
case clang::X86::BI__builtin_ia32_hsubpd256:
3516+
case clang::X86::BI__builtin_ia32_hsubps:
3517+
case clang::X86::BI__builtin_ia32_hsubps256:
3518+
return interp_builtin_floatph_add_sub(S, OpPC, Frame, Call, BuiltinID);
33953519

3396-
default:
3397-
S.FFDiag(S.Current->getLocation(OpPC),
3398-
diag::note_invalid_subexpr_in_const_expr)
3399-
<< S.Current->getRange(OpPC);
3520+
case clang::X86::BI__builtin_ia32_pmuldq128:
3521+
case clang::X86::BI__builtin_ia32_pmuldq256:
3522+
case clang::X86::BI__builtin_ia32_pmuldq512:
3523+
case clang::X86::BI__builtin_ia32_pmuludq128:
3524+
case clang::X86::BI__builtin_ia32_pmuludq256:
3525+
case clang::X86::BI__builtin_ia32_pmuludq512:
3526+
return interp__builtin_ia32_pmul(S, OpPC, Call, BuiltinID);
3527+
3528+
case Builtin::BI__builtin_elementwise_fma:
3529+
return interp__builtin_elementwise_fma(S, OpPC, Call);
3530+
3531+
case X86::BI__builtin_ia32_selectb_128:
3532+
case X86::BI__builtin_ia32_selectb_256:
3533+
case X86::BI__builtin_ia32_selectb_512:
3534+
case X86::BI__builtin_ia32_selectw_128:
3535+
case X86::BI__builtin_ia32_selectw_256:
3536+
case X86::BI__builtin_ia32_selectw_512:
3537+
case X86::BI__builtin_ia32_selectd_128:
3538+
case X86::BI__builtin_ia32_selectd_256:
3539+
case X86::BI__builtin_ia32_selectd_512:
3540+
case X86::BI__builtin_ia32_selectq_128:
3541+
case X86::BI__builtin_ia32_selectq_256:
3542+
case X86::BI__builtin_ia32_selectq_512:
3543+
case X86::BI__builtin_ia32_selectph_128:
3544+
case X86::BI__builtin_ia32_selectph_256:
3545+
case X86::BI__builtin_ia32_selectph_512:
3546+
case X86::BI__builtin_ia32_selectpbf_128:
3547+
case X86::BI__builtin_ia32_selectpbf_256:
3548+
case X86::BI__builtin_ia32_selectpbf_512:
3549+
case X86::BI__builtin_ia32_selectps_128:
3550+
case X86::BI__builtin_ia32_selectps_256:
3551+
case X86::BI__builtin_ia32_selectps_512:
3552+
case X86::BI__builtin_ia32_selectpd_128:
3553+
case X86::BI__builtin_ia32_selectpd_256:
3554+
case X86::BI__builtin_ia32_selectpd_512:
3555+
return interp__builtin_select(S, OpPC, Call);
34003556

3401-
return false;
3557+
default:
3558+
S.FFDiag(S.Current->getLocation(OpPC),
3559+
diag::note_invalid_subexpr_in_const_expr)
3560+
<< S.Current->getRange(OpPC);
3561+
3562+
return false;
34023563
}
34033564

34043565
llvm_unreachable("Unhandled builtin ID");

0 commit comments

Comments
 (0)