-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[X86] Add MMX/SSE/AVX PHADD/SUB & HADDPS/D intrinsics to be used in constexpr #156822
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
a81c406
2fadf3f
f8362b4
ed4a09f
df6242e
9f2fb43
929d7c0
f91aa21
4f5fb87
2422cd4
a3575c5
b2cac3e
197123a
b733157
1ce4883
9a7c138
a65f4fc
9877317
404d261
1d61bf2
b25aa5e
4bc2341
242165a
d2e5d43
6d57df0
03e4db0
c2117f6
5c7412f
90200be
5df6aff
202c165
34ee8ed
9ec2672
7e15580
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ | |
#include "../ExprConstShared.h" | ||
#include "Boolean.h" | ||
#include "EvalEmitter.h" | ||
#include "Floating.h" | ||
#include "Interp.h" | ||
#include "InterpBuiltinBitCast.h" | ||
#include "PrimType.h" | ||
|
@@ -19,6 +20,7 @@ | |
#include "llvm/ADT/StringExtras.h" | ||
#include "llvm/Support/ErrorHandling.h" | ||
#include "llvm/Support/SipHash.h" | ||
#include <cassert> | ||
|
||
|
||
namespace clang { | ||
namespace interp { | ||
|
@@ -2742,6 +2744,143 @@ static bool interp__builtin_ia32_pmul(InterpState &S, CodePtr OpPC, | |
return true; | ||
} | ||
|
||
/// Constant-evaluates the X86 integer horizontal add/subtract builtins
/// (PHADDW/PHADDD/PHADDSW and PHSUBW/PHSUBD/PHSUBSW, 128- and 256-bit forms).
///
/// Pops the two source vectors (RHS first, then LHS) from the interpreter
/// stack and writes the result into the destination vector left on top of the
/// stack. Adjacent element pairs (2*i, 2*i+1) of a source are combined into a
/// single result element.
///
/// The hardware instructions operate independently on each 128-bit lane: the
/// low half of every destination lane holds the pair results of the matching
/// LHS lane and the high half holds those of the matching RHS lane. For
/// vectors of at most 128 bits there is a single lane, so the result is simply
/// "all LHS pairs, then all RHS pairs".
///
/// \param S         Interpreter state providing the evaluation stack.
/// \param OpPC      Current code pointer (unused).
/// \param Frame     Current interpreter frame (unused).
/// \param Call      The builtin call expression; both arguments must be
///                  vectors with the same, even, number of elements.
/// \param BuiltinID Selects add vs. subtract and wrapping vs. signed
///                  saturating arithmetic.
/// \returns true; this evaluation does not report failure.
static bool interp_builtin_ia32ph_add_sub(InterpState &S, CodePtr OpPC,
                                          const InterpFrame *Frame,
                                          const CallExpr *Call,
                                          uint32_t BuiltinID) {
  assert(Call->getArg(0)->getType()->isVectorType() &&
         Call->getArg(1)->getType()->isVectorType());
  const Pointer &RHS = S.Stk.pop<Pointer>();
  const Pointer &LHS = S.Stk.pop<Pointer>();
  const Pointer &Dst = S.Stk.peek<Pointer>();

  const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>();
  PrimType ElemT = *S.getContext().classify(VT->getElementType());
  unsigned SourceLen = VT->getNumElements();

  assert(SourceLen % 2 == 0 &&
         Call->getArg(1)->getType()->castAs<VectorType>()->getNumElements() ==
             SourceLen);

  PrimType DstElemT = *S.getContext().classify(
      Call->getType()->castAs<VectorType>()->getElementType());

  bool IsAdd = (BuiltinID == clang::X86::BI__builtin_ia32_phaddw128 ||
                BuiltinID == clang::X86::BI__builtin_ia32_phaddw256 ||
                BuiltinID == clang::X86::BI__builtin_ia32_phaddd128 ||
                BuiltinID == clang::X86::BI__builtin_ia32_phaddd256 ||
                BuiltinID == clang::X86::BI__builtin_ia32_phaddsw128 ||
                BuiltinID == clang::X86::BI__builtin_ia32_phaddsw256);

  bool IsSaturating = (BuiltinID == clang::X86::BI__builtin_ia32_phaddsw128 ||
                       BuiltinID == clang::X86::BI__builtin_ia32_phaddsw256 ||
                       BuiltinID == clang::X86::BI__builtin_ia32_phsubsw128 ||
                       BuiltinID == clang::X86::BI__builtin_ia32_phsubsw256);

  // Vectors wider than 128 bits are processed one 128-bit lane at a time;
  // narrower (or exactly 128-bit) vectors form a single lane.
  const unsigned VecBits = static_cast<unsigned>(
      S.getASTContext().getTypeSize(Call->getArg(0)->getType()));
  const unsigned NumLanes = VecBits > 128 ? VecBits / 128 : 1;
  const unsigned EltsPerLane = SourceLen / NumLanes;
  assert(SourceLen % NumLanes == 0 && EltsPerLane % 2 == 0 &&
         "each lane must hold a whole number of element pairs");

  // Combines one adjacent pair according to the selected operation. The
  // result keeps the signedness of the inputs.
  auto Combine = [&](const APSInt &A, const APSInt &B) -> APSInt {
    const bool IsUnsigned = A.isUnsigned();
    if (IsSaturating)
      return APSInt(IsAdd ? A.sadd_sat(B) : A.ssub_sat(B), IsUnsigned);
    return APSInt(IsAdd ? A + B : A - B, IsUnsigned);
  };

  const Pointer *Sources[] = {&LHS, &RHS};
  unsigned DstElem = 0;
  for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
    const unsigned LaneStart = Lane * EltsPerLane;
    // Within a lane: first the LHS pair results, then the RHS pair results.
    for (const Pointer *Src : Sources) {
      for (unsigned I = 0; I != EltsPerLane; I += 2) {
        APSInt Elem1;
        APSInt Elem2;
        INT_TYPE_SWITCH_NO_BOOL(ElemT, {
          Elem1 = Src->elem<T>(LaneStart + I).toAPSInt();
          Elem2 = Src->elem<T>(LaneStart + I + 1).toAPSInt();
        });
        APSInt Result = Combine(Elem1, Elem2);
        INT_TYPE_SWITCH_NO_BOOL(
            DstElemT, { Dst.elem<T>(DstElem) = static_cast<T>(Result); });
        ++DstElem;
      }
    }
  }
  Dst.initializeAllElements();
  return true;
}
|
||
/// Constant-evaluates the X86 floating-point horizontal add/subtract builtins
/// (HADDPS/HADDPD and HSUBPS/HSUBPD, 128- and 256-bit forms).
///
/// Pops the two source vectors (RHS first, then LHS) from the interpreter
/// stack and writes the result into the destination vector left on top of the
/// stack. Adjacent element pairs (2*i, 2*i+1) of a source are combined into a
/// single result element using the rounding mode in effect at the call.
///
/// The hardware instructions operate independently on each 128-bit lane: the
/// low half of every destination lane holds the pair results of the matching
/// LHS lane and the high half holds those of the matching RHS lane. For
/// 128-bit vectors there is a single lane, so the result is simply "all LHS
/// pairs, then all RHS pairs".
///
/// \param S         Interpreter state providing the evaluation stack.
/// \param OpPC      Current code pointer (unused).
/// \param Frame     Current interpreter frame (unused).
/// \param Call      The builtin call expression; both arguments must be
///                  vectors with the same, even, number of elements.
/// \param BuiltinID Selects horizontal add vs. horizontal subtract.
/// \returns true; this evaluation does not report failure.
static bool interp_builtin_floatph_add_sub(InterpState &S, CodePtr OpPC,
                                           const InterpFrame *Frame,
                                           const CallExpr *Call,
                                           uint32_t BuiltinID) {
  assert(Call->getArg(0)->getType()->isVectorType() &&
         Call->getArg(1)->getType()->isVectorType());
  const Pointer &RHS = S.Stk.pop<Pointer>();
  const Pointer &LHS = S.Stk.pop<Pointer>();
  const Pointer &Dst = S.Stk.peek<Pointer>();

  FPOptions FPO = Call->getFPFeaturesInEffect(S.Ctx.getLangOpts());
  llvm::RoundingMode RM = getRoundingMode(FPO);
  const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>();
  unsigned SourceLen = VT->getNumElements();
  assert(SourceLen % 2 == 0 &&
         Call->getArg(1)->getType()->castAs<VectorType>()->getNumElements() ==
             SourceLen);

  bool IsAdd = (BuiltinID == clang::X86::BI__builtin_ia32_haddpd ||
                BuiltinID == clang::X86::BI__builtin_ia32_haddpd256 ||
                BuiltinID == clang::X86::BI__builtin_ia32_haddps ||
                BuiltinID == clang::X86::BI__builtin_ia32_haddps256);

  // Vectors wider than 128 bits are processed one 128-bit lane at a time;
  // narrower (or exactly 128-bit) vectors form a single lane.
  const unsigned VecBits = static_cast<unsigned>(
      S.getASTContext().getTypeSize(Call->getArg(0)->getType()));
  const unsigned NumLanes = VecBits > 128 ? VecBits / 128 : 1;
  const unsigned EltsPerLane = SourceLen / NumLanes;
  assert(SourceLen % NumLanes == 0 && EltsPerLane % 2 == 0 &&
         "each lane must hold a whole number of element pairs");

  using T = Floating;
  const Pointer *Sources[] = {&LHS, &RHS};
  unsigned DstElem = 0;
  for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
    const unsigned LaneStart = Lane * EltsPerLane;
    // Within a lane: first the LHS pair results, then the RHS pair results.
    for (const Pointer *Src : Sources) {
      for (unsigned I = 0; I != EltsPerLane; I += 2) {
        APFloat Elem1 = Src->elem<T>(LaneStart + I).getAPFloat();
        APFloat Elem2 = Src->elem<T>(LaneStart + I + 1).getAPFloat();
        if (IsAdd)
          Elem1.add(Elem2, RM);
        else
          Elem1.subtract(Elem2, RM);
        Dst.elem<T>(DstElem++) = Elem1;
      }
    }
  }
  Dst.initializeAllElements();
  return true;
}
|
||
static bool interp__builtin_elementwise_triop_fp( | ||
InterpState &S, CodePtr OpPC, const CallExpr *Call, | ||
llvm::function_ref<APFloat(const APFloat &, const APFloat &, | ||
|
@@ -3453,6 +3592,30 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, | |
case Builtin::BI__builtin_elementwise_min: | ||
return interp__builtin_elementwise_maxmin(S, OpPC, Call, BuiltinID); | ||
|
||
case clang::X86::BI__builtin_ia32_phaddw128: | ||
case clang::X86::BI__builtin_ia32_phaddw256: | ||
case clang::X86::BI__builtin_ia32_phaddd128: | ||
case clang::X86::BI__builtin_ia32_phaddd256: | ||
case clang::X86::BI__builtin_ia32_phaddsw128: | ||
case clang::X86::BI__builtin_ia32_phaddsw256: | ||
case clang::X86::BI__builtin_ia32_phsubw128: | ||
case clang::X86::BI__builtin_ia32_phsubw256: | ||
case clang::X86::BI__builtin_ia32_phsubd128: | ||
case clang::X86::BI__builtin_ia32_phsubd256: | ||
case clang::X86::BI__builtin_ia32_phsubsw128: | ||
case clang::X86::BI__builtin_ia32_phsubsw256: | ||
return interp_builtin_ia32ph_add_sub(S, OpPC, Frame, Call, BuiltinID); | ||
RKSimon marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
||
case clang::X86::BI__builtin_ia32_haddpd: | ||
case clang::X86::BI__builtin_ia32_haddpd256: | ||
case clang::X86::BI__builtin_ia32_haddps: | ||
case clang::X86::BI__builtin_ia32_haddps256: | ||
case clang::X86::BI__builtin_ia32_hsubpd: | ||
case clang::X86::BI__builtin_ia32_hsubpd256: | ||
case clang::X86::BI__builtin_ia32_hsubps: | ||
case clang::X86::BI__builtin_ia32_hsubps256: | ||
return interp_builtin_floatph_add_sub(S, OpPC, Frame, Call, BuiltinID); | ||
RKSimon marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
||
case clang::X86::BI__builtin_ia32_pmuldq128: | ||
case clang::X86::BI__builtin_ia32_pmuldq256: | ||
case clang::X86::BI__builtin_ia32_pmuldq512: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
unnecessary?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, I have removed this.