Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
a81c406
deal this issues 155395
Sep 4, 2025
2fadf3f
deal issues 15595
Sep 4, 2025
f8362b4
Merge branch 'llvm:main' into main
whytolearn Sep 4, 2025
ed4a09f
constexpr deal
Sep 11, 2025
df6242e
adjust unit test #146940
Sep 13, 2025
9f2fb43
Merge remote-tracking branch 'upstream/main'
Sep 13, 2025
929d7c0
Merge branch 'main' into main
whytolearn Sep 13, 2025
f91aa21
adjust test case and function
Sep 26, 2025
4f5fb87
undo the unintentional formatting of the code
Sep 26, 2025
2422cd4
Merge branch 'main' into main
whytolearn Sep 26, 2025
a3575c5
Merge branch 'main' into main
whytolearn Sep 26, 2025
b2cac3e
adjust code
Sep 26, 2025
197123a
adjust code for mm256
Sep 28, 2025
b733157
format code
Sep 28, 2025
1ce4883
Merge branch 'main' into main
whytolearn Sep 29, 2025
9a7c138
deal all 256 double pane ins
Oct 2, 2025
a65f4fc
deal all 256 double pane ins
Oct 2, 2025
9877317
adjust for 128 and 256 oprand
Oct 7, 2025
404d261
Merge branch 'main' into main
whytolearn Oct 7, 2025
1d61bf2
undo some bad format for .td file
Oct 7, 2025
b25aa5e
Merge branch 'main' into main
whytolearn Oct 9, 2025
4bc2341
merge disperse operation
Oct 10, 2025
242165a
Merge remote-tracking branch 'upstream/main'
Oct 10, 2025
d2e5d43
Merge remote-tracking branch 'upstream/main'
Oct 10, 2025
6d57df0
Merge branch 'main' into main
whytolearn Oct 11, 2025
03e4db0
Merge branch 'main' into main
RKSimon Oct 13, 2025
c2117f6
Update clang/lib/AST/ByteCode/InterpBuiltin.cpp
whytolearn Oct 13, 2025
5c7412f
Update clang/lib/AST/ByteCode/InterpBuiltin.cpp
whytolearn Oct 13, 2025
90200be
Merge branch 'main' into main
whytolearn Oct 13, 2025
5df6aff
Update clang/lib/AST/ExprConstant.cpp
whytolearn Oct 13, 2025
202c165
bad merger delate and code format
Oct 13, 2025
34ee8ed
Merge branch 'main' into main
whytolearn Oct 13, 2025
9ec2672
Merge branch 'main' into main
whytolearn Oct 13, 2025
7e15580
Merge branch 'main' into main
RKSimon Oct 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
163 changes: 163 additions & 0 deletions clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "../ExprConstShared.h"
#include "Boolean.h"
#include "EvalEmitter.h"
#include "Floating.h"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unnecessary?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I have removed this.

#include "Interp.h"
#include "InterpBuiltinBitCast.h"
#include "PrimType.h"
Expand All @@ -19,6 +20,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SipHash.h"
#include <cassert>
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unnecessary?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I have deleted it.


namespace clang {
namespace interp {
Expand Down Expand Up @@ -2742,6 +2744,143 @@ static bool interp__builtin_ia32_pmul(InterpState &S, CodePtr OpPC,
return true;
}

static bool interp_builtin_ia32ph_add_sub(InterpState &S, CodePtr OpPC,
const InterpFrame *Frame,
const CallExpr *Call,
uint32_t BuiltinID) {
assert(Call->getArg(0)->getType()->isVectorType() &&
Call->getArg(1)->getType()->isVectorType());
const Pointer &RHS = S.Stk.pop<Pointer>();
const Pointer &LHS = S.Stk.pop<Pointer>();
const Pointer &Dst = S.Stk.peek<Pointer>();

const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>();
PrimType ElemT = *S.getContext().classify(VT->getElementType());
unsigned SourceLen = VT->getNumElements();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  unsigned NumElts = VT->getNumElements();
  unsigned EltBits = ASTCtx.getIntWidth(VT->getElementType());
  unsigned EltsPerLane = 128 / EltBits;
  unsigned Lanes = NumElts * EltBits / 128;

assert(SourceLen % 2 == 0 &&
Call->getArg(1)->getType()->castAs<VectorType>()->getNumElements() ==
SourceLen);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't waste your time with asserts like this - the definitions in BuiltinsX86.td mean that the types should already be correct; otherwise Sema will catch it.

PrimType DstElemT = *S.getContext().classify(
Call->getType()->castAs<VectorType>()->getElementType());
unsigned DstElem = 0;

bool IsAdd = (BuiltinID == clang::X86::BI__builtin_ia32_phaddw128 ||
BuiltinID == clang::X86::BI__builtin_ia32_phaddw256 ||
BuiltinID == clang::X86::BI__builtin_ia32_phaddd128 ||
BuiltinID == clang::X86::BI__builtin_ia32_phaddd256 ||
BuiltinID == clang::X86::BI__builtin_ia32_phaddsw128 ||
BuiltinID == clang::X86::BI__builtin_ia32_phaddsw256);

bool IsSaturating = (BuiltinID == clang::X86::BI__builtin_ia32_phaddsw128 ||
BuiltinID == clang::X86::BI__builtin_ia32_phaddsw256 ||
BuiltinID == clang::X86::BI__builtin_ia32_phsubsw128 ||
BuiltinID == clang::X86::BI__builtin_ia32_phsubsw256);

for (unsigned I = 0; I != SourceLen; I += 2) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for (unsigned Lane = 0; Lane != NumElts; Lane += EltsPerLane) {
  for (unsigned I = 0; I != EltsPerLane; I += 2) {
    INT_TYPE_SWITCH_NO_BOOL(ElemT, {
      APSInt Elem1 = LHS.elem<T>(Lane + I).toAPSInt();
      APSInt Elem2 = LHS.elem<T>(Lane + I + 1).toAPSInt();
      Dst.elem<T>(Lane + I / 2) = static_cast<T>(APSInt(Fn(Elem1, Elem2), DestUnsigned));
    });
  }
  for (unsigned I = 0; I != EltsPerLane; I += 2) {
    INT_TYPE_SWITCH_NO_BOOL(ElemT, {
      APSInt Elem1 = RHS.elem<T>(Lane + I).toAPSInt();
      APSInt Elem2 = RHS.elem<T>(Lane + I + 1).toAPSInt();
      Dst.elem<T>(Lane + EltsPerLane / 2 + I / 2) = static_cast<T>(APSInt(Fn(Elem1, Elem2), DestUnsigned));
    });
  }
}

APSInt Elem1;
APSInt Elem2;
INT_TYPE_SWITCH_NO_BOOL(ElemT, {
Elem1 = LHS.elem<T>(I).toAPSInt();
Elem2 = LHS.elem<T>(I + 1).toAPSInt();
});
APSInt Result;
if (IsAdd) {
if (IsSaturating) {
Result =
APSInt(Elem1.sadd_sat(Elem2), /*IsUnsigned=*/Elem1.isUnsigned());
} else {
Result = APSInt(Elem1 + Elem2, /*IsUnsigned=*/Elem1.isUnsigned());
}
} else {
if (IsSaturating) {
Result =
APSInt(Elem1.ssub_sat(Elem2), /*IsUnsigned=*/Elem1.isUnsigned());
} else {
Result = APSInt(Elem1 - Elem2, /*IsUnsigned=*/Elem1.isUnsigned());
}
}
INT_TYPE_SWITCH_NO_BOOL(DstElemT,
{ Dst.elem<T>(DstElem) = static_cast<T>(Result); });
++DstElem;
}
for (unsigned I = 0; I != SourceLen; I += 2) {
APSInt Elem1;
APSInt Elem2;
INT_TYPE_SWITCH_NO_BOOL(ElemT, {
Elem1 = RHS.elem<T>(I).toAPSInt();
Elem2 = RHS.elem<T>(I + 1).toAPSInt();
});
APSInt Result;
if (IsAdd) {
if (IsSaturating) {
Result =
APSInt(Elem1.sadd_sat(Elem2), /*IsUnsigned=*/Elem1.isUnsigned());
} else {
Result = APSInt(Elem1 + Elem2, /*IsUnsigned=*/Elem1.isUnsigned());
}
} else {
if (IsSaturating) {
Result =
APSInt(Elem1.ssub_sat(Elem2), /*IsUnsigned=*/Elem1.isUnsigned());
} else {
Result = APSInt(Elem1 - Elem2, /*IsUnsigned=*/Elem1.isUnsigned());
}
}
INT_TYPE_SWITCH_NO_BOOL(DstElemT,
{ Dst.elem<T>(DstElem) = static_cast<T>(Result); });
++DstElem;
}
Dst.initializeAllElements();
return true;
}

/// Constant-evaluates the x86 horizontal floating-point add/subtract builtins
/// (haddps/haddpd/hsubps/hsubpd and their 256-bit variants).
///
/// Pops the two vector operands (RHS first, then LHS) from the interpreter
/// stack and writes the result into the destination vector left on top of the
/// stack.
///
/// The hardware instructions operate independently on each 128-bit lane: the
/// lower half of every destination lane receives the pairwise results of the
/// matching LHS lane, and the upper half receives the pairwise results of the
/// matching RHS lane. For a 128-bit vector there is a single lane, so this is
/// simply "all LHS pairs, then all RHS pairs"; for 256-bit vectors the LHS and
/// RHS results are interleaved per lane.
///
/// \param S         Interpreter state that owns the operand stack.
/// \param OpPC      Current code pointer (unused here).
/// \param Frame     Current interpreter frame (unused here).
/// \param Call      The builtin call expression; supplies the vector type and
///                  the floating-point options in effect.
/// \param BuiltinID Selects between the add and subtract flavours.
/// \returns true; every element of the destination is marked initialized.
static bool interp_builtin_floatph_add_sub(InterpState &S, CodePtr OpPC,
                                           const InterpFrame *Frame,
                                           const CallExpr *Call,
                                           uint32_t BuiltinID) {
  assert(Call->getArg(0)->getType()->isVectorType() &&
         Call->getArg(1)->getType()->isVectorType());
  const Pointer &RHS = S.Stk.pop<Pointer>();
  const Pointer &LHS = S.Stk.pop<Pointer>();
  const Pointer &Dst = S.Stk.peek<Pointer>();

  FPOptions FPO = Call->getFPFeaturesInEffect(S.Ctx.getLangOpts());
  llvm::RoundingMode RM = getRoundingMode(FPO);
  const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>();

  // Split the vector into 128-bit lanes; each lane is reduced on its own.
  unsigned NumElts = VT->getNumElements();
  unsigned EltBits = S.getASTContext().getTypeSize(VT->getElementType());
  unsigned NumLanes = NumElts * EltBits / 128;
  assert(NumLanes != 0 && "horizontal ops expect at least one 128-bit lane");
  unsigned EltsPerLane = NumElts / NumLanes;
  assert(EltsPerLane % 2 == 0 && "lane must hold whole element pairs");
  unsigned HalfLane = EltsPerLane / 2;

  bool IsAdd = (BuiltinID == clang::X86::BI__builtin_ia32_haddpd ||
                BuiltinID == clang::X86::BI__builtin_ia32_haddpd256 ||
                BuiltinID == clang::X86::BI__builtin_ia32_haddps ||
                BuiltinID == clang::X86::BI__builtin_ia32_haddps256);
  using T = Floating;

  // Combines the HalfLane adjacent pairs of Src starting at SrcBase and stores
  // the results contiguously into Dst starting at DstBase.
  auto ReducePairs = [&](const Pointer &Src, unsigned SrcBase,
                         unsigned DstBase) {
    for (unsigned E = 0; E != HalfLane; ++E) {
      APFloat Elem1 = Src.elem<T>(SrcBase + 2 * E).getAPFloat();
      APFloat Elem2 = Src.elem<T>(SrcBase + 2 * E + 1).getAPFloat();
      if (IsAdd)
        Elem1.add(Elem2, RM);
      else
        Elem1.subtract(Elem2, RM);
      Dst.elem<T>(DstBase + E) = Elem1;
    }
  };

  for (unsigned Lane = 0; Lane != NumElts; Lane += EltsPerLane) {
    // Lower half of the destination lane comes from LHS, upper half from RHS.
    ReducePairs(LHS, Lane, Lane);
    ReducePairs(RHS, Lane, Lane + HalfLane);
  }

  Dst.initializeAllElements();
  return true;
}

static bool interp__builtin_elementwise_triop_fp(
InterpState &S, CodePtr OpPC, const CallExpr *Call,
llvm::function_ref<APFloat(const APFloat &, const APFloat &,
Expand Down Expand Up @@ -3453,6 +3592,30 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case Builtin::BI__builtin_elementwise_min:
return interp__builtin_elementwise_maxmin(S, OpPC, Call, BuiltinID);

case clang::X86::BI__builtin_ia32_phaddw128:
case clang::X86::BI__builtin_ia32_phaddw256:
case clang::X86::BI__builtin_ia32_phaddd128:
case clang::X86::BI__builtin_ia32_phaddd256:
case clang::X86::BI__builtin_ia32_phaddsw128:
case clang::X86::BI__builtin_ia32_phaddsw256:
case clang::X86::BI__builtin_ia32_phsubw128:
case clang::X86::BI__builtin_ia32_phsubw256:
case clang::X86::BI__builtin_ia32_phsubd128:
case clang::X86::BI__builtin_ia32_phsubd256:
case clang::X86::BI__builtin_ia32_phsubsw128:
case clang::X86::BI__builtin_ia32_phsubsw256:
return interp_builtin_ia32ph_add_sub(S, OpPC, Frame, Call, BuiltinID);

case clang::X86::BI__builtin_ia32_haddpd:
case clang::X86::BI__builtin_ia32_haddpd256:
case clang::X86::BI__builtin_ia32_haddps:
case clang::X86::BI__builtin_ia32_haddps256:
case clang::X86::BI__builtin_ia32_hsubpd:
case clang::X86::BI__builtin_ia32_hsubpd256:
case clang::X86::BI__builtin_ia32_hsubps:
case clang::X86::BI__builtin_ia32_hsubps256:
return interp_builtin_floatph_add_sub(S, OpPC, Frame, Call, BuiltinID);

case clang::X86::BI__builtin_ia32_pmuldq128:
case clang::X86::BI__builtin_ia32_pmuldq256:
case clang::X86::BI__builtin_ia32_pmuldq512:
Expand Down
Loading
Loading