@@ -11621,7 +11621,7 @@ static bool evalPackBuiltin(const CallExpr *E, EvalInfo &Info, APValue &Result,
1162111621
1162211622static bool evalShuffleGeneric(
1162311623 EvalInfo &Info, const CallExpr *Call, APValue &Out,
11624- llvm::function_ref<std::pair<unsigned, unsigned >(unsigned, unsigned)>
11624+ llvm::function_ref<std::pair<unsigned, int >(unsigned, unsigned)>
1162511625 GetSourceIndex) {
1162611626
1162711627 const auto *VT = Call->getType()->getAs<VectorType>();
@@ -11644,8 +11644,16 @@ static bool evalShuffleGeneric(
1164411644
1164511645 for (unsigned DstIdx = 0; DstIdx != NumElts; ++DstIdx) {
1164611646 auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask);
11647- const APValue &Src = (SrcVecIdx == 0) ? A : B;
11648- ResultElements.push_back(Src.getVectorElt(SrcIdx));
11647+
11648+ if (SrcIdx < 0) {
11649+ // Negative SrcIdx: emit a floating-point zero (assumes a floating-point element type)
11650+ QualType ElemTy = VT->getElementType();
11651+ ResultElements.push_back(
11652+ APValue(APFloat::getZero(Info.Ctx.getFloatTypeSemantics(ElemTy))));
11653+ } else {
11654+ const APValue &Src = (SrcVecIdx == 0) ? A : B;
11655+ ResultElements.push_back(Src.getVectorElt(SrcIdx));
11656+ }
1164911657 }
1165011658
1165111659 Out = APValue(ResultElements.data(), ResultElements.size());
@@ -12438,7 +12446,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1243812446 if (!evalShuffleGeneric(
1243912447 Info, E, R,
1244012448 [](unsigned DstIdx,
12441- unsigned ShuffleMask) -> std::pair<unsigned, unsigned > {
12449+ unsigned ShuffleMask) -> std::pair<unsigned, int > {
1244212450 constexpr unsigned LaneBits = 128u;
1244312451 unsigned NumElemPerLane = LaneBits / 32;
1244412452 unsigned NumSelectableElems = NumElemPerLane / 2;
@@ -12451,7 +12459,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1245112459 unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
1245212460 unsigned SrcIdx = (ElemInLane < NumSelectableElems) ? 0 : 1;
1245312461 unsigned Index = (ShuffleMask >> BitIndex) & IndexMask;
12454- return {SrcIdx, LaneOffset + Index};
12462+ return {SrcIdx, static_cast<int>( LaneOffset + Index) };
1245512463 }))
1245612464 return false;
1245712465 return Success(R, E);
@@ -12463,7 +12471,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1246312471 if (!evalShuffleGeneric(
1246412472 Info, E, R,
1246512473 [](unsigned DstIdx,
12466- unsigned ShuffleMask) -> std::pair<unsigned, unsigned > {
12474+ unsigned ShuffleMask) -> std::pair<unsigned, int > {
1246712475 constexpr unsigned LaneBits = 128u;
1246812476 unsigned NumElemPerLane = LaneBits / 64;
1246912477 unsigned NumSelectableElems = NumElemPerLane / 2;
@@ -12476,7 +12484,31 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1247612484 unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
1247712485 unsigned SrcIdx = (ElemInLane < NumSelectableElems) ? 0 : 1;
1247812486 unsigned Index = (ShuffleMask >> BitIndex) & IndexMask;
12479- return {SrcIdx, LaneOffset + Index};
12487+ return {SrcIdx, static_cast<int>(LaneOffset + Index)};
12488+ }))
12489+ return false;
12490+ return Success(R, E);
12491+ }
12492+ case X86::BI__builtin_ia32_insertps128: {
12493+ APValue R;
12494+ if (!evalShuffleGeneric(
12495+ Info, E, R,
12496+ [](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> {
12497+ // Bits [3:0]: zero mask - if bit is set, zero this element
12498+ if ((Mask & (1 << DstIdx)) != 0) {
12499+ return {0, -1};
12500+ }
12501+ // Bits [7:6]: select element from source vector B (0-3)
12502+ // Bits [5:4]: select destination position (0-3)
12503+ unsigned SrcElem = (Mask >> 6) & 0x3;
12504+ unsigned DstElem = (Mask >> 4) & 0x3;
12505+ if (DstIdx == DstElem) {
12506+ // Insert element from source vector (B) at this position
12507+ return {1, static_cast<int>(SrcElem)};
12508+ } else {
12509+ // Copy from destination vector (A)
12510+ return {0, static_cast<int>(DstIdx)};
12511+ }
1248012512 }))
1248112513 return false;
1248212514 return Success(R, E);
0 commit comments