@@ -3411,9 +3411,8 @@ static bool interp__builtin_x86_byteshift(
34113411
34123412static bool interp__builtin_ia32_shuffle_generic (
34133413 InterpState &S, CodePtr OpPC, const CallExpr *Call,
3414- llvm::function_ref<std::pair<unsigned , unsigned >(unsigned , unsigned )>
3415- GetSourceIndex,
3416- llvm::function_ref<bool(unsigned , unsigned )> ShouldZero = nullptr) {
3414+ llvm::function_ref<std::pair<unsigned , int >(unsigned , unsigned )>
3415+ GetSourceIndex) {
34173416
34183417 assert (Call->getNumArgs () == 3 );
34193418 unsigned ShuffleMask = popToAPSInt (S, Call->getArg (2 )).getZExtValue ();
@@ -3428,7 +3427,9 @@ static bool interp__builtin_ia32_shuffle_generic(
34283427 const Pointer &Dst = S.Stk .peek <Pointer>();
34293428
34303429 for (unsigned DstIdx = 0 ; DstIdx != NumElems; ++DstIdx) {
3431- if (ShouldZero && ShouldZero (DstIdx, ShuffleMask)) {
3430+ auto [SrcVecIdx, SrcIdx] = GetSourceIndex (DstIdx, ShuffleMask);
3431+
3432+ if (SrcIdx < 0 ) {
34323433 // Zero out this element
34333434 if (ElemT == PT_Float) {
34343435 Dst.elem <Floating>(DstIdx) = Floating (
@@ -3437,7 +3438,6 @@ static bool interp__builtin_ia32_shuffle_generic(
34373438 INT_TYPE_SWITCH_NO_BOOL (ElemT, { Dst.elem <T>(DstIdx) = T::from (0 ); });
34383439 }
34393440 } else {
3440- auto [SrcVecIdx, SrcIdx] = GetSourceIndex (DstIdx, ShuffleMask);
34413441 const Pointer &Src = (SrcVecIdx == 0 ) ? A : B;
34423442 TYPE_SWITCH (ElemT, { Dst.elem <T>(DstIdx) = Src.elem <T>(SrcIdx); });
34433443 }
@@ -4393,7 +4393,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
43934393 unsigned SrcIdx = ElemInLane >= NumSelectableElems ? 1 : 0 ;
43944394 unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
43954395 unsigned Index = (ShuffleMask >> BitIndex) & IndexMask;
4396- return std::pair<unsigned , unsigned >{SrcIdx, LaneOffset + Index};
4396+ return std::pair<unsigned , int >{SrcIdx, static_cast < int >( LaneOffset + Index) };
43974397 });
43984398 case X86::BI__builtin_ia32_shufpd:
43994399 case X86::BI__builtin_ia32_shufpd256:
@@ -4411,27 +4411,27 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
44114411 unsigned SrcIdx = ElemInLane >= NumSelectableElems ? 1 : 0 ;
44124412 unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
44134413 unsigned Index = (ShuffleMask >> BitIndex) & IndexMask;
4414- return std::pair<unsigned , unsigned >{SrcIdx, LaneOffset + Index};
4414+ return std::pair<unsigned , int >{SrcIdx, static_cast < int >( LaneOffset + Index) };
44154415 });
44164416 case X86::BI__builtin_ia32_insertps128:
44174417 return interp__builtin_ia32_shuffle_generic (
44184418 S, OpPC, Call,
44194419 [](unsigned DstIdx, unsigned Mask) {
4420+ // Bits [3:0]: zero mask - if bit is set, zero this element
4421+ if ((Mask & (1 << DstIdx)) != 0 ) {
4422+ return std::pair<unsigned , int >{0 , -1 };
4423+ }
44204424 // Bits [7:6]: select element from source vector Y (0-3)
44214425 // Bits [5:4]: select destination position (0-3)
44224426 unsigned SrcElem = (Mask >> 6 ) & 0x3 ;
44234427 unsigned DstElem = (Mask >> 4 ) & 0x3 ;
44244428 if (DstIdx == DstElem) {
44254429 // Insert element from source vector (B) at this position
4426- return std::pair<unsigned , unsigned >{1 , SrcElem};
4430+ return std::pair<unsigned , int >{1 , static_cast < int >( SrcElem) };
44274431 } else {
44284432 // Copy from destination vector (A)
4429- return std::pair<unsigned , unsigned >{0 , DstIdx};
4433+ return std::pair<unsigned , int >{0 , static_cast < int >( DstIdx) };
44304434 }
4431- },
4432- [](unsigned DstIdx, unsigned Mask) {
4433- // Bits [3:0]: zero mask
4434- return (Mask & (1 << DstIdx)) != 0 ;
44354435 });
44364436 case X86::BI__builtin_ia32_pshufb128:
44374437 case X86::BI__builtin_ia32_pshufb256:
0 commit comments