diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 77e599587edc3..08b82b03b7865 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -145,6 +145,10 @@ let Features = "mmx", Header = "mmintrin.h", Attributes = [NoThrow, Const] in { def _m_prefetch : X86LibBuiltin<"void(void *)">; } +let Features = "mmx", Attributes = [NoThrow, Const, Constexpr] in { + def pshufw : X86Builtin<"_Vector<4, short>(_Vector<4, short>, _Constant int)">; +} + // PRFCHW let Features = "prfchw", Header = "intrin.h", Attributes = [NoThrow, Const] in { def _m_prefetchw : X86LibBuiltin<"void(void volatile const *)">; @@ -217,10 +221,13 @@ let Features = "sse2", Attributes = [NoThrow] in { def movnti : X86Builtin<"void(int *, int)">; } -let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def pshufd : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">; +let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def pshuflw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int)">; + def pshufd : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">; def pshufhw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int)">; +} + +let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>, _Vector<16, char>)">; def sqrtpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">; def sqrtsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">; @@ -569,6 +576,12 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in def vec_set_v8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int, _Constant int)">; } +let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { + def pshuflw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">; + def pshufhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">; + def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">; +} + let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { def mpsadbw256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant char)">; def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant int)">; @@ -584,9 +597,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i def pmulhrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">; def pshufb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">; - def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">; - def pshuflw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">; - def pshufhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">; def psignb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">; def psignw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; def psignd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">; @@ -1989,9 +1999,28 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVect def prorq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">; } -let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def pshufhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">; def pshuflw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">; +} + +let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { + def pshufd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">; + def pshufd512_mask : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int, _Vector<16, int>, unsigned short)">; + def pshufd512_maskz : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int, unsigned short)">; +} + +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { + def pshufd256_mask : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int, _Vector<8, int>, unsigned char)">; + def pshufd256_maskz : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int, unsigned char)">; +} + +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { + def pshufd128_mask : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int, _Vector<4, int>, unsigned char)">; + def pshufd128_maskz : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int, unsigned char)">; +} + +let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { def psllw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">; } @@ -3266,7 +3295,6 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128> } let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def pshufd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">; def expanddf512_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, unsigned char)">; def expanddi512_mask : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, unsigned char)">; } @@ -5114,3 +5142,25 @@ let Features = "avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<256> let Features = "avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { def vsqrtbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>)">; } + +let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { + def pshuflw512_mask : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int, _Vector<32, short>, unsigned int)">; + def pshuflw512_maskz : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int, unsigned int)">; + def pshufhw512_mask : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int, _Vector<32, short>, unsigned int)">; + def pshufhw512_maskz : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int, unsigned int)">; +} + + +let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { + def pshuflw256_mask : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int, _Vector<16, short>, unsigned short)">; + def pshuflw256_maskz : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int, unsigned short)">; + def pshufhw256_mask : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int, _Vector<16, short>, unsigned short)">; + def pshufhw256_maskz : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int, unsigned short)">; +} + +let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { + def pshuflw128_mask : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int, _Vector<8, short>, unsigned char)">; + def pshuflw128_maskz : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int, unsigned char)">; + def pshufhw128_mask : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int, _Vector<8, short>, unsigned char)">; + def pshufhw128_maskz : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int, unsigned char)">; +} diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 891344d4e6ed0..1156626a30c8a 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2862,6 +2862,218 @@ static bool interp__builtin_blend(InterpState &S, CodePtr OpPC, return true; } +static bool interp__builtin_ia32_pshuflw_common(InterpState &S, CodePtr OpPC, + const CallExpr *Call) { + const unsigned NumArgs = Call->getNumArgs(); + assert(NumArgs == 2 || NumArgs == 3 || NumArgs == 4); + APSInt K; + Pointer SrcPT; + const bool HasMask = (NumArgs == 3) || (NumArgs == 4); + const bool IsMaskZ = (NumArgs == 3); + if (NumArgs == 4) { + K = popToAPSInt(S, Call->getArg(3)); + SrcPT = S.Stk.pop(); + } else if (NumArgs == 3) { + K = popToAPSInt(S, Call->getArg(2)); + } + + APSInt Imm = popToAPSInt(S, Call->getArg(1)); + const Pointer &Src = S.Stk.pop(); + const Pointer &Dst = S.Stk.peek(); + const unsigned NumElems = Dst.getNumElems(); + const PrimType ElemT = Dst.getFieldDesc()->getPrimType(); + const unsigned ElemBits = 16; + const unsigned LaneElems = 128u / ElemBits; + const unsigned Half = 4; + assert(NumElems % LaneElems == 0 && "pshuflw expects 128-bit lanes"); + const uint8_t Ctl = static_cast(Imm.getZExtValue()); + + for (unsigned i = 0; i != NumElems; ++i) { + const unsigned laneBase = (i / LaneElems) * LaneElems; + const unsigned inLane = i % LaneElems; + + unsigned srcIdx; + if (inLane < Half) { + const unsigned pos = inLane; + const unsigned sel = (Ctl >> (2 * pos)) & 0x3; + srcIdx = laneBase + sel; + } else { + srcIdx = i; + } + + APSInt Chosen; + INT_TYPE_SWITCH(ElemT, { Chosen = Src.elem(srcIdx).toAPSInt(); }); + + if (!HasMask) { + INT_TYPE_SWITCH_NO_BOOL(ElemT, + { Dst.elem(i) = static_cast(Chosen); }); + continue; + } + + const bool Keep = + (i < static_cast(K.getBitWidth())) ? K[i] : false; + + if (Keep) { + INT_TYPE_SWITCH_NO_BOOL(ElemT, + { Dst.elem(i) = static_cast(Chosen); }); + } else if (IsMaskZ) { + APSInt Zero(APInt(Chosen.getBitWidth(), 0)); + Zero.setIsSigned(Chosen.isSigned()); + INT_TYPE_SWITCH_NO_BOOL(ElemT, + { Dst.elem(i) = static_cast(Zero); }); + } else { + APSInt PT; + INT_TYPE_SWITCH(ElemT, { PT = SrcPT.elem(i).toAPSInt(); }); + INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem(i) = static_cast(PT); }); + } + } + + Dst.initializeAllElements(); + return true; +} + +static bool interp__builtin_ia32_pshufhw_common(InterpState &S, CodePtr OpPC, + const CallExpr *Call) { + (void)OpPC; + const unsigned NumArgs = Call->getNumArgs(); + assert(NumArgs == 2 || NumArgs == 3 || NumArgs == 4); + + APSInt K; + Pointer SrcPT; + const bool HasMask = (NumArgs == 3) || (NumArgs == 4); + const bool IsMaskZ = (NumArgs == 3); + + if (NumArgs == 4) { + K = popToAPSInt(S, Call->getArg(3)); + SrcPT = S.Stk.pop(); + } else if (NumArgs == 3) { + K = popToAPSInt(S, Call->getArg(2)); + } + + APSInt Imm = popToAPSInt(S, Call->getArg(1)); + const Pointer &Src = S.Stk.pop(); + const Pointer &Dst = S.Stk.peek(); + + const unsigned NumElems = Dst.getNumElems(); + const PrimType ElemT = Dst.getFieldDesc()->getPrimType(); + + const unsigned ElemBits = 16; + const unsigned LaneElems = 128u / ElemBits; + const unsigned HalfBase = 4; + assert(NumElems % LaneElems == 0); + + const uint8_t Ctl = static_cast(Imm.getZExtValue()); + + for (unsigned i = 0; i != NumElems; ++i) { + const unsigned laneBase = (i / LaneElems) * LaneElems; + const unsigned inLane = i % LaneElems; + + unsigned srcIdx; + if (inLane >= HalfBase) { + const unsigned pos = inLane - HalfBase; + const unsigned sel = (Ctl >> (2 * pos)) & 0x3; + srcIdx = laneBase + HalfBase + sel; + } else { + srcIdx = i; + } + + APSInt Chosen; + INT_TYPE_SWITCH(ElemT, { Chosen = Src.elem(srcIdx).toAPSInt(); }); + + if (!HasMask) { + INT_TYPE_SWITCH_NO_BOOL(ElemT, + { Dst.elem(i) = static_cast(Chosen); }); + continue; + } + + const bool Keep = + (i < static_cast(K.getBitWidth())) ? K[i] : false; + if (Keep) { + INT_TYPE_SWITCH_NO_BOOL(ElemT, + { Dst.elem(i) = static_cast(Chosen); }); + } else if (IsMaskZ) { + APSInt Zero(APInt(Chosen.getBitWidth(), 0)); + Zero.setIsSigned(Chosen.isSigned()); + INT_TYPE_SWITCH_NO_BOOL(ElemT, + { Dst.elem(i) = static_cast(Zero); }); + } else { + APSInt PT; + INT_TYPE_SWITCH(ElemT, { PT = SrcPT.elem(i).toAPSInt(); }); + INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem(i) = static_cast(PT); }); + } + } + + Dst.initializeAllElements(); + return true; +} + +static bool interp__builtin_ia32_pshufd_common(InterpState &S, CodePtr OpPC, + const CallExpr *Call) { + (void)OpPC; + const unsigned NumArgs = Call->getNumArgs(); + assert(NumArgs == 2 || NumArgs == 3 || NumArgs == 4); + + APSInt K; + Pointer SrcPT; + const bool HasMask = (NumArgs == 3) || (NumArgs == 4); + const bool IsMaskZ = (NumArgs == 3); + + if (NumArgs == 4) { + K = popToAPSInt(S, Call->getArg(3)); + SrcPT = S.Stk.pop(); + } else if (NumArgs == 3) { + K = popToAPSInt(S, Call->getArg(2)); + } + + APSInt Imm = popToAPSInt(S, Call->getArg(1)); + const Pointer &Src = S.Stk.pop(); + const Pointer &Dst = S.Stk.peek(); + + const unsigned NumElems = Dst.getNumElems(); + const PrimType ElemT = Dst.getFieldDesc()->getPrimType(); + + const unsigned ElemBits = 32; + const unsigned LaneElems = 128u / ElemBits; + assert(NumElems % LaneElems == 0); + + const uint8_t Ctl = static_cast(Imm.getZExtValue()); + + for (unsigned i = 0; i != NumElems; ++i) { + const unsigned laneBase = (i / LaneElems) * LaneElems; + const unsigned inLane = i % LaneElems; + const unsigned sel = (Ctl >> (2 * inLane)) & 0x3; + const unsigned srcIdx = laneBase + sel; + + APSInt Chosen; + INT_TYPE_SWITCH(ElemT, { Chosen = Src.elem(srcIdx).toAPSInt(); }); + + if (!HasMask) { + INT_TYPE_SWITCH_NO_BOOL(ElemT, + { Dst.elem(i) = static_cast(Chosen); }); + continue; + } + + const bool Keep = + (i < static_cast(K.getBitWidth())) ? K[i] : false; + if (Keep) { + INT_TYPE_SWITCH_NO_BOOL(ElemT, + { Dst.elem(i) = static_cast(Chosen); }); + } else if (IsMaskZ) { + APSInt Zero(APInt(Chosen.getBitWidth(), 0)); + Zero.setIsSigned(Chosen.isSigned()); + INT_TYPE_SWITCH_NO_BOOL(ElemT, + { Dst.elem(i) = static_cast(Zero); }); + } else { + APSInt PT; + INT_TYPE_SWITCH(ElemT, { PT = SrcPT.elem(i).toAPSInt(); }); + INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem(i) = static_cast(PT); }); + } + } + + Dst.initializeAllElements(); + return true; +} + static bool interp__builtin_elementwise_triop( InterpState &S, CodePtr OpPC, const CallExpr *Call, llvm::function_ref @@ -2967,6 +3179,7 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC, return true; } + bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, uint32_t BuiltinID) { if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID)) @@ -3417,6 +3630,39 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return interp__builtin_elementwise_int_binop(S, OpPC, Call, llvm::APIntOps::mulhs); + case clang::X86::BI__builtin_ia32_pshuflw: + case clang::X86::BI__builtin_ia32_pshuflw256: + case clang::X86::BI__builtin_ia32_pshuflw512: + case clang::X86::BI__builtin_ia32_pshuflw128_mask: + case clang::X86::BI__builtin_ia32_pshuflw256_mask: + case clang::X86::BI__builtin_ia32_pshuflw512_mask: + case clang::X86::BI__builtin_ia32_pshuflw128_maskz: + case clang::X86::BI__builtin_ia32_pshuflw256_maskz: + case clang::X86::BI__builtin_ia32_pshuflw512_maskz: + return interp__builtin_ia32_pshuflw_common(S, OpPC, Call); + + case clang::X86::BI__builtin_ia32_pshufhw: + case clang::X86::BI__builtin_ia32_pshufhw256: + case clang::X86::BI__builtin_ia32_pshufhw512: + case clang::X86::BI__builtin_ia32_pshufhw128_mask: + case clang::X86::BI__builtin_ia32_pshufhw256_mask: + case clang::X86::BI__builtin_ia32_pshufhw512_mask: + case clang::X86::BI__builtin_ia32_pshufhw128_maskz: + case clang::X86::BI__builtin_ia32_pshufhw256_maskz: + case clang::X86::BI__builtin_ia32_pshufhw512_maskz: + return interp__builtin_ia32_pshufhw_common(S, OpPC, Call); + + case clang::X86::BI__builtin_ia32_pshufd: + case clang::X86::BI__builtin_ia32_pshufd256: + case clang::X86::BI__builtin_ia32_pshufd512: + case clang::X86::BI__builtin_ia32_pshufd128_mask: + case clang::X86::BI__builtin_ia32_pshufd256_mask: + case clang::X86::BI__builtin_ia32_pshufd512_mask: + case clang::X86::BI__builtin_ia32_pshufd128_maskz: + case clang::X86::BI__builtin_ia32_pshufd256_maskz: + case clang::X86::BI__builtin_ia32_pshufd512_maskz: + return interp__builtin_ia32_pshufd_common(S, OpPC, Call); + case clang::X86::BI__builtin_ia32_psllv2di: case clang::X86::BI__builtin_ia32_psllv4di: case clang::X86::BI__builtin_ia32_psllv4si: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index b706b14945b6d..3fee702120abc 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11868,6 +11868,292 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } +case X86::BI__builtin_ia32_pshufw: { + APValue Src; + APSInt Imm; + if (!EvaluateAsRValue(Info, E->getArg(0), Src)) return false; + if (!EvaluateInteger(E->getArg(1), Imm, Info)) return false; + + unsigned N = Src.getVectorLength(); + SmallVector ResultElements; + ResultElements.reserve(N); + + uint8_t C = static_cast(Imm.getZExtValue()); + for (unsigned i = 0; i != N; ++i) { + unsigned sel = (C >> (2 * i)) & 0x3; + ResultElements.push_back(Src.getVectorElt(sel)); + } + return Success(APValue(ResultElements.data(), ResultElements.size()), E); +} + +case clang::X86::BI__builtin_ia32_pshuflw: +case clang::X86::BI__builtin_ia32_pshuflw256: +case clang::X86::BI__builtin_ia32_pshuflw512: +case clang::X86::BI__builtin_ia32_pshuflw128_mask: +case clang::X86::BI__builtin_ia32_pshuflw256_mask: +case clang::X86::BI__builtin_ia32_pshuflw512_mask: +case clang::X86::BI__builtin_ia32_pshuflw128_maskz: +case clang::X86::BI__builtin_ia32_pshuflw256_maskz: +case clang::X86::BI__builtin_ia32_pshuflw512_maskz: { + const unsigned BID = E->getBuiltinCallee(); + + const bool IsMask = + BID == clang::X86::BI__builtin_ia32_pshuflw128_mask || + BID == clang::X86::BI__builtin_ia32_pshuflw256_mask || + BID == clang::X86::BI__builtin_ia32_pshuflw512_mask; + + const bool IsMaskZ = + BID == clang::X86::BI__builtin_ia32_pshuflw128_maskz || + BID == clang::X86::BI__builtin_ia32_pshuflw256_maskz || + BID == clang::X86::BI__builtin_ia32_pshuflw512_maskz; + + const unsigned AIdx = 0, ImmIdx = 1; + const unsigned SrcIdx = 2; + const unsigned KIdx = IsMaskZ ? 2 : 3; + + APValue AVal, SrcVal; + APSInt Imm, K; + if (!EvaluateAsRValue(Info, E->getArg(AIdx), AVal)) return false; + if (!EvaluateInteger(E->getArg(ImmIdx), Imm, Info)) return false; + + const APSInt *KPtr = nullptr; + const APValue *PassThru = nullptr; + bool ZeroInactive = false; + + if (IsMask) { + if (!EvaluateAsRValue(Info, E->getArg(SrcIdx), SrcVal)) return false; + if (!EvaluateInteger(E->getArg(KIdx), K, Info)) return false; + KPtr = &K; PassThru = &SrcVal; ZeroInactive = false; + } else if (IsMaskZ) { + if (!EvaluateInteger(E->getArg(KIdx), K, Info)) return false; + KPtr = &K; PassThru = nullptr; ZeroInactive = true; + } + + const auto *VT = E->getType()->getAs(); + if (!VT) return false; + const unsigned NumElts = VT->getNumElements(); + + const unsigned ElemBits = 16; + const unsigned LaneElems = std::min(NumElts, 128u / ElemBits); + const unsigned Half = 4; + const uint8_t Ctl = static_cast(Imm.getZExtValue()); + const bool DestUnsigned = + VT->getElementType()->isUnsignedIntegerOrEnumerationType(); + + auto MakeZero = [&]() -> APValue { + return APValue(APSInt(APInt(ElemBits, 0), DestUnsigned)); + }; + + SmallVector ResultElements; + ResultElements.reserve(NumElts); + + for (unsigned i = 0; i < NumElts; ++i) { + const unsigned laneBase = (i / LaneElems) * LaneElems; + const unsigned inLane = i % LaneElems; + + APValue Chosen; + if (inLane < Half) { + const unsigned pos = inLane; + const unsigned sel = (Ctl >> (2 * pos)) & 0x3; + const unsigned srcIdx = laneBase + sel; + Chosen = AVal.getVectorElt(srcIdx); + } else { + Chosen = AVal.getVectorElt(i); + } + + if (KPtr) { + const bool Keep = (i < KPtr->getBitWidth()) ? (*KPtr)[i] : false; + if (Keep) { + ResultElements.push_back(Chosen); + } else if (ZeroInactive) { + ResultElements.push_back(MakeZero()); + } else { + const APValue &PT = PassThru ? PassThru->getVectorElt(i) + : AVal.getVectorElt(i); + ResultElements.push_back(PT); + } + } else { + ResultElements.push_back(Chosen); + } + } + return Success(APValue(ResultElements.data(), ResultElements.size()), E); +} + +case clang::X86::BI__builtin_ia32_pshufhw: +case clang::X86::BI__builtin_ia32_pshufhw256: +case clang::X86::BI__builtin_ia32_pshufhw512: +case clang::X86::BI__builtin_ia32_pshufhw128_mask: +case clang::X86::BI__builtin_ia32_pshufhw256_mask: +case clang::X86::BI__builtin_ia32_pshufhw512_mask: +case clang::X86::BI__builtin_ia32_pshufhw128_maskz: +case clang::X86::BI__builtin_ia32_pshufhw256_maskz: +case clang::X86::BI__builtin_ia32_pshufhw512_maskz: { + const unsigned BID = E->getBuiltinCallee(); + + const bool IsMask = + BID == clang::X86::BI__builtin_ia32_pshufhw128_mask || + BID == clang::X86::BI__builtin_ia32_pshufhw256_mask || + BID == clang::X86::BI__builtin_ia32_pshufhw512_mask; + + const bool IsMaskZ = + BID == clang::X86::BI__builtin_ia32_pshufhw128_maskz || + BID == clang::X86::BI__builtin_ia32_pshufhw256_maskz || + BID == clang::X86::BI__builtin_ia32_pshufhw512_maskz; + + const unsigned AIdx = 0, ImmIdx = 1; + const unsigned SrcIdx = 2; + const unsigned KIdx = IsMaskZ ? 2 : 3; + + APValue AVal, SrcVal; + APSInt Imm, K; + if (!EvaluateAsRValue(Info, E->getArg(AIdx), AVal)) return false; + if (!EvaluateInteger(E->getArg(ImmIdx), Imm, Info)) return false; + + const APSInt *KPtr = nullptr; + const APValue *PassThru = nullptr; + bool ZeroInactive = false; + if (IsMask) { + if (!EvaluateAsRValue(Info, E->getArg(SrcIdx), SrcVal)) return false; + if (!EvaluateInteger(E->getArg(KIdx), K, Info)) return false; + KPtr = &K; PassThru = &SrcVal; ZeroInactive = false; + } else if (IsMaskZ) { + if (!EvaluateInteger(E->getArg(KIdx), K, Info)) return false; + KPtr = &K; PassThru = nullptr; ZeroInactive = true; + } + + const auto *VT = E->getType()->getAs(); + if (!VT) return false; + const unsigned NumElts = VT->getNumElements(); + const unsigned ElemBits = 16; + const unsigned LaneElems = std::min(NumElts, 128u / ElemBits); + const unsigned Half = 4; + const uint8_t Ctl = static_cast(Imm.getZExtValue()); + const bool DestUnsigned = + VT->getElementType()->isUnsignedIntegerOrEnumerationType(); + + auto MakeZero = [&]() -> APValue { + return APValue(APSInt(APInt(ElemBits, 0), DestUnsigned)); + }; + + SmallVector Out; + Out.reserve(NumElts); + + for (unsigned i = 0; i < NumElts; ++i) { + const unsigned laneBase = (i / LaneElems) * LaneElems; + const unsigned inLane = i % LaneElems; + + APValue Chosen; + if (inLane >= Half) { + const unsigned pos = inLane - Half; + const unsigned sel = (Ctl >> (2 * pos)) & 0x3; + const unsigned srcIdx = laneBase + Half + sel; + Chosen = AVal.getVectorElt(srcIdx); + } else { + Chosen = AVal.getVectorElt(i); + } + + if (KPtr) { + const bool Keep = (i < KPtr->getBitWidth()) ? (*KPtr)[i] : false; + if (Keep) { + Out.push_back(Chosen); + } else if (ZeroInactive) { + Out.push_back(MakeZero()); + } else { + const APValue &PT = PassThru ? PassThru->getVectorElt(i) + : AVal.getVectorElt(i); + Out.push_back(PT); + } + } else { + Out.push_back(Chosen); + } + } + return Success(APValue(Out.data(), Out.size()), E); +} + +case clang::X86::BI__builtin_ia32_pshufd: +case clang::X86::BI__builtin_ia32_pshufd256: +case clang::X86::BI__builtin_ia32_pshufd512: +case clang::X86::BI__builtin_ia32_pshufd128_mask: +case clang::X86::BI__builtin_ia32_pshufd256_mask: +case clang::X86::BI__builtin_ia32_pshufd512_mask: +case clang::X86::BI__builtin_ia32_pshufd128_maskz: +case clang::X86::BI__builtin_ia32_pshufd256_maskz: +case clang::X86::BI__builtin_ia32_pshufd512_maskz: { + const unsigned BID = E->getBuiltinCallee(); + + const bool IsMask = + BID == clang::X86::BI__builtin_ia32_pshufd512_mask || + BID == clang::X86::BI__builtin_ia32_pshufd128_mask || + BID == clang::X86::BI__builtin_ia32_pshufd256_mask; + + const bool IsMaskZ = + BID == clang::X86::BI__builtin_ia32_pshufd512_maskz || + BID == clang::X86::BI__builtin_ia32_pshufd128_maskz || + BID == clang::X86::BI__builtin_ia32_pshufd256_maskz; + + const unsigned AIdx = 0, ImmIdx = 1; + const unsigned SrcIdx = 2; + const unsigned KIdx = IsMaskZ ? 2 : 3; + + APValue AVal, SrcVal; + APSInt Imm, K; + if (!EvaluateAsRValue(Info, E->getArg(AIdx), AVal)) return false; + if (!EvaluateInteger(E->getArg(ImmIdx), Imm, Info)) return false; + + const APSInt *KPtr = nullptr; + const APValue *PassThru = nullptr; + bool ZeroInactive = false; + if (IsMask) { + if (!EvaluateAsRValue(Info, E->getArg(SrcIdx), SrcVal)) return false; + if (!EvaluateInteger(E->getArg(KIdx), K, Info)) return false; + KPtr = &K; PassThru = &SrcVal; ZeroInactive = false; + } else if (IsMaskZ) { + if (!EvaluateInteger(E->getArg(KIdx), K, Info)) return false; + KPtr = &K; PassThru = nullptr; ZeroInactive = true; + } + + const auto *VT = E->getType()->getAs(); + if (!VT) return false; + const unsigned NumElts = VT->getNumElements(); + const unsigned ElemBits = 32; + const unsigned LaneElems = std::min(NumElts, 128u / ElemBits); + const uint8_t Ctl = static_cast(Imm.getZExtValue()); + const bool DestUnsigned = + VT->getElementType()->isUnsignedIntegerOrEnumerationType(); + + auto MakeZero = [&]() -> APValue { + return APValue(APSInt(APInt(ElemBits, 0), DestUnsigned)); + }; + + SmallVector Out; + Out.reserve(NumElts); + + for (unsigned i = 0; i < NumElts; ++i) { + const unsigned laneBase = (i / LaneElems) * LaneElems; + const unsigned inLane = i % LaneElems; + + const unsigned pos = inLane & 3; + const unsigned sel = (Ctl >> (2 * pos)) & 0x3; + const unsigned srcIdx = laneBase + sel; + APValue Chosen = AVal.getVectorElt(srcIdx); + + if (KPtr) { + const bool Keep = (i < KPtr->getBitWidth()) ? (*KPtr)[i] : false; + if (Keep) { + Out.push_back(Chosen); + } else if (ZeroInactive) { + Out.push_back(MakeZero()); + } else { + const APValue &PT = PassThru ? PassThru->getVectorElt(i) + : AVal.getVectorElt(i); + Out.push_back(PT); + } + } else { + Out.push_back(Chosen); + } + } + return Success(APValue(Out.data(), Out.size()), E); +} case clang::X86::BI__builtin_ia32_vprotbi: case clang::X86::BI__builtin_ia32_vprotdi: @@ -12191,6 +12477,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + case X86::BI__builtin_ia32_insertf32x4_256: case X86::BI__builtin_ia32_inserti32x4_256: case X86::BI__builtin_ia32_insertf64x2_256: diff --git a/clang/lib/Headers/mmintrin.h b/clang/lib/Headers/mmintrin.h index 5f617530b6f78..039f8c5cacddb 100644 --- a/clang/lib/Headers/mmintrin.h +++ b/clang/lib/Headers/mmintrin.h @@ -43,10 +43,15 @@ typedef char __v16qi __attribute__((__vector_size__(16))); __attribute__((__always_inline__, __nodebug__, __target__("sse2"), \ __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS_MMX \ + __attribute__((__always_inline__, __nodebug__, __target__("mmx"))) + #if defined(__cplusplus) && (__cplusplus >= 201103L) #define __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR __DEFAULT_FN_ATTRS_SSE2 constexpr +#define __DEFAULT_FN_ATTRS_MMX_CONSTEXPR __DEFAULT_FN_ATTRS_MMX constexpr #else #define __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR __DEFAULT_FN_ATTRS_SSE2 +#define __DEFAULT_FN_ATTRS_MMX_CONSTEXPR __DEFAULT_FN_ATTRS_MMX #endif #define __trunc64(x) \ @@ -187,6 +192,7 @@ _mm_packs_pi32(__m64 __m1, __m64 __m2) { (__v4si)__builtin_shufflevector(__m1, __m2, 0, 1), (__v4si){})); } + /// Converts, with saturation, 16-bit signed integers from both 64-bit integer /// vector parameters of [4 x i16] into 8-bit unsigned integer values, and /// constructs a 64-bit integer vector of [8 x i8] as the result. diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index eff2797e87c75..2e34bd83524a8 100644 --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -1122,6 +1122,11 @@ __m256i test_mm256_shufflelo_epi16(__m256i a) { return _mm256_shufflelo_epi16(a, 83); } +TEST_CONSTEXPR(match_v8si(_mm256_shuffle_epi32((((__m256i)(__v8si){0,1,2,3,4,5,6,7})), 15), 3,3,0,0, 7,7,4,4)); +TEST_CONSTEXPR(match_v16hi(_mm256_shufflehi_epi16((((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15})), 107), 0,1,2,3, 7,6,6,5, 8,9,10,11, 15,14,14,13)); +TEST_CONSTEXPR(match_v16hi(_mm256_shufflelo_epi16(((__m256i)(__v16hi){ 0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15}), 83), 3,0,1,1, 4,5,6,7, 11,8,9,9, 12,13,14,15) ); + + __m256i test_mm256_sign_epi8(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_sign_epi8 // CHECK: call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index 3f42ac0268978..b388d2f0e668d 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -1890,7 +1890,6 @@ __m512i test_mm512_maskz_shufflehi_epi16(__mmask32 __U, __m512i __A) { // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_shufflehi_epi16(__U, __A, 5); } - __m512i test_mm512_shufflelo_epi16(__m512i __A) { // CHECK-LABEL: test_mm512_shufflelo_epi16 // CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> poison, <32 x i32> @@ -1911,6 +1910,17 @@ __m512i test_mm512_maskz_shufflelo_epi16(__mmask32 __U, __m512i __A) { return _mm512_maskz_shufflelo_epi16(__U, __A, 5); } + +//_mm512_shufflehi_epi16 , _mm512_mask_shufflehi_epi16 , _mm512_maskz_shufflehi_epi16 +TEST_CONSTEXPR(match_v32hi(_mm512_shufflehi_epi16((((__m512i)(__v32hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31})), 5), 0,1,2,3, 5,5,4,4, 8,9,10,11, 13,13,12,12, 16,17,18,19, 21,21,20,20, 24,25,26,27, 29,29,28,28)); +TEST_CONSTEXPR(match_v32hi(_mm512_mask_shufflehi_epi16((((__m512i)(__v32hi){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131})), 0xFFFF0000u, (((__m512i)(__v32hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31})), 5), 100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115, 16,17,18,19,21,21,20,20, 24,25,26,27,29,29,28,28)); +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_shufflehi_epi16(0xAAAAAAAAu, (((__m512i)(__v32hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31})), 5), 0,1,0,3,0,5,0,4, 0,9,0,11,0,13,0,12, 0,17,0,19,0,21,0,20, 0,25,0,27,0,29,0,28)); + +// _mm512_shufflelo_epi16, _mm512_mask_shufflelo_epi16, _mm512_maskz_shufflelo_epi16 +TEST_CONSTEXPR( match_v32hi(_mm512_shufflelo_epi16(((__m512i)(__v32hi){ 0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15, 16,17,18,19, 20,21,22,23, 24,25,26,27, 28,29,30,31}), 5), 1,1,0,0, 4,5,6,7, 9,9,8,8, 12,13,14,15, 17,17,16,16, 20,21,22,23, 25,25,24,24, 28,29,30,31)); +TEST_CONSTEXPR(match_v32hi(_mm512_mask_shufflelo_epi16((((__m512i)(__v32hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31})), 0xFFFFFFFF, (((__m512i)(__v32hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31})), 5), 1,1,0,0, 4,5,6,7, 9,9,8,8, 12,13,14,15, 17,17,16,16, 20,21,22,23, 25,25,24,24, 28,29,30,31)); +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_shufflelo_epi16(0xFFFFFFFF, (((__m512i)(__v32hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31})), 5), 1,1,0,0, 4,5,6,7, 9,9,8,8, 12,13,14,15, 17,17,16,16, 20,21,22,23, 25,25,24,24, 28,29,30,31)); + __m512i test_mm512_sllv_epi16(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_sllv_epi16 // CHECK: @llvm.x86.avx512.psllv.w.512( diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index 84eaad8d99e61..6a1ccacc8caeb 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -9088,6 +9088,20 @@ __m512i test_mm512_maskz_shuffle_epi32(__mmask16 __U, __m512i __A) { return _mm512_maskz_shuffle_epi32(__U, __A, 1); } +// _mm512_shuffle_epi32 +TEST_CONSTEXPR(match_v16si(_mm512_shuffle_epi32((((__m512i)(__v16si){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15})), 1), 1,0,0,0, 5,4,4,4, 9,8,8,8, 13,12,12,12)); + +// _mm512_mask_shuffle_epi32 +TEST_CONSTEXPR(match_v16si(_mm512_mask_shuffle_epi32(((__m512i)(__v16si){100,101,102,103,104,105,106,107,200,201,202,203,204,205,206,207}), 0xFFFFu, ((__m512i)(__v16si){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}), 1), 1,0,0,0, 5,4,4,4, 9,8,8,8, 13,12,12,12)); +TEST_CONSTEXPR(match_v16si(_mm512_mask_shuffle_epi32(((__m512i)(__v16si){100,101,102,103,104,105,106,107,200,201,202,203,204,205,206,207}), 0x0000u, ((__m512i)(__v16si){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}), 1), 100,101,102,103,104,105,106,107,200,201,202,203,204,205,206,207)); +TEST_CONSTEXPR(match_v16si(_mm512_mask_shuffle_epi32(((__m512i)(__v16si){100,101,102,103,104,105,106,107,200,201,202,203,204,205,206,207}), 0x00FFu, ((__m512i)(__v16si){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}), 1), 1,0,0,0, 5,4,4,4, 200,201,202,203,204,205,206,207)); + +// _mm512_maskz_shuffle_epi32 +TEST_CONSTEXPR(match_v16si(_mm512_maskz_shuffle_epi32(0xFFFFu, ((__m512i)(__v16si){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}), 1), 1,0,0,0, 5,4,4,4, 9,8,8,8, 13,12,12,12)); +TEST_CONSTEXPR(match_v16si(_mm512_maskz_shuffle_epi32(0x5555u, ((__m512i)(__v16si){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}), 1), 1,0,0,0, 5,0,4,0, 9,0,8,0, 13,0,12,0)); +TEST_CONSTEXPR(match_v16si(_mm512_maskz_shuffle_epi32(0x8001u, ((__m512i)(__v16si){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}), 1), 1,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,12)); + + __m512d test_mm512_mask_expand_pd(__m512d __W, __mmask8 __U, __m512d __A) { // CHECK-LABEL: test_mm512_mask_expand_pd // CHECK: @llvm.x86.avx512.mask.expand diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c index 5282c7ab06dea..ed26bb3d35fde 100644 --- a/clang/test/CodeGen/X86/avx512vl-builtins.c +++ b/clang/test/CodeGen/X86/avx512vl-builtins.c @@ -10046,6 +10046,26 @@ __m256i test_mm256_maskz_shuffle_epi32(__mmask8 __U, __m256i __A) { return _mm256_maskz_shuffle_epi32(__U, __A, 2); } + +// 128-bit (_mm_mask_shuffle_epi32 / _mm_maskz_shuffle_epi32) +TEST_CONSTEXPR(match_v4si(_mm_mask_shuffle_epi32(((__m128i)(__v4si){100,101,102,103}), 0x0Fu, ((__m128i)(__v4si){0,1,2,3}), 1), 1,0,0,0)); +TEST_CONSTEXPR(match_v4si(_mm_mask_shuffle_epi32(((__m128i)(__v4si){100,101,102,103}), 0x0Au, ((__m128i)(__v4si){0,1,2,3}), 1), 100,0,102,0)); +TEST_CONSTEXPR(match_v4si(_mm_mask_shuffle_epi32(((__m128i)(__v4si){100,101,102,103}), 0x05u, ((__m128i)(__v4si){0,1,2,3}), 1), 1,101,0,103)); +TEST_CONSTEXPR(match_v4si(_mm_mask_shuffle_epi32(((__m128i)(__v4si){100,101,102,103}), 0x00u, ((__m128i)(__v4si){0,1,2,3}), 1), 100,101,102,103)); + +TEST_CONSTEXPR(match_v4si(_mm_maskz_shuffle_epi32(0x01u, ((__m128i)(__v4si){0,1,2,3}), 2), 2,0,0,0)); +TEST_CONSTEXPR(match_v4si(_mm_maskz_shuffle_epi32(0x0Au, ((__m128i)(__v4si){0,1,2,3}), 2), 0,0,0,0)); +TEST_CONSTEXPR(match_v4si(_mm_maskz_shuffle_epi32(0x0Fu, ((__m128i)(__v4si){0,1,2,3}), 2), 2,0,0,0)); + +// 256-bit (_mm256_mask_shuffle_epi32 / _mm256_maskz_shuffle_epi32) +TEST_CONSTEXPR(match_v8si(_mm256_mask_shuffle_epi32(((__m256i)(__v8si){100,101,102,103,104,105,106,107}), 0xF0u, ((__m256i)(__v8si){0,1,2,3,4,5,6,7}), 2), 100,101,102,103, 6,4,4,4)); +TEST_CONSTEXPR(match_v8si(_mm256_mask_shuffle_epi32(((__m256i)(__v8si){100,101,102,103,104,105,106,107}), 0x33u, ((__m256i)(__v8si){0,1,2,3,4,5,6,7}), 2), 2,0,102,103, 6,4,106,107)); +TEST_CONSTEXPR(match_v8si(_mm256_mask_shuffle_epi32(((__m256i)(__v8si){100,101,102,103,104,105,106,107}), 0x00u, ((__m256i)(__v8si){0,1,2,3,4,5,6,7}), 2), 100,101,102,103,104,105,106,107)); + +TEST_CONSTEXPR(match_v8si(_mm256_maskz_shuffle_epi32(0x33u, ((__m256i)(__v8si){0,1,2,3,4,5,6,7}), 2), 2,0,0,0, 6,4,0,0)); +TEST_CONSTEXPR(match_v8si(_mm256_maskz_shuffle_epi32(0xAAu, ((__m256i)(__v8si){0,1,2,3,4,5,6,7}), 2), 0,0,0,0, 0,4,0,4)); +TEST_CONSTEXPR(match_v8si(_mm256_maskz_shuffle_epi32(0xFFu, ((__m256i)(__v8si){0,1,2,3,4,5,6,7}), 2), 2,0,0,0, 6,4,4,4)); + __m128d test_mm_mask_mov_pd(__m128d __W, __mmask8 __U, __m128d __A) { // CHECK-LABEL: test_mm_mask_mov_pd // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c index 6c9c80efcef9d..f21ba94d05800 100644 --- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c @@ -3420,7 +3420,6 @@ __m256i test_mm256_mask_shufflehi_epi16(__m256i __W, __mmask16 __U, __m256i __A) // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_mask_shufflehi_epi16(__W, __U, __A, 5); } - __m256i test_mm256_maskz_shufflehi_epi16(__mmask16 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_shufflehi_epi16 // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> @@ -3442,6 +3441,66 @@ __m256i test_mm256_maskz_shufflelo_epi16(__mmask16 __U, __m256i __A) { return _mm256_maskz_shufflelo_epi16(__U, __A, 5); } + +// 128-bit shufflelo (mask) +TEST_CONSTEXPR(match_v8hi(_mm_mask_shufflelo_epi16(((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),0xFF,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),1,1,0,0,4,5,6,7)); +TEST_CONSTEXPR(match_v8hi(_mm_mask_shufflelo_epi16(((__m128i)(__v8hi){100,101,102,103,104,105,106,107}),0x00u,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),100,101,102,103,104,105,106,107)); +TEST_CONSTEXPR(match_v8hi(_mm_mask_shufflelo_epi16(((__m128i)(__v8hi){100,101,102,103,104,105,106,107}),0x0Fu,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),1,1,0,0,104,105,106,107)); +TEST_CONSTEXPR(match_v8hi(_mm_mask_shufflelo_epi16(((__m128i)(__v8hi){100,101,102,103,104,105,106,107}),0xF0u,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),100,101,102,103,4,5,6,7)); +TEST_CONSTEXPR(match_v8hi(_mm_mask_shufflelo_epi16(((__m128i)(__v8hi){100,101,102,103,104,105,106,107}),0xAAu,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),100,1,102,0,104,5,106,7)); +TEST_CONSTEXPR(match_v8hi(_mm_mask_shufflelo_epi16(((__m128i)(__v8hi){100,101,102,103,104,105,106,107}),0x55u,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),1,101,0,103,4,105,6,107)); + +// 128-bit shufflelo (maskz) +TEST_CONSTEXPR(match_v8hi(_mm_maskz_shufflelo_epi16(0xFF,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),1,1,0,0,4,5,6,7)); +TEST_CONSTEXPR(match_v8hi(_mm_maskz_shufflelo_epi16(0x0Fu,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),1,1,0,0,0,0,0,0)); +TEST_CONSTEXPR(match_v8hi(_mm_maskz_shufflelo_epi16(0xF0u,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),0,0,0,0,4,5,6,7)); +TEST_CONSTEXPR(match_v8hi(_mm_maskz_shufflelo_epi16(0xAAu,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),0,1,0,0,0,5,0,7)); +TEST_CONSTEXPR(match_v8hi(_mm_maskz_shufflelo_epi16(0x55u,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),1,0,0,0,4,0,6,0)); + +// 128-bit shufflehi (mask)) +TEST_CONSTEXPR(match_v8hi(_mm_mask_shufflehi_epi16(((__m128i)(__v8hi){100,101,102,103,104,105,106,107}),0xF0u,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),100,101,102,103,5,5,4,4)); +TEST_CONSTEXPR(match_v8hi(_mm_mask_shufflehi_epi16(((__m128i)(__v8hi){100,101,102,103,104,105,106,107}),0x00u,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),100,101,102,103,104,105,106,107)); +TEST_CONSTEXPR(match_v8hi(_mm_mask_shufflehi_epi16(((__m128i)(__v8hi){100,101,102,103,104,105,106,107}),0xFFu,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),0,1,2,3,5,5,4,4)); +TEST_CONSTEXPR(match_v8hi(_mm_mask_shufflehi_epi16(((__m128i)(__v8hi){100,101,102,103,104,105,106,107}),0x0Fu,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),0,1,2,3,104,105,106,107)); +TEST_CONSTEXPR(match_v8hi(_mm_mask_shufflehi_epi16(((__m128i)(__v8hi){100,101,102,103,104,105,106,107}),0x55u,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),0,101,2,103,5,105,4,107)); +TEST_CONSTEXPR(match_v8hi(_mm_mask_shufflehi_epi16(((__m128i)(__v8hi){100,101,102,103,104,105,106,107}),0xAAu,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),100,1,102,3,104,5,106,4)); + +// 128-bit shufflehi (maskz) +TEST_CONSTEXPR(match_v8hi(_mm_maskz_shufflehi_epi16(0xF0u,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),0,0,0,0,5,5,4,4)); +TEST_CONSTEXPR(match_v8hi(_mm_maskz_shufflehi_epi16(0x00u,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),0,0,0,0,0,0,0,0)); +TEST_CONSTEXPR(match_v8hi(_mm_maskz_shufflehi_epi16(0xFFu,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),0,1,2,3,5,5,4,4)); +TEST_CONSTEXPR(match_v8hi(_mm_maskz_shufflehi_epi16(0x0Fu,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),0,1,2,3,0,0,0,0)); +TEST_CONSTEXPR(match_v8hi(_mm_maskz_shufflehi_epi16(0x55u,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),0,0,2,0,5,0,4,0)); +TEST_CONSTEXPR(match_v8hi(_mm_maskz_shufflehi_epi16(0xAAu,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),0,1,0,3,0,5,0,4)); + +// 256-bit shufflelo (mask) +TEST_CONSTEXPR(match_v16hi(_mm256_mask_shufflelo_epi16(((__m256i)(__v16hi){100,101,102,103,104,105,106,107,200,201,202,203,204,205,206,207}),0xFFFF,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),1,1,0,0,4,5,6,7,9,9,8,8,12,13,14,15)); +TEST_CONSTEXPR(match_v16hi(_mm256_mask_shufflelo_epi16(((__m256i)(__v16hi){100,101,102,103,104,105,106,107,200,201,202,203,204,205,206,207}),0x000Fu,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),1,1,0,0,104,105,106,107,200,201,202,203,204,205,206,207)); +TEST_CONSTEXPR(match_v16hi(_mm256_mask_shufflelo_epi16(((__m256i)(__v16hi){100,101,102,103,104,105,106,107,200,201,202,203,204,205,206,207}),0x00FFu,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),1,1,0,0,4,5,6,7,200,201,202,203,204,205,206,207)); +TEST_CONSTEXPR(match_v16hi(_mm256_mask_shufflelo_epi16(((__m256i)(__v16hi){100,101,102,103,104,105,106,107,200,201,202,203,204,205,206,207}),0xF00Fu,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),1,1,0,0,104,105,106,107,200,201,202,203,12,13,14,15)); + + +// 256-bit shufflelo (maskz) +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shufflelo_epi16(0xFFFF,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),1,1,0,0,4,5,6,7,9,9,8,8,12,13,14,15)); +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shufflelo_epi16(0x000Fu,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0)); +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shufflelo_epi16(0x00FFu,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),1,1,0,0,4,5,6,7,0,0,0,0,0,0,0,0)); +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shufflelo_epi16(0xF0F0u,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),0,0,0,0,4,5,6,7,0,0,0,0,12,13,14,15)); + +// 256-bit shufflehi (mask) +TEST_CONSTEXPR(match_v16hi(_mm256_mask_shufflehi_epi16(((__m256i)(__v16hi){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115}),0xFF00u,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),100,101,102,103,104,105,106,107,8,9,10,11,13,13,12,12)); +TEST_CONSTEXPR(match_v16hi(_mm256_mask_shufflehi_epi16(((__m256i)(__v16hi){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115}),0x0000u,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115)); +TEST_CONSTEXPR(match_v16hi(_mm256_mask_shufflehi_epi16(((__m256i)(__v16hi){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115}),0xFFFFu,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),0,1,2,3,5,5,4,4,8,9,10,11,13,13,12,12)); +TEST_CONSTEXPR(match_v16hi(_mm256_mask_shufflehi_epi16(((__m256i)(__v16hi){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115}),0x00FFu,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),0,1,2,3,5,5,4,4,108,109,110,111,112,113,114,115)); +TEST_CONSTEXPR(match_v16hi(_mm256_mask_shufflehi_epi16(((__m256i)(__v16hi){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115}),0x5555u,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),0,101,2,103,5,105,4,107,8,109,10,111,13,113,12,115)); + +// 256-bit shufflehi (maskz) +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shufflehi_epi16(0x0000u,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)); +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shufflehi_epi16(0xFFFFu,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),0,1,2,3,5,5,4,4,8,9,10,11,13,13,12,12)); +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shufflehi_epi16(0x00FFu,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),0,1,2,3,5,5,4,4,0,0,0,0,0,0,0,0)); +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shufflehi_epi16(0xFF00u,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),0,0,0,0,0,0,0,0,8,9,10,11,13,13,12,12)); +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shufflehi_epi16(0x5555u,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),0,0,2,0,5,0,4,0,8,0,10,0,13,0,12,0)); +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shufflehi_epi16(0xAAAAu,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),0,1,0,3,0,5,0,4,0,9,0,11,0,13,0,12)); + void test_mm_mask_cvtepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { // CHECK-LABEL: test_mm_mask_cvtepi16_storeu_epi8 diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c index 26c5f7315457e..539de13d44e6a 100644 --- a/clang/test/CodeGen/X86/mmx-builtins.c +++ b/clang/test/CodeGen/X86/mmx-builtins.c @@ -579,12 +579,13 @@ __m64 test_mm_shuffle_pi8(__m64 a, __m64 b) { return _mm_shuffle_pi8(a, b); } + __m64 test_mm_shuffle_pi16(__m64 a) { // CHECK-LABEL: test_mm_shuffle_pi16 // CHECK: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <4 x i32> return _mm_shuffle_pi16(a, 3); } - +TEST_CONSTEXPR(match_v4hi(_mm_shuffle_pi16(((__m64)(__v4hi){0,1,2,3}), 3), 3,0,0,0)); __m64 test_mm_sign_pi8(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_sign_pi8 // CHECK: call <16 x i8> @llvm.x86.ssse3.psign.b.128( diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c index 84b90c09444c2..b8423bdea4c54 100644 --- a/clang/test/CodeGen/X86/sse2-builtins.c +++ b/clang/test/CodeGen/X86/sse2-builtins.c @@ -1294,12 +1294,6 @@ __m128i test_mm_setzero_si128(void) { } TEST_CONSTEXPR(match_m128i(_mm_setzero_si128(), 0, 0)); -__m128i test_mm_shuffle_epi32(__m128i A) { - // CHECK-LABEL: test_mm_shuffle_epi32 - // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> zeroinitializer - return _mm_shuffle_epi32(A, 0); -} - __m128d test_mm_shuffle_pd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_shuffle_pd // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> @@ -1317,6 +1311,16 @@ __m128i test_mm_shufflelo_epi16(__m128i A) { // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> return _mm_shufflelo_epi16(A, 0); } +__m128i test_mm_shuffle_epi32(__m128i A) { + // CHECK-LABEL: test_mm_shuffle_epi32 + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> zeroinitializer + return _mm_shuffle_epi32(A, 0); +} + + +TEST_CONSTEXPR(match_v4si(_mm_shuffle_epi32(((__m128i)(__v4si){0,1,2,3}), 0), 0,0,0,0)); +TEST_CONSTEXPR(match_v8hi(_mm_shufflehi_epi16(((__m128i)(__v8hi){0,1,2,3,4,5,6,7}), 0), 0,1,2,3, 4,4,4,4)); +TEST_CONSTEXPR(match_v8hi(_mm_shufflelo_epi16(((__m128i)(__v8hi){0,1,2,3,4,5,6,7}), 0), 0,0,0,0, 4,5,6,7)); __m128i test_mm_sll_epi16(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_sll_epi16