@@ -20822,7 +20822,8 @@ static SDValue truncateVectorWithPACKSS(EVT DstVT, SDValue In, const SDLoc &DL,
2082220822static SDValue matchTruncateWithPACK(unsigned &PackOpcode, EVT DstVT,
2082320823 SDValue In, const SDLoc &DL,
2082420824 SelectionDAG &DAG,
20825- const X86Subtarget &Subtarget) {
20825+ const X86Subtarget &Subtarget,
20826+ const SDNodeFlags Flags = SDNodeFlags()) {
2082620827 // Requires SSE2.
2082720828 if (!Subtarget.hasSSE2())
2082820829 return SDValue();
@@ -20868,7 +20869,8 @@ static SDValue matchTruncateWithPACK(unsigned &PackOpcode, EVT DstVT,
2086820869 // e.g. Masks, zext_in_reg, etc.
2086920870 // Pre-SSE41 we can only use PACKUSWB.
2087020871 KnownBits Known = DAG.computeKnownBits(In);
20871- if ((NumSrcEltBits - NumPackedZeroBits) <= Known.countMinLeadingZeros()) {
20872+ if ((Flags.hasNoUnsignedWrap() && NumDstEltBits <= NumPackedZeroBits) ||
20873+ (NumSrcEltBits - NumPackedZeroBits) <= Known.countMinLeadingZeros()) {
2087220874 PackOpcode = X86ISD::PACKUS;
2087320875 return In;
2087420876 }
@@ -20887,7 +20889,7 @@ static SDValue matchTruncateWithPACK(unsigned &PackOpcode, EVT DstVT,
2088720889 return SDValue();
2088820890
2088920891 unsigned MinSignBits = NumSrcEltBits - NumPackedSignBits;
20890- if (MinSignBits < NumSignBits) {
20892+ if (Flags.hasNoSignedWrap() || MinSignBits < NumSignBits) {
2089120893 PackOpcode = X86ISD::PACKSS;
2089220894 return In;
2089320895 }
@@ -20909,10 +20911,9 @@ static SDValue matchTruncateWithPACK(unsigned &PackOpcode, EVT DstVT,
2090920911/// Lowers a vector truncation of 'extended sign-bits' or 'extended
2091020912/// zero-bits' values (vXi16/vXi32/vXi64 to vXi8/vXi16/vXi32) into
2091120913/// X86ISD::PACKSS/PACKUS operations.
20912- static SDValue LowerTruncateVecPackWithSignBits(MVT DstVT, SDValue In,
20913- const SDLoc &DL,
20914- const X86Subtarget &Subtarget,
20915- SelectionDAG &DAG) {
20914+ static SDValue LowerTruncateVecPackWithSignBits(
20915+ MVT DstVT, SDValue In, const SDLoc &DL, const X86Subtarget &Subtarget,
20916+ SelectionDAG &DAG, const SDNodeFlags Flags = SDNodeFlags()) {
2091620917 MVT SrcVT = In.getSimpleValueType();
2091720918 MVT DstSVT = DstVT.getVectorElementType();
2091820919 MVT SrcSVT = SrcVT.getVectorElementType();
@@ -20934,8 +20935,8 @@ static SDValue LowerTruncateVecPackWithSignBits(MVT DstVT, SDValue In,
2093420935 }
2093520936
2093620937 unsigned PackOpcode;
20937- if (SDValue Src =
20938- matchTruncateWithPACK(PackOpcode, DstVT, In, DL, DAG, Subtarget))
20938+ if (SDValue Src = matchTruncateWithPACK(PackOpcode, DstVT, In, DL, DAG,
20939+ Subtarget, Flags ))
2093920940 return truncateVectorWithPACK(PackOpcode, DstVT, Src, DL, DAG, Subtarget);
2094020941
2094120942 return SDValue();
@@ -21105,8 +21106,8 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
2110521106 // Pre-AVX512 (or prefer-256bit) see if we can make use of PACKSS/PACKUS.
2110621107 if (!Subtarget.hasAVX512() ||
2110721108 (InVT.is512BitVector() && VT.is256BitVector()))
21108- if (SDValue SignPack =
21109- LowerTruncateVecPackWithSignBits( VT, In, DL, Subtarget, DAG))
21109+ if (SDValue SignPack = LowerTruncateVecPackWithSignBits(
21110+ VT, In, DL, Subtarget, DAG, Op->getFlags() ))
2111021111 return SignPack;
2111121112
2111221113 // Pre-AVX512 see if we can make use of PACKSS/PACKUS.
@@ -21123,8 +21124,8 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
2112321124 // Attempt to truncate with PACKUS/PACKSS even on AVX512 if we'd have to
2112421125 // concat from subvectors to use VPTRUNC etc.
2112521126 if (!Subtarget.hasAVX512() || isFreeToSplitVector(In.getNode(), DAG))
21126- if (SDValue SignPack =
21127- LowerTruncateVecPackWithSignBits( VT, In, DL, Subtarget, DAG))
21127+ if (SDValue SignPack = LowerTruncateVecPackWithSignBits(
21128+ VT, In, DL, Subtarget, DAG, Op->getFlags() ))
2112821129 return SignPack;
2112921130
2113021131 // vpmovqb/w/d, vpmovdb/w, vpmovwb
@@ -33594,10 +33595,10 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
3359433595
3359533596 // See if there are sufficient leading bits to perform a PACKUS/PACKSS.
3359633597 unsigned PackOpcode;
33597- if (SDValue Src =
33598- matchTruncateWithPACK(PackOpcode, VT, In, dl, DAG, Subtarget)) {
33599- if (SDValue Res = truncateVectorWithPACK(PackOpcode, VT, Src,
33600- dl, DAG, Subtarget)) {
33598+ if (SDValue Src = matchTruncateWithPACK(PackOpcode, VT, In, dl, DAG,
33599+ Subtarget, N->getFlags() )) {
33600+ if (SDValue Res =
33601+ truncateVectorWithPACK(PackOpcode, VT, Src, dl, DAG, Subtarget)) {
3360133602 Res = widenSubVector(WidenVT, Res, false, Subtarget, DAG, dl);
3360233603 Results.push_back(Res);
3360333604 return;
0 commit comments