@@ -20819,7 +20819,8 @@ static SDValue truncateVectorWithPACKSS(EVT DstVT, SDValue In, const SDLoc &DL,
2081920819static SDValue matchTruncateWithPACK(unsigned &PackOpcode, EVT DstVT,
2082020820 SDValue In, const SDLoc &DL,
2082120821 SelectionDAG &DAG,
20822- const X86Subtarget &Subtarget) {
20822+ const X86Subtarget &Subtarget,
20823+ const SDNodeFlags Flags = SDNodeFlags()) {
2082320824 // Requires SSE2.
2082420825 if (!Subtarget.hasSSE2())
2082520826 return SDValue();
@@ -20865,7 +20866,8 @@ static SDValue matchTruncateWithPACK(unsigned &PackOpcode, EVT DstVT,
2086520866 // e.g. Masks, zext_in_reg, etc.
2086620867 // Pre-SSE41 we can only use PACKUSWB.
2086720868 KnownBits Known = DAG.computeKnownBits(In);
20868- if ((NumSrcEltBits - NumPackedZeroBits) <= Known.countMinLeadingZeros()) {
20869+ if ((Flags.hasNoUnsignedWrap() && NumDstEltBits <= NumPackedZeroBits) ||
20870+ (NumSrcEltBits - NumPackedZeroBits) <= Known.countMinLeadingZeros()) {
2086920871 PackOpcode = X86ISD::PACKUS;
2087020872 return In;
2087120873 }
@@ -20884,7 +20886,7 @@ static SDValue matchTruncateWithPACK(unsigned &PackOpcode, EVT DstVT,
2088420886 return SDValue();
2088520887
2088620888 unsigned MinSignBits = NumSrcEltBits - NumPackedSignBits;
20887- if (MinSignBits < NumSignBits) {
20889+ if (Flags.hasNoSignedWrap() || MinSignBits < NumSignBits) {
2088820890 PackOpcode = X86ISD::PACKSS;
2088920891 return In;
2089020892 }
@@ -20906,10 +20908,9 @@ static SDValue matchTruncateWithPACK(unsigned &PackOpcode, EVT DstVT,
2090620908/// This function lowers a vector truncation of 'extended sign-bits' or
2090720909/// 'extended zero-bits' values.
2090820910/// vXi16/vXi32/vXi64 to vXi8/vXi16/vXi32 into X86ISD::PACKSS/PACKUS operations.
20909- static SDValue LowerTruncateVecPackWithSignBits(MVT DstVT, SDValue In,
20910- const SDLoc &DL,
20911- const X86Subtarget &Subtarget,
20912- SelectionDAG &DAG) {
20911+ static SDValue LowerTruncateVecPackWithSignBits(
20912+ MVT DstVT, SDValue In, const SDLoc &DL, const X86Subtarget &Subtarget,
20913+ SelectionDAG &DAG, const SDNodeFlags Flags = SDNodeFlags()) {
2091320914 MVT SrcVT = In.getSimpleValueType();
2091420915 MVT DstSVT = DstVT.getVectorElementType();
2091520916 MVT SrcSVT = SrcVT.getVectorElementType();
@@ -20931,8 +20932,8 @@ static SDValue LowerTruncateVecPackWithSignBits(MVT DstVT, SDValue In,
2093120932 }
2093220933
2093320934 unsigned PackOpcode;
20934- if (SDValue Src =
20935- matchTruncateWithPACK(PackOpcode, DstVT, In, DL, DAG, Subtarget))
20935+ if (SDValue Src = matchTruncateWithPACK(PackOpcode, DstVT, In, DL, DAG,
20936+ Subtarget, Flags ))
2093620937 return truncateVectorWithPACK(PackOpcode, DstVT, Src, DL, DAG, Subtarget);
2093720938
2093820939 return SDValue();
@@ -21102,8 +21103,8 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
2110221103 // Pre-AVX512 (or prefer-256bit) see if we can make use of PACKSS/PACKUS.
2110321104 if (!Subtarget.hasAVX512() ||
2110421105 (InVT.is512BitVector() && VT.is256BitVector()))
21105- if (SDValue SignPack =
21106- LowerTruncateVecPackWithSignBits( VT, In, DL, Subtarget, DAG))
21106+ if (SDValue SignPack = LowerTruncateVecPackWithSignBits(
21107+ VT, In, DL, Subtarget, DAG, Op->getFlags() ))
2110721108 return SignPack;
2110821109
2110921110 // Pre-AVX512 see if we can make use of PACKSS/PACKUS.
@@ -21120,8 +21121,8 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
2112021121 // Attempt to truncate with PACKUS/PACKSS even on AVX512 if we'd have to
2112121122 // concat from subvectors to use VPTRUNC etc.
2112221123 if (!Subtarget.hasAVX512() || isFreeToSplitVector(In.getNode(), DAG))
21123- if (SDValue SignPack =
21124- LowerTruncateVecPackWithSignBits( VT, In, DL, Subtarget, DAG))
21124+ if (SDValue SignPack = LowerTruncateVecPackWithSignBits(
21125+ VT, In, DL, Subtarget, DAG, Op->getFlags() ))
2112521126 return SignPack;
2112621127
2112721128 // vpmovqb/w/d, vpmovdb/w, vpmovwb
@@ -33578,10 +33579,10 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
3357833579
3357933580 // See if there are sufficient leading bits to perform a PACKUS/PACKSS.
3358033581 unsigned PackOpcode;
33581- if (SDValue Src =
33582- matchTruncateWithPACK(PackOpcode, VT, In, dl, DAG, Subtarget)) {
33583- if (SDValue Res = truncateVectorWithPACK(PackOpcode, VT, Src,
33584- dl, DAG, Subtarget)) {
33582+ if (SDValue Src = matchTruncateWithPACK(PackOpcode, VT, In, dl, DAG,
33583+ Subtarget, N->getFlags() )) {
33584+ if (SDValue Res =
33585+ truncateVectorWithPACK(PackOpcode, VT, Src, dl, DAG, Subtarget)) {
3358533586 Res = widenSubVector(WidenVT, Res, false, Subtarget, DAG, dl);
3358633587 Results.push_back(Res);
3358733588 return;
0 commit comments