@@ -8756,45 +8756,70 @@ static SDValue lowerVectorShuffleWithUNPCK(const SDLoc &DL, MVT VT,
8756
8756
8757
8757
// X86 has dedicated pack instructions that can handle specific truncation
8758
8758
// operations: PACKSS and PACKUS.
8759
- static SDValue lowerVectorShuffleWithPACK(const SDLoc &DL, MVT VT,
8760
- ArrayRef<int> Mask, SDValue V1,
8761
- SDValue V2, SelectionDAG &DAG,
8762
- const X86Subtarget &Subtarget) {
8759
+ static bool matchVectorShuffleWithPACK(MVT VT, MVT &SrcVT, SDValue &V1,
8760
+ SDValue &V2, unsigned &PackOpcode,
8761
+ ArrayRef<int> TargetMask,
8762
+ SelectionDAG &DAG,
8763
+ const X86Subtarget &Subtarget) {
8763
8764
unsigned NumElts = VT.getVectorNumElements();
8764
8765
unsigned BitSize = VT.getScalarSizeInBits();
8765
8766
MVT PackSVT = MVT::getIntegerVT(BitSize * 2);
8766
8767
MVT PackVT = MVT::getVectorVT(PackSVT, NumElts / 2);
8767
8768
8768
- auto LowerWithPACK = [&](SDValue N1, SDValue N2) {
8769
+ auto MatchPACK = [&](SDValue N1, SDValue N2) {
8769
8770
SDValue VV1 = DAG.getBitcast(PackVT, N1);
8770
8771
SDValue VV2 = DAG.getBitcast(PackVT, N2);
8771
8772
if ((N1.isUndef() || DAG.ComputeNumSignBits(VV1) > BitSize) &&
8772
- (N2.isUndef() || DAG.ComputeNumSignBits(VV2) > BitSize))
8773
- return DAG.getNode(X86ISD::PACKSS, DL, VT, VV1, VV2);
8773
+ (N2.isUndef() || DAG.ComputeNumSignBits(VV2) > BitSize)) {
8774
+ V1 = VV1;
8775
+ V2 = VV2;
8776
+ SrcVT = PackVT;
8777
+ PackOpcode = X86ISD::PACKSS;
8778
+ return true;
8779
+ }
8774
8780
8775
8781
if (Subtarget.hasSSE41() || PackSVT == MVT::i16) {
8776
8782
APInt ZeroMask = APInt::getHighBitsSet(BitSize * 2, BitSize);
8777
8783
if ((N1.isUndef() || DAG.MaskedValueIsZero(VV1, ZeroMask)) &&
8778
- (N2.isUndef() || DAG.MaskedValueIsZero(VV2, ZeroMask)))
8779
- return DAG.getNode(X86ISD::PACKUS, DL, VT, VV1, VV2);
8784
+ (N2.isUndef() || DAG.MaskedValueIsZero(VV2, ZeroMask))) {
8785
+ V1 = VV1;
8786
+ V2 = VV2;
8787
+ SrcVT = PackVT;
8788
+ PackOpcode = X86ISD::PACKUS;
8789
+ return true;
8790
+ }
8780
8791
}
8781
8792
8782
- return SDValue() ;
8793
+ return false ;
8783
8794
};
8784
8795
8785
8796
// Try binary shuffle.
8786
8797
SmallVector<int, 32> BinaryMask;
8787
8798
createPackShuffleMask(VT, BinaryMask, false);
8788
- if (isShuffleEquivalent(V1, V2, Mask , BinaryMask))
8789
- if (SDValue Pack = LowerWithPACK (V1, V2))
8790
- return Pack ;
8799
+ if (isTargetShuffleEquivalent(TargetMask , BinaryMask))
8800
+ if (MatchPACK (V1, V2))
8801
+ return true ;
8791
8802
8792
8803
// Try unary shuffle.
8793
8804
SmallVector<int, 32> UnaryMask;
8794
8805
createPackShuffleMask(VT, UnaryMask, true);
8795
- if (isShuffleEquivalent(V1, V2, Mask, UnaryMask))
8796
- if (SDValue Pack = LowerWithPACK(V1, V1))
8797
- return Pack;
8806
+ if (isTargetShuffleEquivalent(TargetMask, UnaryMask))
8807
+ if (MatchPACK(V1, V1))
8808
+ return true;
8809
+
8810
+ return false;
8811
+ }
8812
+
8813
+ static SDValue lowerVectorShuffleWithPACK(const SDLoc &DL, MVT VT,
8814
+ ArrayRef<int> Mask, SDValue V1,
8815
+ SDValue V2, SelectionDAG &DAG,
8816
+ const X86Subtarget &Subtarget) {
8817
+ MVT PackVT;
8818
+ unsigned PackOpcode;
8819
+ if (matchVectorShuffleWithPACK(VT, PackVT, V1, V2, PackOpcode, Mask, DAG,
8820
+ Subtarget))
8821
+ return DAG.getNode(PackOpcode, DL, VT, DAG.getBitcast(PackVT, V1),
8822
+ DAG.getBitcast(PackVT, V2));
8798
8823
8799
8824
return SDValue();
8800
8825
}
0 commit comments