@@ -42618,9 +42618,11 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
4261842618 return SDValue();
4261942619 }
4262042620 case X86ISD::VPERM2X128: {
42621- // Fold vperm2x128(bitcast(x),bitcast(y),c) -> bitcast(vperm2x128(x,y,c)).
4262242621 SDValue LHS = N->getOperand(0);
4262342622 SDValue RHS = N->getOperand(1);
42623+ unsigned Imm = N.getConstantOperandVal(2);
42624+
42625+ // Fold vperm2x128(bitcast(x),bitcast(y),c) -> bitcast(vperm2x128(x,y,c)).
4262442626 if (LHS.getOpcode() == ISD::BITCAST &&
4262542627 (RHS.getOpcode() == ISD::BITCAST || RHS.isUndef())) {
4262642628 EVT SrcVT = LHS.getOperand(0).getValueType();
@@ -42653,7 +42655,6 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
4265342655 }
4265442656 return SDValue();
4265542657 };
42656- unsigned Imm = N.getConstantOperandVal(2);
4265742658 if (SDValue SubLo = FindSubVector128(Imm & 0x0F)) {
4265842659 if (SDValue SubHi = FindSubVector128((Imm & 0xF0) >> 4)) {
4265942660 MVT SubVT = VT.getHalfNumVectorElementsVT();
@@ -42662,6 +42663,24 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
4266242663 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SubLo, SubHi);
4266342664 }
4266442665 }
42666+
42667+ // Attempt to match VBROADCAST*128 subvector broadcast load.
42668+ if (RHS.isUndef()) {
42669+ SmallVector<int, 4> Mask;
42670+ DecodeVPERM2X128Mask(4, Imm, Mask);
42671+ if (isUndefOrInRange(Mask, 0, 4)) {
42672+ bool SplatLo = isShuffleEquivalent(Mask, {0, 1, 0, 1}, LHS);
42673+ bool SplatHi = isShuffleEquivalent(Mask, {2, 3, 2, 3}, LHS);
42674+ if ((SplatLo || SplatHi) && !Subtarget.hasAVX512() &&
42675+ X86::mayFoldLoad(LHS, Subtarget)) {
42676+ MVT MemVT = VT.getHalfNumVectorElementsVT();
42677+ unsigned Ofs = SplatLo ? 0 : MemVT.getStoreSize();
42678+ return getBROADCAST_LOAD(X86ISD::SUBV_BROADCAST_LOAD, DL, VT, MemVT,
42679+ cast<LoadSDNode>(LHS), Ofs, DAG);
42680+ }
42681+ }
42682+ }
42683+
4266542684 return SDValue();
4266642685 }
4266742686 case X86ISD::PSHUFD:
0 commit comments