@@ -42618,9 +42618,11 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
42618
42618
return SDValue();
42619
42619
}
42620
42620
case X86ISD::VPERM2X128: {
42621
- // Fold vperm2x128(bitcast(x),bitcast(y),c) -> bitcast(vperm2x128(x,y,c)).
42622
42621
SDValue LHS = N->getOperand(0);
42623
42622
SDValue RHS = N->getOperand(1);
42623
+ unsigned Imm = N.getConstantOperandVal(2);
42624
+
42625
+ // Fold vperm2x128(bitcast(x),bitcast(y),c) -> bitcast(vperm2x128(x,y,c)).
42624
42626
if (LHS.getOpcode() == ISD::BITCAST &&
42625
42627
(RHS.getOpcode() == ISD::BITCAST || RHS.isUndef())) {
42626
42628
EVT SrcVT = LHS.getOperand(0).getValueType();
@@ -42653,7 +42655,6 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
42653
42655
}
42654
42656
return SDValue();
42655
42657
};
42656
- unsigned Imm = N.getConstantOperandVal(2);
42657
42658
if (SDValue SubLo = FindSubVector128(Imm & 0x0F)) {
42658
42659
if (SDValue SubHi = FindSubVector128((Imm & 0xF0) >> 4)) {
42659
42660
MVT SubVT = VT.getHalfNumVectorElementsVT();
@@ -42662,6 +42663,24 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
42662
42663
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SubLo, SubHi);
42663
42664
}
42664
42665
}
42666
+
42667
+ // Attempt to match VBROADCAST*128 subvector broadcast load.
42668
+ if (RHS.isUndef()) {
42669
+ SmallVector<int, 4> Mask;
42670
+ DecodeVPERM2X128Mask(4, Imm, Mask);
42671
+ if (isUndefOrInRange(Mask, 0, 4)) {
42672
+ bool SplatLo = isShuffleEquivalent(Mask, {0, 1, 0, 1}, LHS);
42673
+ bool SplatHi = isShuffleEquivalent(Mask, {2, 3, 2, 3}, LHS);
42674
+ if ((SplatLo || SplatHi) && !Subtarget.hasAVX512() &&
42675
+ X86::mayFoldLoad(LHS, Subtarget)) {
42676
+ MVT MemVT = VT.getHalfNumVectorElementsVT();
42677
+ unsigned Ofs = SplatLo ? 0 : MemVT.getStoreSize();
42678
+ return getBROADCAST_LOAD(X86ISD::SUBV_BROADCAST_LOAD, DL, VT, MemVT,
42679
+ cast<LoadSDNode>(LHS), Ofs, DAG);
42680
+ }
42681
+ }
42682
+ }
42683
+
42665
42684
return SDValue();
42666
42685
}
42667
42686
case X86ISD::PSHUFD:
0 commit comments