@@ -37324,41 +37324,33 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
37324
37324
if (SDValue Res = canonicalizeLaneShuffleWithRepeatedOps(N, DAG, DL))
37325
37325
return Res;
37326
37326
37327
- // If both 128-bit values were inserted into high halves of 256-bit values,
37328
- // the shuffle can be reduced to a concatenation of subvectors:
37329
- // vperm2x128 (ins ?, X, C1), (ins ?, Y, C2), 0x31 --> concat X, Y
37330
- // Note: We are only looking for the exact high/high shuffle mask because we
37331
- // expect to fold other similar patterns before creating this opcode.
37332
- SDValue Ins0 = peekThroughBitcasts(N.getOperand(0));
37333
- SDValue Ins1 = peekThroughBitcasts(N.getOperand(1));
37327
+ // Combine vperm2x128 subvector shuffle with an inner concat pattern.
37328
+ // vperm2x128(concat(X,Y),concat(Z,W),Imm) --> concat of the two halves selected by Imm, e.g. Imm=0x20 --> concat(X,Z).
37329
+ auto FindSubVector128 = [&](unsigned Idx) { // Returns the subvector picked by selector Idx, or an empty SDValue if none is found.
37330
+ if (Idx > 3) // Only selectors 0-3 are handled; anything larger gives up.
37331
+ return SDValue();
37332
+ SDValue Src = peekThroughBitcasts(N.getOperand(Idx < 2 ? 0 : 1)); // Idx 0-1 read operand 0, Idx 2-3 read operand 1.
37333
+ SmallVector<SDValue> SubOps;
37334
+ if (collectConcatOps(Src.getNode(), SubOps) && SubOps.size() == 2) // Source was collected as exactly two concat operands:
37335
+ return SubOps[Idx & 1]; // even Idx -> first operand, odd Idx -> second operand.
37336
+ unsigned NumElts = Src.getValueType().getVectorNumElements();
37337
+ if ((Idx & 1) == 1 && Src.getOpcode() == ISD::INSERT_SUBVECTOR && // Otherwise only an odd Idx can match:
37338
+ Src.getOperand(1).getValueSizeInBits() == 128 && // the inserted value must be 128 bits wide
37339
+ Src.getConstantOperandAPInt(2) == (NumElts / 2)) { // and inserted at element index NumElts / 2.
37340
+ return Src.getOperand(1);
37341
+ }
37342
+ return SDValue(); // No matching subvector found.
37343
+ };
37334
37344
unsigned Imm = N.getConstantOperandVal(2);
37335
-
37336
- // Handle subvector splat by tweaking values to match binary concat.
37337
- // vperm2x128 (ins ?, X, C1), undef, 0x11 ->
37338
- // vperm2x128 (ins ?, X, C1), (ins ?, X, C1), 0x31 -> concat X, X
37339
- if (Imm == 0x11 && Ins1.isUndef()) {
37340
- Imm = 0x31 ;
37341
- Ins1 = Ins0;
37345
+ if (SDValue SubLo = FindSubVector128(Imm & 0x0F)) {
37346
+ if (SDValue SubHi = FindSubVector128((Imm & 0xF0) >> 4)) {
37347
+ MVT SubVT = VT.getHalfNumVectorElementsVT();
37348
+ SubLo = DAG.getBitcast(SubVT, SubLo);
37349
+ SubHi = DAG.getBitcast(SubVT, SubHi);
37350
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SubLo, SubHi) ;
37351
+ }
37342
37352
}
37343
-
37344
- if (!(Imm == 0x31 &&
37345
- Ins0.getOpcode() == ISD::INSERT_SUBVECTOR &&
37346
- Ins1.getOpcode() == ISD::INSERT_SUBVECTOR &&
37347
- Ins0.getValueType() == Ins1.getValueType()))
37348
- return SDValue();
37349
-
37350
- SDValue X = Ins0.getOperand(1);
37351
- SDValue Y = Ins1.getOperand(1);
37352
- unsigned C1 = Ins0.getConstantOperandVal(2);
37353
- unsigned C2 = Ins1.getConstantOperandVal(2);
37354
- MVT SrcVT = X.getSimpleValueType();
37355
- unsigned SrcElts = SrcVT.getVectorNumElements();
37356
- if (SrcVT != Y.getSimpleValueType() || SrcVT.getSizeInBits() != 128 ||
37357
- C1 != SrcElts || C2 != SrcElts)
37358
- return SDValue();
37359
-
37360
- return DAG.getBitcast(VT, DAG.getNode(ISD::CONCAT_VECTORS, DL,
37361
- Ins1.getValueType(), X, Y));
37353
+ return SDValue();
37362
37354
}
37363
37355
case X86ISD::PSHUFD:
37364
37356
case X86ISD::PSHUFLW:
0 commit comments