Skip to content

Commit 4846f6a

Browse files
committed
[X86][AVX] combineTargetShuffle - simplify the X86ISD::VPERM2X128 subvector matching
Simplify vperm2x128(concat(X,Y),concat(Z,W)) folding. Use collectConcatOps / ISD::INSERT_SUBVECTOR to find the source subvectors instead of hardcoded immediate matching.
1 parent 02e174e commit 4846f6a

File tree

1 file changed

+25
-33
lines changed

1 file changed

+25
-33
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 25 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -37324,41 +37324,33 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
3732437324
if (SDValue Res = canonicalizeLaneShuffleWithRepeatedOps(N, DAG, DL))
3732537325
return Res;
3732637326

37327-
// If both 128-bit values were inserted into high halves of 256-bit values,
37328-
// the shuffle can be reduced to a concatenation of subvectors:
37329-
// vperm2x128 (ins ?, X, C1), (ins ?, Y, C2), 0x31 --> concat X, Y
37330-
// Note: We are only looking for the exact high/high shuffle mask because we
37331-
// expect to fold other similar patterns before creating this opcode.
37332-
SDValue Ins0 = peekThroughBitcasts(N.getOperand(0));
37333-
SDValue Ins1 = peekThroughBitcasts(N.getOperand(1));
37327+
// Combine vperm2x128 subvector shuffle with an inner concat pattern.
37328+
// vperm2x128(concat(X,Y),concat(Z,W)) --> concat X,Y etc.
37329+
auto FindSubVector128 = [&](unsigned Idx) {
37330+
if (Idx > 3)
37331+
return SDValue();
37332+
SDValue Src = peekThroughBitcasts(N.getOperand(Idx < 2 ? 0 : 1));
37333+
SmallVector<SDValue> SubOps;
37334+
if (collectConcatOps(Src.getNode(), SubOps) && SubOps.size() == 2)
37335+
return SubOps[Idx & 1];
37336+
unsigned NumElts = Src.getValueType().getVectorNumElements();
37337+
if ((Idx & 1) == 1 && Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
37338+
Src.getOperand(1).getValueSizeInBits() == 128 &&
37339+
Src.getConstantOperandAPInt(2) == (NumElts / 2)) {
37340+
return Src.getOperand(1);
37341+
}
37342+
return SDValue();
37343+
};
3733437344
unsigned Imm = N.getConstantOperandVal(2);
37335-
37336-
// Handle subvector splat by tweaking values to match binary concat.
37337-
// vperm2x128 (ins ?, X, C1), undef, 0x11 ->
37338-
// vperm2x128 (ins ?, X, C1), (ins ?, X, C1), 0x31 -> concat X, X
37339-
if (Imm == 0x11 && Ins1.isUndef()) {
37340-
Imm = 0x31;
37341-
Ins1 = Ins0;
37345+
if (SDValue SubLo = FindSubVector128(Imm & 0x0F)) {
37346+
if (SDValue SubHi = FindSubVector128((Imm & 0xF0) >> 4)) {
37347+
MVT SubVT = VT.getHalfNumVectorElementsVT();
37348+
SubLo = DAG.getBitcast(SubVT, SubLo);
37349+
SubHi = DAG.getBitcast(SubVT, SubHi);
37350+
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SubLo, SubHi);
37351+
}
3734237352
}
37343-
37344-
if (!(Imm == 0x31 &&
37345-
Ins0.getOpcode() == ISD::INSERT_SUBVECTOR &&
37346-
Ins1.getOpcode() == ISD::INSERT_SUBVECTOR &&
37347-
Ins0.getValueType() == Ins1.getValueType()))
37348-
return SDValue();
37349-
37350-
SDValue X = Ins0.getOperand(1);
37351-
SDValue Y = Ins1.getOperand(1);
37352-
unsigned C1 = Ins0.getConstantOperandVal(2);
37353-
unsigned C2 = Ins1.getConstantOperandVal(2);
37354-
MVT SrcVT = X.getSimpleValueType();
37355-
unsigned SrcElts = SrcVT.getVectorNumElements();
37356-
if (SrcVT != Y.getSimpleValueType() || SrcVT.getSizeInBits() != 128 ||
37357-
C1 != SrcElts || C2 != SrcElts)
37358-
return SDValue();
37359-
37360-
return DAG.getBitcast(VT, DAG.getNode(ISD::CONCAT_VECTORS, DL,
37361-
Ins1.getValueType(), X, Y));
37353+
return SDValue();
3736237354
}
3736337355
case X86ISD::PSHUFD:
3736437356
case X86ISD::PSHUFLW:

0 commit comments

Comments
 (0)