Skip to content

Commit 33d4cb3

Browse files
committed
[X86] combineX86ShufflesRecursively - replace Root node argument with opcode/valuetype/ismaskedshuffle data. NFC.
Makes it easier for combineX86ShufflesRecursively to handle length-changing shuffles up the shuffle chain.
1 parent 0735235 commit 33d4cb3

File tree

1 file changed

+20
-23
lines changed

1 file changed

+20
-23
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 20 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -40881,16 +40881,14 @@ namespace llvm {
4088140881
/// combine-ordering. To fix this, we should do the redundant instruction
4088240882
/// combining in this recursive walk.
4088340883
static SDValue combineX86ShufflesRecursively(
40884-
ArrayRef<SDValue> SrcOps, int SrcOpIndex, SDValue Root,
40884+
ArrayRef<SDValue> SrcOps, int SrcOpIndex, unsigned RootOpc, MVT RootVT,
4088540885
ArrayRef<int> RootMask, ArrayRef<const SDNode *> SrcNodes, unsigned Depth,
4088640886
unsigned MaxDepth, bool AllowVariableCrossLaneMask,
40887-
bool AllowVariablePerLaneMask, SelectionDAG &DAG, const SDLoc &DL,
40888-
const X86Subtarget &Subtarget) {
40887+
bool AllowVariablePerLaneMask, bool IsMaskedShuffle, SelectionDAG &DAG,
40888+
const SDLoc &DL, const X86Subtarget &Subtarget) {
4088940889
assert(!RootMask.empty() &&
4089040890
(RootMask.size() > 1 || (RootMask[0] == 0 && SrcOpIndex == 0)) &&
4089140891
"Illegal shuffle root mask");
40892-
unsigned RootOpc = Root.getOpcode();
40893-
MVT RootVT = Root.getSimpleValueType();
4089440892
assert(RootVT.isVector() && "Shuffles operate on vector types!");
4089540893
unsigned RootSizeInBits = RootVT.getSizeInBits();
4089640894

@@ -41185,8 +41183,9 @@ static SDValue combineX86ShufflesRecursively(
4118541183
AllowPerLaneVar = AllowVariablePerLaneMask;
4118641184
}
4118741185
if (SDValue Res = combineX86ShufflesRecursively(
41188-
Ops, i, Root, ResolvedMask, CombinedNodes, Depth + 1, MaxDepth,
41189-
AllowCrossLaneVar, AllowPerLaneVar, DAG, DL, Subtarget))
41186+
Ops, i, RootOpc, RootVT, ResolvedMask, CombinedNodes, Depth + 1,
41187+
MaxDepth, AllowCrossLaneVar, AllowPerLaneVar, IsMaskedShuffle,
41188+
DAG, DL, Subtarget))
4119041189
return Res;
4119141190
}
4119241191
}
@@ -41272,10 +41271,6 @@ static SDValue combineX86ShufflesRecursively(
4127241271
resolveTargetShuffleInputsAndMask(Ops, Mask);
4127341272
}
4127441273

41275-
// If we are a AVX512/EVEX target the mask element size should match the root
41276-
// element size to allow writemasks to be reused.
41277-
bool IsMaskedShuffle = isMaskableNode(Root, Subtarget);
41278-
4127941274
// We can only combine unary and binary shuffle mask cases.
4128041275
if (Ops.size() <= 2) {
4128141276
// Minor canonicalization of the accumulated shuffle mask to make it easier
@@ -41328,8 +41323,9 @@ static SDValue combineX86ShufflesRecursively(
4132841323
static SDValue combineX86ShufflesRecursively(SDValue Op, SelectionDAG &DAG,
4132941324
const X86Subtarget &Subtarget) {
4133041325
return combineX86ShufflesRecursively(
41331-
{Op}, 0, Op, {0}, {}, /*Depth*/ 0, X86::MaxShuffleCombineDepth,
41332-
/*AllowCrossLaneVarMask*/ true, /*AllowPerLaneVarMask*/ true, DAG,
41326+
{Op}, 0, Op.getOpcode(), Op.getSimpleValueType(), {0}, {}, /*Depth*/ 0,
41327+
X86::MaxShuffleCombineDepth, /*AllowCrossLaneVarMask*/ true,
41328+
/*AllowPerLaneVarMask*/ true, isMaskableNode(Op, Subtarget), DAG,
4133341329
SDLoc(Op), Subtarget);
4133441330
}
4133541331

@@ -41980,10 +41976,10 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
4198041976
for (unsigned i = 0; i != Scale; ++i)
4198141977
DemandedMask[i] = i;
4198241978
if (SDValue Res = combineX86ShufflesRecursively(
41983-
{BC}, 0, BC, DemandedMask, {}, /*Depth*/ 0,
41984-
X86::MaxShuffleCombineDepth,
41985-
/*AllowCrossLaneVarMask*/ true,
41986-
/*AllowPerLaneVarMask*/ true, DAG, DL, Subtarget))
41979+
{BC}, 0, BC.getOpcode(), BC.getSimpleValueType(), DemandedMask,
41980+
{}, /*Depth*/ 0, X86::MaxShuffleCombineDepth,
41981+
/*AllowCrossLaneVarMask*/ true, /*AllowPerLaneVarMask*/ true,
41982+
/*IsMaskedShuffle*/ false, DAG, DL, Subtarget))
4198741983
return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
4198841984
DAG.getBitcast(SrcVT, Res));
4198941985
}
@@ -43984,8 +43980,9 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
4398443980
DemandedMask[i] = i;
4398543981

4398643982
SDValue NewShuffle = combineX86ShufflesRecursively(
43987-
{Op}, 0, Op, DemandedMask, {}, 0, X86::MaxShuffleCombineDepth - Depth,
43988-
/*AllowCrossLaneVarMask*/ true, /*AllowPerLaneVarMask*/ true, TLO.DAG,
43983+
{Op}, 0, Op.getOpcode(), Op.getSimpleValueType(), DemandedMask, {}, 0,
43984+
X86::MaxShuffleCombineDepth - Depth, /*AllowCrossLaneVarMask*/ true,
43985+
/*AllowPerLaneVarMask*/ true, isMaskableNode(Op, Subtarget), TLO.DAG,
4398943986
SDLoc(Op), Subtarget);
4399043987
if (NewShuffle)
4399143988
return TLO.CombineTo(Op, NewShuffle);
@@ -51620,10 +51617,10 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
5162051617
}
5162151618

5162251619
if (SDValue Shuffle = combineX86ShufflesRecursively(
51623-
{SrcVec}, 0, SrcVec, ShuffleMask, {}, /*Depth*/ 1,
51624-
X86::MaxShuffleCombineDepth,
51625-
/*AllowVarCrossLaneMask*/ true,
51626-
/*AllowVarPerLaneMask*/ true, DAG, SDLoc(SrcVec), Subtarget))
51620+
{SrcVec}, 0, SrcVec.getOpcode(), SrcVec.getSimpleValueType(),
51621+
ShuffleMask, {}, /*Depth*/ 1, X86::MaxShuffleCombineDepth,
51622+
/*AllowVarCrossLaneMask*/ true, /*AllowVarPerLaneMask*/ true,
51623+
/*IsMaskedShuffle*/ false, DAG, SDLoc(SrcVec), Subtarget))
5162751624
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Shuffle,
5162851625
N0.getOperand(1));
5162951626
}

0 commit comments

Comments
 (0)