Skip to content

Commit bcf6458

Browse files
committed
Generalise v4f64 shuffle concatenation to work with v8f64 as well
1 parent 3a6f20d commit bcf6458

File tree

1 file changed

+25
-28
lines changed

1 file changed

+25
-28
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 25 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -58739,18 +58739,6 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5873958739
DAG.getNode(X86ISD::VPERMILPI, DL, FloatVT, Res, Op0.getOperand(1));
5874058740
return DAG.getBitcast(VT, Res);
5874158741
}
58742-
if (!IsSplat && VT == MVT::v8f64) {
58743-
unsigned NumSubElts = Op0.getValueType().getVectorNumElements();
58744-
uint64_t Mask = (1ULL << NumSubElts) - 1;
58745-
uint64_t Idx = 0;
58746-
for (unsigned I = 0; I != NumOps; ++I) {
58747-
uint64_t SubIdx = Ops[I].getConstantOperandVal(1);
58748-
Idx |= (SubIdx & Mask) << (I * NumSubElts);
58749-
}
58750-
return DAG.getNode(X86ISD::VPERMILPI, DL, VT,
58751-
ConcatSubOperand(VT, Ops, 0),
58752-
DAG.getTargetConstant(Idx, DL, MVT::i8));
58753-
}
5875458742
break;
5875558743
case X86ISD::VPERMILPV:
5875658744
if (!IsSplat && (VT.is256BitVector() ||
@@ -59322,7 +59310,8 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5932259310
}
5932359311

5932459312
// We can always convert per-lane vXf64 shuffles into VSHUFPD.
59325-
if (!IsSplat && NumOps == 2 && VT == MVT::v4f64 &&
59313+
if (!IsSplat &&
59314+
(VT == MVT::v4f64 || (VT == MVT::v8f64 && Subtarget.useAVX512Regs())) &&
5932659315
all_of(Ops, [](SDValue Op) {
5932759316
return Op.hasOneUse() && (Op.getOpcode() == X86ISD::MOVDDUP ||
5932859317
Op.getOpcode() == X86ISD::SHUFP ||
@@ -59331,25 +59320,33 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5933159320
Op.getOpcode() == X86ISD::UNPCKL ||
5933259321
Op.getOpcode() == X86ISD::UNPCKH);
5933359322
})) {
59334-
SmallVector<SDValue, 2> SrcOps0, SrcOps1;
59335-
SmallVector<int, 8> SrcMask0, SrcMask1;
59336-
if (getTargetShuffleMask(Ops[0], /*AllowSentinelZero=*/false, SrcOps0,
59337-
SrcMask0) &&
59338-
getTargetShuffleMask(Ops[1], /*AllowSentinelZero=*/false, SrcOps1,
59339-
SrcMask1)) {
59340-
assert(SrcMask0.size() == 2 && SrcMask1.size() == 2 && "Bad shuffles");
59341-
SDValue LHS[] = {SrcOps0[SrcMask0[0] / 2], SrcOps1[SrcMask1[0] / 2]};
59342-
SDValue RHS[] = {SrcOps0[SrcMask0[1] / 2], SrcOps1[SrcMask1[1] / 2]};
59323+
MVT OpVT = Ops[0].getSimpleValueType();
59324+
unsigned NumOpElts = OpVT.getVectorNumElements();
59325+
SmallVector<SmallVector<SDValue, 2>, 4> SrcOps(NumOps);
59326+
SmallVector<SmallVector<int, 8>, 4> SrcMasks(NumOps);
59327+
if (all_of(seq<int>(NumOps), [&](int I) {
59328+
return getTargetShuffleMask(Ops[I], /*AllowSentinelZero=*/false,
59329+
SrcOps[I], SrcMasks[I]) &&
59330+
SrcMasks[I].size() == NumOpElts &&
59331+
all_of(SrcOps[I], [&OpVT](SDValue V) {
59332+
return V.getValueType() == OpVT;
59333+
});
59334+
})) {
59335+
bool Unary = true;
59336+
unsigned SHUFPDMask = 0;
59337+
SmallVector<SDValue, 4> LHS(NumOps), RHS(NumOps);
59338+
for (unsigned I = 0; I != NumOps; ++I) {
59339+
LHS[I] = SrcOps[I][SrcMasks[I][0] / NumOpElts];
59340+
RHS[I] = SrcOps[I][SrcMasks[I][1] / NumOpElts];
59341+
Unary &= LHS[I] == RHS[I];
59342+
for (unsigned J = 0; J != NumOpElts; ++J)
59343+
SHUFPDMask |= (SrcMasks[I][J] & 1) << ((I * NumOpElts) + J);
59344+
}
5934359345
SDValue Concat0 =
5934459346
combineConcatVectorOps(DL, VT, LHS, DAG, Subtarget, Depth + 1);
5934559347
SDValue Concat1 =
5934659348
combineConcatVectorOps(DL, VT, RHS, DAG, Subtarget, Depth + 1);
59347-
if (Concat0 || Concat1) {
59348-
unsigned SHUFPDMask = 0;
59349-
SHUFPDMask |= (SrcMask0[0] & 1) << 0;
59350-
SHUFPDMask |= (SrcMask0[1] & 1) << 1;
59351-
SHUFPDMask |= (SrcMask1[0] & 1) << 2;
59352-
SHUFPDMask |= (SrcMask1[1] & 1) << 3;
59349+
if (Unary || Concat0 || Concat1) {
5935359350
Concat0 =
5935459351
Concat0 ? Concat0 : DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LHS);
5935559352
Concat1 =

0 commit comments

Comments
 (0)