Skip to content

Commit 876114f

Browse files
authored
[X86] Add widenBuildVector to create a wider build vector if the scalars are mergeable (llvm#167667)
See if each pair of scalar operands of a build vector can be freely merged together - typically if they've been split for some reason by legalization. If we can create a new build vector node with double the scalar size, but half the element count - reducing codegen complexity and potentially allowing further optimization. I did look at performing this generically in DAGCombine, but we don't have as much control over when a legal build vector can be folded - another generic fold would be to handle this on insert_vector_elt pairs, but again legality checks could be limiting. Fixes llvm#167498
1 parent 59c01cc commit 876114f

File tree

3 files changed: +141 additions, -339 deletions

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8865,6 +8865,56 @@ static SDValue lowerBuildVectorAsBlend(BuildVectorSDNode *BVOp, SDLoc const &DL,
88658865
return SDValue();
88668866
}
88678867

8868+
/// Widen a BUILD_VECTOR if the scalar operands are freely mergeable.
8869+
static SDValue widenBuildVector(BuildVectorSDNode *BVOp, SDLoc const &DL,
8870+
X86Subtarget const &Subtarget,
8871+
SelectionDAG &DAG) {
8872+
using namespace SDPatternMatch;
8873+
MVT VT = BVOp->getSimpleValueType(0);
8874+
MVT SVT = VT.getScalarType();
8875+
unsigned NumElts = VT.getVectorNumElements();
8876+
unsigned EltBits = SVT.getSizeInBits();
8877+
8878+
if (SVT != MVT::i8 && SVT != MVT::i16 && SVT != MVT::i32)
8879+
return SDValue();
8880+
8881+
unsigned WideBits = 2 * EltBits;
8882+
MVT WideSVT = MVT::getIntegerVT(WideBits);
8883+
MVT WideVT = MVT::getVectorVT(WideSVT, NumElts / 2);
8884+
if (!DAG.getTargetLoweringInfo().isTypeLegal(WideSVT))
8885+
return SDValue();
8886+
8887+
SmallVector<SDValue, 8> WideOps;
8888+
for (unsigned I = 0; I != NumElts; I += 2) {
8889+
SDValue Op0 = BVOp->getOperand(I + 0);
8890+
SDValue Op1 = BVOp->getOperand(I + 1);
8891+
8892+
if (Op0.isUndef() && Op1.isUndef()) {
8893+
WideOps.push_back(DAG.getUNDEF(WideSVT));
8894+
continue;
8895+
}
8896+
8897+
// TODO: Constant repacking?
8898+
8899+
// Merge scalars that have been split from the same source.
8900+
SDValue X, Y;
8901+
if (sd_match(Op0, m_Trunc(m_Value(X))) &&
8902+
sd_match(Op1, m_Trunc(m_Srl(m_Value(Y), m_SpecificInt(EltBits)))) &&
8903+
peekThroughTruncates(X) == peekThroughTruncates(Y) &&
8904+
X.getValueType().bitsGE(WideSVT)) {
8905+
if (X.getValueType().bitsGT(WideSVT))
8906+
X = DAG.getNode(ISD::TRUNCATE, DL, WideSVT, X);
8907+
WideOps.push_back(X);
8908+
continue;
8909+
}
8910+
8911+
return SDValue();
8912+
}
8913+
8914+
assert(WideOps.size() == (NumElts / 2) && "Failed to widen build vector");
8915+
return DAG.getBitcast(VT, DAG.getBuildVector(WideVT, DL, WideOps));
8916+
}
8917+
88688918
/// Create a vector constant without a load. SSE/AVX provide the bare minimum
88698919
/// functionality to do this, so it's all zeros, all ones, or some derivation
88708920
/// that is cheap to calculate.
@@ -9335,6 +9385,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
93359385
return BitOp;
93369386
if (SDValue Blend = lowerBuildVectorAsBlend(BV, dl, Subtarget, DAG))
93379387
return Blend;
9388+
if (SDValue WideBV = widenBuildVector(BV, dl, Subtarget, DAG))
9389+
return WideBV;
93389390

93399391
unsigned NumZero = ZeroMask.popcount();
93409392
unsigned NumNonZero = NonZeroMask.popcount();

Comments (0)