Skip to content

Commit 74eb28a

Browse files
committed
[DAGCombiner] Spill dynamic insertelt chain in one go
A chain of dynamic insertelts can be spilled at once. This avoids each insertelt being spilled separately in DAGTypeLegalizer, which reduces code size and compile time.
1 parent cd33c6b commit 74eb28a

File tree

2 files changed

+420
-0
lines changed

2 files changed

+420
-0
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23445,6 +23445,68 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
2344523445
// inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
2344623446
if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
2344723447
return DAG.getSplat(VT, DL, InVal);
23448+
23449+
if (TLI.getTypeAction(*DAG.getContext(), VT) ==
23450+
TargetLowering::TypeSplitVector) {
23451+
// For dynamic insertelts, the type legalizer may spill the entire
23452+
// vector. For a chain of dynamic insertelts, this can be really
23453+
// inefficient and bad for compile time. If each insertelt is only fed
23454+
// into the next, the vector is write-only across this chain, and we can
23455+
// just spill once.
23456+
SmallVector<SDNode *> Seq{N};
23457+
unsigned NumDynamic = 1;
23458+
while (true) {
23459+
SDValue InVec = Seq.back()->getOperand(0);
23460+
if (InVec.getOpcode() != ISD::INSERT_VECTOR_ELT)
23461+
break;
23462+
Seq.push_back(InVec.getNode());
23463+
NumDynamic += !isa<ConstantSDNode>(InVec.getOperand(2));
23464+
}
23465+
23466+
// We will lower every insertelt in the sequence to a store. In the
23467+
// default handling, only dynamic insertelts in the sequence will be
23468+
// lowered to a store (+ vector spilling for each). Check that our
23469+
// approach reduces the total number of loads and stores over the default.
23470+
if (2 * VT.getVectorMinNumElements() + Seq.size() <
23471+
NumDynamic * 2 * VT.getVectorMinNumElements()) {
23472+
// In cases where the vector is illegal it will be broken down into
23473+
// parts and stored in parts - we should use the alignment for the
23474+
// smallest part.
23475+
Align SmallestAlign = DAG.getReducedAlign(VT, /*UseABI=*/false);
23476+
SDValue StackPtr =
23477+
DAG.CreateStackTemporary(VT.getStoreSize(), SmallestAlign);
23478+
auto &MF = DAG.getMachineFunction();
23479+
int FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
23480+
auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
23481+
23482+
// Begin spilling
23483+
SDValue InVec = Seq.back()->getOperand(0);
23484+
SDValue Store = DAG.getStore(DAG.getEntryNode(), DL, InVec, StackPtr,
23485+
PtrInfo, SmallestAlign);
23486+
23487+
// Lower every insertelt in the sequence (dynamic or constant index) to a store
23488+
for (SDNode *N : reverse(Seq)) {
23489+
SDValue Elmnt = N->getOperand(1);
23490+
SDValue Index = N->getOperand(2);
23491+
23492+
// Store the new element. This may be larger than the vector element
23493+
// type, so use a truncating store.
23494+
SDValue EltPtr =
23495+
TLI.getVectorElementPointer(DAG, StackPtr, VT, Index);
23496+
EVT EltVT = Elmnt.getValueType();
23497+
Store = DAG.getTruncStore(
23498+
Store, DL, Elmnt, EltPtr, MachinePointerInfo::getUnknownStack(MF),
23499+
EltVT,
23500+
commonAlignment(SmallestAlign, EltVT.getFixedSizeInBits() / 8));
23501+
}
23502+
23503+
// Load the spilled vector
23504+
SDValue Load =
23505+
DAG.getLoad(VT, DL, Store, StackPtr, PtrInfo, SmallestAlign);
23506+
return Load.getValue(0);
23507+
}
23508+
}
23509+
2344823510
return SDValue();
2344923511
}
2345023512

0 commit comments

Comments
 (0)