Skip to content

Commit 26e9079

Browse files
committed
[DAGCombiner] Lower dynamic insertelt chain
For an insertelt with dynamic indices, the default handling in DAGTypeLegalizer and LegalizeDAG will reserve a stack slot for the vector, lower the insertelt to a store, then load the modified vector back into temporaries. The vector store and load may be legalized into a sequence of smaller operations depending on the target. Let V = the vector size and L = the length of a chain of insertelts with dynamic indices. In the worst case, this chain will lower to O(VL) operations, which can increase code size dramatically. Instead, identify such chains, reserve one stack slot for the vector, and lower all of the insertelts to stores at once. This requires only O(V + L) operations. This change only affects the default lowering behavior and still leaves targets to do their own thing.
1 parent ca55c07 commit 26e9079

File tree

3 files changed

+449
-35
lines changed

3 files changed

+449
-35
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23476,6 +23476,72 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
2347623476
// inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
2347723477
if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
2347823478
return DAG.getSplat(VT, DL, InVal);
23479+
23480+
// Check if this operation is illegal and will be handled the default way.
23481+
if (TLI.getTypeAction(*DAG.getContext(), VT) ==
23482+
TargetLowering::TypeSplitVector ||
23483+
TLI.isOperationExpand(ISD::INSERT_VECTOR_ELT, VT)) {
23484+
// For each dynamic insertelt, the default way will save the vector to
23485+
// the stack, store at an offset, and load the modified vector. This can
23486+
// dramatically increase code size if we have a chain of insertelts on a
23487+
// large vector: requiring O(V*C) stores/loads where V = length of
23488+
// vector and C is length of chain. If each insertelt is only fed into the
23489+
// next, the vector is write-only across this chain, and we can just
23490+
// save once before the chain and load after in O(V + C) operations.
23491+
SmallVector<SDNode *> Seq{N};
23492+
unsigned NumDynamic = 1;
23493+
while (true) {
23494+
SDValue InVec = Seq.back()->getOperand(0);
23495+
if (InVec.getOpcode() != ISD::INSERT_VECTOR_ELT)
23496+
break;
23497+
Seq.push_back(InVec.getNode());
23498+
NumDynamic += !isa<ConstantSDNode>(InVec.getOperand(2));
23499+
}
23500+
23501+
// We will lower every insertelt in the sequence to a store. In the
23502+
// default handling, only dynamic insertelts in the sequence will be
23503+
// lowered to a store (+ vector save/load for each). Check that our
23504+
// approach reduces the total number of loads and stores over the default.
23505+
if (2 * VT.getVectorMinNumElements() + Seq.size() <
23506+
NumDynamic * (2 * VT.getVectorMinNumElements() + 1)) {
23507+
// In cases where the vector is illegal it will be broken down into
23508+
// parts and stored in parts - we should use the alignment for the
23509+
// smallest part.
23510+
Align SmallestAlign = DAG.getReducedAlign(VT, /*UseABI=*/false);
23511+
SDValue StackPtr =
23512+
DAG.CreateStackTemporary(VT.getStoreSize(), SmallestAlign);
23513+
auto &MF = DAG.getMachineFunction();
23514+
int FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
23515+
auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
23516+
23517+
// Save the vector to the stack
23518+
SDValue InVec = Seq.back()->getOperand(0);
23519+
SDValue Store = DAG.getStore(DAG.getEntryNode(), DL, InVec, StackPtr,
23520+
PtrInfo, SmallestAlign);
23521+
23522+
// Lower every insertelt in the sequence (constant or dynamic index) to a store
23523+
for (SDNode *N : reverse(Seq)) {
23524+
SDValue Elmnt = N->getOperand(1);
23525+
SDValue Index = N->getOperand(2);
23526+
23527+
// Store the new element. This may be larger than the vector element
23528+
// type, so use a truncating store.
23529+
SDValue EltPtr =
23530+
TLI.getVectorElementPointer(DAG, StackPtr, VT, Index);
23531+
EVT EltVT = Elmnt.getValueType();
23532+
Store = DAG.getTruncStore(
23533+
Store, DL, Elmnt, EltPtr, MachinePointerInfo::getUnknownStack(MF),
23534+
EltVT,
23535+
commonAlignment(SmallestAlign, EltVT.getFixedSizeInBits() / 8));
23536+
}
23537+
23538+
// Load the saved vector from the stack
23539+
SDValue Load =
23540+
DAG.getLoad(VT, DL, Store, StackPtr, PtrInfo, SmallestAlign);
23541+
return Load.getValue(0);
23542+
}
23543+
}
23544+
2347923545
return SDValue();
2348023546
}
2348123547

0 commit comments

Comments
 (0)