Skip to content

Commit 68e74f8

Browse files
authored
[DAGCombiner] Lower dynamic insertelt chain more efficiently (#162368)
For an insertelt with a dynamic index, the default handling in DAGTypeLegalizer and LegalizeDAG will reserve a stack slot for the vector, lower the insertelt to a store, then load the modified vector back into temporaries. The vector store and load may be legalized into a sequence of smaller operations depending on the target. Let V = the vector size and L = the length of a chain of insertelts with dynamic indices. In the worst case, this chain will lower to O(VL) operations, which can increase code size dramatically. Instead, identify such chains, reserve one stack slot for the vector, and lower all of the insertelts to stores at once. This requires only O(V + L) operations. This change only affects the default lowering behavior.
1 parent affed57 commit 68e74f8

File tree

3 files changed

+848
-35
lines changed

3 files changed

+848
-35
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23506,6 +23506,93 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
2350623506
// inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
2350723507
if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
2350823508
return DAG.getSplat(VT, DL, InVal);
23509+
23510+
// Extend this type to be byte-addressable
23511+
EVT OldVT = VT;
23512+
EVT EltVT = VT.getVectorElementType();
23513+
bool IsByteSized = EltVT.isByteSized();
23514+
if (!IsByteSized) {
23515+
EltVT =
23516+
EltVT.changeTypeToInteger().getRoundIntegerType(*DAG.getContext());
23517+
VT = VT.changeElementType(EltVT);
23518+
}
23519+
23520+
// Check if this operation will be handled the default way for its type.
23521+
auto IsTypeDefaultHandled = [this](EVT VT) {
23522+
return TLI.getTypeAction(*DAG.getContext(), VT) ==
23523+
TargetLowering::TypeSplitVector ||
23524+
TLI.isOperationExpand(ISD::INSERT_VECTOR_ELT, VT);
23525+
};
23526+
23527+
// Check if this operation is illegal and will be handled the default way,
23528+
// even after extending the type to be byte-addressable.
23529+
if (IsTypeDefaultHandled(OldVT) && IsTypeDefaultHandled(VT)) {
23530+
// For each dynamic insertelt, the default way will save the vector to
23531+
// the stack, store at an offset, and load the modified vector. This can
23532+
// dramatically increase code size if we have a chain of insertelts on a
23533+
// large vector: requiring O(V*C) stores/loads where V = length of
23534+
// vector and C is length of chain. If each insertelt is only fed into the
23535+
// next, the vector is write-only across this chain, and we can just
23536+
// save once before the chain and load after in O(V + C) operations.
23537+
SmallVector<SDNode *> Seq{N};
23538+
unsigned NumDynamic = 1;
23539+
while (true) {
23540+
SDValue InVec = Seq.back()->getOperand(0);
23541+
if (InVec.getOpcode() != ISD::INSERT_VECTOR_ELT)
23542+
break;
23543+
Seq.push_back(InVec.getNode());
23544+
NumDynamic += !isa<ConstantSDNode>(InVec.getOperand(2));
23545+
}
23546+
23547+
// It always and only makes sense to lower this sequence when we have more
23548+
// than one dynamic insertelt, since we will not have more than V constant
23549+
// insertelts, so we will be reducing the total number of stores+loads.
23550+
if (NumDynamic > 1) {
23551+
// In cases where the vector is illegal it will be broken down into
23552+
// parts and stored in parts - we should use the alignment for the
23553+
// smallest part.
23554+
Align SmallestAlign = DAG.getReducedAlign(VT, /*UseABI=*/false);
23555+
SDValue StackPtr =
23556+
DAG.CreateStackTemporary(VT.getStoreSize(), SmallestAlign);
23557+
auto &MF = DAG.getMachineFunction();
23558+
int FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
23559+
auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
23560+
23561+
// Save the vector to the stack
23562+
SDValue InVec = Seq.back()->getOperand(0);
23563+
if (!IsByteSized)
23564+
InVec = DAG.getNode(ISD::ANY_EXTEND, DL, VT, InVec);
23565+
SDValue Store = DAG.getStore(DAG.getEntryNode(), DL, InVec, StackPtr,
23566+
PtrInfo, SmallestAlign);
23567+
23568+
// Lower each dynamic insertelt to a store
23569+
for (SDNode *N : reverse(Seq)) {
23570+
SDValue Elmnt = N->getOperand(1);
23571+
SDValue Index = N->getOperand(2);
23572+
23573+
// Check if we have to extend the element type
23574+
if (!IsByteSized && Elmnt.getValueType().bitsLT(EltVT))
23575+
Elmnt = DAG.getNode(ISD::ANY_EXTEND, DL, EltVT, Elmnt);
23576+
23577+
// Store the new element. This may be larger than the vector element
23578+
// type, so use a truncating store.
23579+
SDValue EltPtr =
23580+
TLI.getVectorElementPointer(DAG, StackPtr, VT, Index);
23581+
EVT EltVT = Elmnt.getValueType();
23582+
Store = DAG.getTruncStore(
23583+
Store, DL, Elmnt, EltPtr, MachinePointerInfo::getUnknownStack(MF),
23584+
EltVT,
23585+
commonAlignment(SmallestAlign, EltVT.getFixedSizeInBits() / 8));
23586+
}
23587+
23588+
// Load the saved vector from the stack
23589+
SDValue Load =
23590+
DAG.getLoad(VT, DL, Store, StackPtr, PtrInfo, SmallestAlign);
23591+
SDValue LoadV = Load.getValue(0);
23592+
return IsByteSized ? LoadV : DAG.getAnyExtOrTrunc(LoadV, DL, OldVT);
23593+
}
23594+
}
23595+
2350923596
return SDValue();
2351023597
}
2351123598

0 commit comments

Comments
 (0)