@@ -23445,6 +23445,68 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
23445
23445
// inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
23446
23446
if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
23447
23447
return DAG.getSplat(VT, DL, InVal);
23448
+
23449
+ if (TLI.getTypeAction(*DAG.getContext(), VT) ==
23450
+ TargetLowering::TypeSplitVector) {
23451
+ // For dynamic insertelts, the type legalizer may spill the entire
23452
+ // vector. For a chain of dynamic insertelts, this can be really
23453
+ // inefficient and bad for compile time. If each insertelt is only fed
23454
+ // into the next, the vector is write-only across this chain, and we can
23455
+ // just spill once.
23456
+ SmallVector<SDNode *> Seq{N};
23457
+ unsigned NumDynamic = 1;
23458
+ while (true) {
23459
+ SDValue InVec = Seq.back()->getOperand(0);
23460
+ if (InVec.getOpcode() != ISD::INSERT_VECTOR_ELT)
23461
+ break;
23462
+ Seq.push_back(InVec.getNode());
23463
+ NumDynamic += !isa<ConstantSDNode>(InVec.getOperand(2));
23464
+ }
23465
+
23466
+ // We will lower every insertelt in the sequence to a store. In the
23467
+ // default handling, only dynamic insertelts in the sequence will be
23468
+ // lowered to a store (+ vector spilling for each). Check that our
23469
+ // approach reduces the total number of loads and stores over the default.
23470
+ if (2 * VT.getVectorMinNumElements() + Seq.size() <
23471
+ NumDynamic * 2 * VT.getVectorMinNumElements()) {
23472
+ // In cases where the vector is illegal it will be broken down into
23473
+ // parts and stored in parts - we should use the alignment for the
23474
+ // smallest part.
23475
+ Align SmallestAlign = DAG.getReducedAlign(VT, /*UseABI=*/false);
23476
+ SDValue StackPtr =
23477
+ DAG.CreateStackTemporary(VT.getStoreSize(), SmallestAlign);
23478
+ auto &MF = DAG.getMachineFunction();
23479
+ int FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
23480
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
23481
+
23482
+ // Begin spilling
23483
+ SDValue InVec = Seq.back()->getOperand(0);
23484
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), DL, InVec, StackPtr,
23485
+ PtrInfo, SmallestAlign);
23486
+
23487
+ // Lower every insertelt in the sequence (constant or dynamic index) to a store
23488
+ for (SDNode *N : reverse(Seq)) {
23489
+ SDValue Elmnt = N->getOperand(1);
23490
+ SDValue Index = N->getOperand(2);
23491
+
23492
+ // Store the new element. This may be larger than the vector element
23493
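+ // type, so use a truncating store.
+ // NOTE(review): the memory type passed to getTruncStore below is Elmnt's
+ // own value type rather than VT's element type, so as written nothing is
+ // narrowed -- confirm this matches the intent described above.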
23494
+ SDValue EltPtr =
23495
+ TLI.getVectorElementPointer(DAG, StackPtr, VT, Index);
23496
+ EVT EltVT = Elmnt.getValueType();
23497
+ Store = DAG.getTruncStore(
23498
+ Store, DL, Elmnt, EltPtr, MachinePointerInfo::getUnknownStack(MF),
23499
+ EltVT,
23500
+ commonAlignment(SmallestAlign, EltVT.getFixedSizeInBits() / 8));
23501
+ }
23502
+
23503
+ // Load the spilled vector
23504
+ SDValue Load =
23505
+ DAG.getLoad(VT, DL, Store, StackPtr, PtrInfo, SmallestAlign);
23506
+ return Load.getValue(0);
23507
+ }
23508
+ }
23509
+
23448
23510
return SDValue();
23449
23511
}
23450
23512
0 commit comments