@@ -2325,6 +2325,59 @@ std::pair<SDValue, MachinePointerInfo> ARMTargetLowering::computeAddrForCallArg(
23252325 return std::make_pair(DstAddr, DstInfo);
23262326}
23272327
2328+ // Returns the type of copying which is required to set up a byval argument to
2329+ // a tail-called function. This isn't needed for non-tail calls, because they
2330+ // always need the equivalent of CopyOnce, but tail-calls sometimes need two to
2331+ // avoid clobbering another argument (CopyViaTemp), and sometimes can be
2332+ // optimised to zero copies when forwarding an argument from the caller's
2333+ // caller (NoCopy).
2334+ ARMTargetLowering::ByValCopyKind ARMTargetLowering::ByValNeedsCopyForTailCall(
2335+ SelectionDAG &DAG, SDValue Src, SDValue Dst, ISD::ArgFlagsTy Flags) const {
2336+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
2337+ ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
2338+
2339+ // Globals are always safe to copy from.
2340+ if (isa<GlobalAddressSDNode>(Src) || isa<ExternalSymbolSDNode>(Src))
2341+ return CopyOnce;
2342+
2343+ // Can only analyse frame index nodes, conservatively assume we need a
2344+ // temporary.
2345+ auto *SrcFrameIdxNode = dyn_cast<FrameIndexSDNode>(Src);
2346+ auto *DstFrameIdxNode = dyn_cast<FrameIndexSDNode>(Dst);
2347+ if (!SrcFrameIdxNode || !DstFrameIdxNode)
2348+ return CopyViaTemp;
2349+
2350+ int SrcFI = SrcFrameIdxNode->getIndex();
2351+ int DstFI = DstFrameIdxNode->getIndex();
2352+ assert(MFI.isFixedObjectIndex(DstFI) &&
2353+ "byval passed in non-fixed stack slot");
2354+
2355+ int64_t SrcOffset = MFI.getObjectOffset(SrcFI);
2356+ int64_t DstOffset = MFI.getObjectOffset(DstFI);
2357+
2358+ // If the source is in the local frame, then the copy to the argument memory
2359+ // is always valid.
2360+ bool FixedSrc = MFI.isFixedObjectIndex(SrcFI);
2361+ if (!FixedSrc ||
2362+ (FixedSrc && SrcOffset < -(int64_t)AFI->getArgRegsSaveSize()))
2363+ return CopyOnce;
2364+
2365+ // In the case of byval arguments split between registers and the stack,
2366+ // computeAddrForCallArg returns a FrameIndex which corresponds only to the
2367+ // stack portion, but the Src SDValue will refer to the full value, including
2368+ // the local stack memory that the register portion gets stored into. We only
2369+ // need to compare them for equality, so normalise on the full value version.
2370+ uint64_t RegSize = Flags.getByValSize() - MFI.getObjectSize(DstFI);
2371+ DstOffset -= RegSize;
2372+
2373+ // If the value is already in the correct location, then no copying is
2374+ // needed. If not, then we need to copy via a temporary.
2375+ if (SrcOffset == DstOffset)
2376+ return NoCopy;
2377+ else
2378+ return CopyViaTemp;
2379+ }
2380+
23282381void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
23292382 SDValue Chain, SDValue &Arg,
23302383 RegsToPassVector &RegsToPass,
@@ -2499,37 +2552,58 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
24992552 // overwritten by the stores of the outgoing arguments. To avoid this, we
25002553 // need to make a temporary copy of them in local stack space, then copy back
25012554 // to the argument area.
2502- // TODO This could be optimised to skip byvals which are already being copied
2503- // from local stack space, or which are copied from the incoming stack at the
2504- // exact same location.
25052555 DenseMap<unsigned, SDValue> ByValTemporaries;
25062556 SDValue ByValTempChain;
25072557 if (isTailCall) {
2508- for (unsigned ArgIdx = 0, e = OutVals.size(); ArgIdx != e; ++ArgIdx) {
2509- SDValue Arg = OutVals[ArgIdx];
2558+ SmallVector<SDValue, 8> ByValCopyChains;
2559+ for (const CCValAssign &VA : ArgLocs) {
2560+ unsigned ArgIdx = VA.getValNo();
2561+ SDValue Src = OutVals[ArgIdx];
25102562 ISD::ArgFlagsTy Flags = Outs[ArgIdx].Flags;
25112563
2512- if (Flags.isByVal()) {
2513- int FrameIdx = MFI.CreateStackObject(
2564+ if (!Flags.isByVal())
2565+ continue;
2566+
2567+ SDValue Dst;
2568+ MachinePointerInfo DstInfo;
2569+ std::tie(Dst, DstInfo) =
2570+ computeAddrForCallArg(dl, DAG, VA, SDValue(), true, SPDiff);
2571+ ByValCopyKind Copy = ByValNeedsCopyForTailCall(DAG, Src, Dst, Flags);
2572+
2573+ if (Copy == NoCopy) {
2574+ // If the argument is already at the correct offset on the stack
2575+ // (because we are forwarding a byval argument from our caller), we
2576+ // don't need any copying.
2577+ continue;
2578+ } else if (Copy == CopyOnce) {
2579+ // If the argument is in our local stack frame, no other argument
2580+ // preparation can clobber it, so we can copy it to the final location
2581+ // later.
2582+ ByValTemporaries[ArgIdx] = Src;
2583+ } else {
2584+ assert(Copy == CopyViaTemp && "unexpected enum value");
2585+ // If we might be copying this argument from the outgoing argument
2586+ // stack area, we need to copy via a temporary in the local stack
2587+ // frame.
2588+ int TempFrameIdx = MFI.CreateStackObject(
25142589 Flags.getByValSize(), Flags.getNonZeroByValAlign(), false);
2515- SDValue Dst =
2516- DAG.getFrameIndex(FrameIdx , getPointerTy(DAG.getDataLayout()));
2590+ SDValue Temp =
2591+ DAG.getFrameIndex(TempFrameIdx , getPointerTy(DAG.getDataLayout()));
25172592
25182593 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
25192594 SDValue AlignNode =
25202595 DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32);
25212596
25222597 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2523- SDValue Ops[] = { Chain, Dst, Arg , SizeNode, AlignNode};
2524- MemOpChains .push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
2525- Ops));
2526- ByValTemporaries[ArgIdx] = Dst ;
2598+ SDValue Ops[] = {Chain, Temp, Src , SizeNode, AlignNode};
2599+ ByValCopyChains .push_back(
2600+ DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs, Ops));
2601+ ByValTemporaries[ArgIdx] = Temp ;
25272602 }
25282603 }
2529- if (!MemOpChains.empty()) {
2530- ByValTempChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2531- MemOpChains.clear();
2532- }
2604+ if (!ByValCopyChains.empty())
2605+ ByValTempChain =
2606+ DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ByValCopyChains);
25332607 }
25342608
25352609 // During a tail call, stores to the argument area must happen after all of
@@ -2644,13 +2718,17 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
26442718 unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
26452719
26462720 SDValue ByValSrc;
2647- if (ByValTemporaries.contains(realArgIdx))
2721+ bool NeedsStackCopy;
2722+ if (ByValTemporaries.contains(realArgIdx)) {
26482723 ByValSrc = ByValTemporaries[realArgIdx];
2649- else
2724+ NeedsStackCopy = true;
2725+ } else {
26502726 ByValSrc = Arg;
2727+ NeedsStackCopy = !isTailCall;
2728+ }
26512729
2730+ // If part of the argument is in registers, load them.
26522731 if (CurByValIdx < ByValArgsCount) {
2653-
26542732 unsigned RegBegin, RegEnd;
26552733 CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
26562734
@@ -2674,7 +2752,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
26742752 CCInfo.nextInRegsParam();
26752753 }
26762754
2677- if (Flags.getByValSize() > 4*offset) {
2755+ // If the memory part of the argument isn't already in the correct place
2756+ // (which can happen with tail calls), copy it into the argument area.
2757+ if (NeedsStackCopy && Flags.getByValSize() > 4 * offset) {
26782758 auto PtrVT = getPointerTy(DAG.getDataLayout());
26792759 SDValue Dst;
26802760 MachinePointerInfo DstInfo;
0 commit comments