@@ -2047,23 +2047,11 @@ X86TargetLowering::ByValCopyKind X86TargetLowering::ByValNeedsCopyForTailCall(
20472047 int64_t SrcOffset = MFI.getObjectOffset (SrcFI);
20482048 int64_t DstOffset = MFI.getObjectOffset (DstFI);
20492049
2050- // FIXME:
2051-
2052- // // If the source is in the local frame, then the copy to the argument
2053- // memory
2054- // // is always valid.
2055- // bool FixedSrc = MFI.isFixedObjectIndex(SrcFI);
2056- // if (!FixedSrc ||
2057- // (FixedSrc && SrcOffset < -(int64_t)AFI->getArgRegsSaveSize()))
2058- // return CopyOnce;
2059-
2060- // In the case of byval arguments split between registers and the stack,
2061- // computeAddrForCallArg returns a FrameIndex which corresponds only to the
2062- // stack portion, but the Src SDValue will refer to the full value, including
2063- // the local stack memory that the register portion gets stored into. We only
2064- // need to compare them for equality, so normalise on the full value version.
2065- uint64_t RegSize = Flags.getByValSize () - MFI.getObjectSize (DstFI);
2066- DstOffset -= RegSize;
2050+ // If the source is in the local frame, then the copy to the argument
2051+ // memory is always valid.
2052+ bool FixedSrc = MFI.isFixedObjectIndex (SrcFI);
2053+ if (!FixedSrc || (FixedSrc && SrcOffset < 0 ))
2054+ return CopyOnce;
20672055
20682056 // If the value is already in the correct location, then no copying is
20692057 // needed. If not, then we need to copy via a temporary.
@@ -2201,6 +2189,74 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
22012189 unsigned NumBytesToPush = NumBytes;
22022190 unsigned NumBytesToPop = NumBytes;
22032191
2192+ SDValue StackPtr;
2193+ const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo ();
2194+
2195+ // If we are doing a tail-call, any byval arguments will be written to stack
2196+ // space which was used for incoming arguments. If any the values being used
2197+ // are incoming byval arguments to this function, then they might be
2198+ // overwritten by the stores of the outgoing arguments. To avoid this, we
2199+ // need to make a temporary copy of them in local stack space, then copy back
2200+ // to the argument area.
2201+ DenseMap<unsigned , SDValue> ByValTemporaries;
2202+ SDValue ByValTempChain;
2203+ if (isTailCall) {
2204+ SmallVector<SDValue, 8 > ByValCopyChains;
2205+ for (const CCValAssign &VA : ArgLocs) {
2206+ unsigned ArgIdx = VA.getValNo ();
2207+ SDValue Src = OutVals[ArgIdx];
2208+ ISD::ArgFlagsTy Flags = Outs[ArgIdx].Flags ;
2209+
2210+ if (!Flags.isByVal ())
2211+ continue ;
2212+
2213+ auto PtrVT = getPointerTy (DAG.getDataLayout ());
2214+
2215+ if (!StackPtr.getNode ())
2216+ StackPtr =
2217+ DAG.getCopyFromReg (Chain, dl, RegInfo->getStackRegister (), PtrVT);
2218+
2219+ // Destination: where this byval should live in the callee’s frame
2220+ // after the tail call.
2221+ int32_t Offset = VA.getLocMemOffset () + FPDiff;
2222+ int Size = VA.getLocVT ().getFixedSizeInBits () / 8 ;
2223+ int FI = MF.getFrameInfo ().CreateFixedObject (Size, Offset, true );
2224+ SDValue Dst = DAG.getFrameIndex (FI, PtrVT);
2225+
2226+ ByValCopyKind Copy = ByValNeedsCopyForTailCall (DAG, Src, Dst, Flags);
2227+
2228+ if (Copy == NoCopy) {
2229+ // If the argument is already at the correct offset on the stack
2230+ // (because we are forwarding a byval argument from our caller), we
2231+ // don't need any copying.
2232+ continue ;
2233+ } else if (Copy == CopyOnce) {
2234+ // If the argument is in our local stack frame, no other argument
2235+ // preparation can clobber it, so we can copy it to the final location
2236+ // later.
2237+ ByValTemporaries[ArgIdx] = Src;
2238+ } else {
2239+ assert (Copy == CopyViaTemp && " unexpected enum value" );
2240+ // If we might be copying this argument from the outgoing argument
2241+ // stack area, we need to copy via a temporary in the local stack
2242+ // frame.
2243+ MachineFrameInfo &MFI = MF.getFrameInfo ();
2244+ int TempFrameIdx = MFI.CreateStackObject (Flags.getByValSize (),
2245+ Flags.getNonZeroByValAlign (),
2246+ /* isSS=*/ false );
2247+ SDValue Temp =
2248+ DAG.getFrameIndex (TempFrameIdx, getPointerTy (DAG.getDataLayout ()));
2249+
2250+ SDValue CopyChain =
2251+ CreateCopyOfByValArgument (Src, Temp, Chain, Flags, DAG, dl);
2252+ ByValCopyChains.push_back (CopyChain);
2253+ }
2254+ }
2255+ if (!ByValCopyChains.empty ())
2256+ ByValTempChain =
2257+ DAG.getNode (ISD::TokenFactor, dl, MVT::Other, ByValCopyChains);
2258+ }
2259+
22042260 // If we have an inalloca argument, all stack space has already been allocated
22052261 // for us and be right at the top of the stack. We don't support multiple
22062262 // arguments passed in memory when using inalloca.
@@ -2241,7 +2297,6 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
22412297
22422298 SmallVector<std::pair<Register, SDValue>, 8 > RegsToPass;
22432299 SmallVector<SDValue, 8 > MemOpChains;
2244- SDValue StackPtr;
22452300
22462301 // The next loop assumes that the locations are in the same order of the
22472302 // input arguments.
@@ -2250,7 +2305,6 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
22502305
22512306 // Walk the register/memloc assignments, inserting copies/loads. In the case
22522307 // of tail call optimization arguments are handle later.
2253- const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo ();
22542308 for (unsigned I = 0 , OutIndex = 0 , E = ArgLocs.size (); I != E;
22552309 ++I, ++OutIndex) {
22562310 assert (OutIndex < Outs.size () && " Invalid Out index" );
@@ -2427,6 +2481,10 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
24272481 // would clobber.
24282482 Chain = DAG.getStackArgumentTokenFactor (Chain);
24292483
2484+ if (ByValTempChain)
2485+ Chain =
2486+ DAG.getNode (ISD::TokenFactor, dl, MVT::Other, Chain, ByValTempChain);
2487+
24302488 SmallVector<SDValue, 8 > MemOpChains2;
24312489 SDValue FIN;
24322490 int FI = 0 ;
@@ -2459,16 +2517,24 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
24592517 FIN = DAG.getFrameIndex (FI, getPointerTy (DAG.getDataLayout ()));
24602518
24612519 if (Flags.isByVal ()) {
2462- // Copy relative to framepointer.
2463- SDValue Source = DAG.getIntPtrConstant (VA.getLocMemOffset (), dl);
2464- if (!StackPtr.getNode ())
2465- StackPtr = DAG.getCopyFromReg (Chain, dl, RegInfo->getStackRegister (),
2466- getPointerTy (DAG.getDataLayout ()));
2467- Source = DAG.getNode (ISD::ADD, dl, getPointerTy (DAG.getDataLayout ()),
2468- StackPtr, Source);
2469-
2470- MemOpChains2.push_back (
2471- CreateCopyOfByValArgument (Source, FIN, Chain, Flags, DAG, dl));
2520+ SDValue ByValSrc;
2521+ bool NeedsStackCopy;
2522+ if (auto It = ByValTemporaries.find (OutsIndex);
2523+ It != ByValTemporaries.end ()) {
2524+ ByValSrc = It->second ;
2525+ NeedsStackCopy = true ;
2526+ } else {
2527+ ByValSrc = Arg;
2528+ NeedsStackCopy = false ;
2529+ }
2530+
2531+ if (NeedsStackCopy) {
2532+ auto PtrVT = getPointerTy (DAG.getDataLayout ());
2533+ SDValue DstAddr = DAG.getFrameIndex (FI, PtrVT);
2534+
2535+ MemOpChains2.push_back (CreateCopyOfByValArgument (
2536+ ByValSrc, DstAddr, Chain, Flags, DAG, dl));
2537+ }
24722538 } else {
24732539 // Store relative to framepointer.
24742540 MemOpChains2.push_back (DAG.getStore (
0 commit comments