Skip to content

Commit f2a43bb

Browse files
committed
Copy how arm handles stack shuffling for tail calls
Currently this logic is only actually used for musttail calls.
1 parent e12da2e commit f2a43bb

File tree

3 files changed

+104
-31
lines changed

3 files changed

+104
-31
lines changed

llvm/lib/Target/X86/X86ISelLoweringCall.cpp

Lines changed: 95 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -2047,23 +2047,11 @@ X86TargetLowering::ByValCopyKind X86TargetLowering::ByValNeedsCopyForTailCall(
20472047
int64_t SrcOffset = MFI.getObjectOffset(SrcFI);
20482048
int64_t DstOffset = MFI.getObjectOffset(DstFI);
20492049

2050-
// FIXME:
2051-
2052-
// // If the source is in the local frame, then the copy to the argument
2053-
// memory
2054-
// // is always valid.
2055-
// bool FixedSrc = MFI.isFixedObjectIndex(SrcFI);
2056-
// if (!FixedSrc ||
2057-
// (FixedSrc && SrcOffset < -(int64_t)AFI->getArgRegsSaveSize()))
2058-
// return CopyOnce;
2059-
2060-
// In the case of byval arguments split between registers and the stack,
2061-
// computeAddrForCallArg returns a FrameIndex which corresponds only to the
2062-
// stack portion, but the Src SDValue will refer to the full value, including
2063-
// the local stack memory that the register portion gets stored into. We only
2064-
// need to compare them for equality, so normalise on the full value version.
2065-
uint64_t RegSize = Flags.getByValSize() - MFI.getObjectSize(DstFI);
2066-
DstOffset -= RegSize;
2050+
// If the source is in the local frame, then the copy to the argument
2051+
// memory is always valid.
2052+
bool FixedSrc = MFI.isFixedObjectIndex(SrcFI);
2053+
if (!FixedSrc || (FixedSrc && SrcOffset < 0))
2054+
return CopyOnce;
20672055

20682056
// If the value is already in the correct location, then no copying is
20692057
// needed. If not, then we need to copy via a temporary.
@@ -2201,6 +2189,74 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
22012189
unsigned NumBytesToPush = NumBytes;
22022190
unsigned NumBytesToPop = NumBytes;
22032191

2192+
SDValue StackPtr;
2193+
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2194+
2195+
// If we are doing a tail-call, any byval arguments will be written to stack
2196+
// space which was used for incoming arguments. If any the values being used
2197+
// are incoming byval arguments to this function, then they might be
2198+
// overwritten by the stores of the outgoing arguments. To avoid this, we
2199+
// need to make a temporary copy of them in local stack space, then copy back
2200+
// to the argument area.
2201+
DenseMap<unsigned, SDValue> ByValTemporaries;
2202+
SDValue ByValTempChain;
2203+
if (isTailCall) {
2204+
SmallVector<SDValue, 8> ByValCopyChains;
2205+
for (const CCValAssign &VA : ArgLocs) {
2206+
unsigned ArgIdx = VA.getValNo();
2207+
SDValue Src = OutVals[ArgIdx];
2208+
ISD::ArgFlagsTy Flags = Outs[ArgIdx].Flags;
2209+
2210+
if (!Flags.isByVal())
2211+
continue;
2212+
2213+
auto PtrVT = getPointerTy(DAG.getDataLayout());
2214+
2215+
if (!StackPtr.getNode())
2216+
StackPtr =
2217+
DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), PtrVT);
2218+
2219+
// Destination: where this byval should live in the callee’s frame
2220+
// after the tail call.
2221+
int32_t Offset = VA.getLocMemOffset() + FPDiff;
2222+
int Size = VA.getLocVT().getFixedSizeInBits() / 8;
2223+
int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
2224+
SDValue Dst = DAG.getFrameIndex(FI, PtrVT);
2225+
2226+
ByValCopyKind Copy = ByValNeedsCopyForTailCall(DAG, Src, Dst, Flags);
2227+
2228+
if (Copy == NoCopy) {
2229+
// If the argument is already at the correct offset on the stack
2230+
// (because we are forwarding a byval argument from our caller), we
2231+
// don't need any copying.
2232+
continue;
2233+
} else if (Copy == CopyOnce) {
2234+
// If the argument is in our local stack frame, no other argument
2235+
// preparation can clobber it, so we can copy it to the final location
2236+
// later.
2237+
ByValTemporaries[ArgIdx] = Src;
2238+
} else {
2239+
assert(Copy == CopyViaTemp && "unexpected enum value");
2240+
// If we might be copying this argument from the outgoing argument
2241+
// stack area, we need to copy via a temporary in the local stack
2242+
// frame.
2243+
MachineFrameInfo &MFI = MF.getFrameInfo();
2244+
int TempFrameIdx = MFI.CreateStackObject(Flags.getByValSize(),
2245+
Flags.getNonZeroByValAlign(),
2246+
/*isSS=*/false);
2247+
SDValue Temp =
2248+
DAG.getFrameIndex(TempFrameIdx, getPointerTy(DAG.getDataLayout()));
2249+
2250+
SDValue CopyChain =
2251+
CreateCopyOfByValArgument(Src, Temp, Chain, Flags, DAG, dl);
2252+
ByValCopyChains.push_back(CopyChain);
2253+
}
2254+
}
2255+
if (!ByValCopyChains.empty())
2256+
ByValTempChain =
2257+
DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ByValCopyChains);
2258+
}
2259+
22042260
// If we have an inalloca argument, all stack space has already been allocated
22052261
// for us and be right at the top of the stack. We don't support multiple
22062262
// arguments passed in memory when using inalloca.
@@ -2241,7 +2297,6 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
22412297

22422298
SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
22432299
SmallVector<SDValue, 8> MemOpChains;
2244-
SDValue StackPtr;
22452300

22462301
// The next loop assumes that the locations are in the same order of the
22472302
// input arguments.
@@ -2250,7 +2305,6 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
22502305

22512306
// Walk the register/memloc assignments, inserting copies/loads. In the case
22522307
// of tail call optimization arguments are handle later.
2253-
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
22542308
for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
22552309
++I, ++OutIndex) {
22562310
assert(OutIndex < Outs.size() && "Invalid Out index");
@@ -2427,6 +2481,10 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
24272481
// would clobber.
24282482
Chain = DAG.getStackArgumentTokenFactor(Chain);
24292483

2484+
if (ByValTempChain)
2485+
Chain =
2486+
DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chain, ByValTempChain);
2487+
24302488
SmallVector<SDValue, 8> MemOpChains2;
24312489
SDValue FIN;
24322490
int FI = 0;
@@ -2459,16 +2517,24 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
24592517
FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
24602518

24612519
if (Flags.isByVal()) {
2462-
// Copy relative to framepointer.
2463-
SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
2464-
if (!StackPtr.getNode())
2465-
StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
2466-
getPointerTy(DAG.getDataLayout()));
2467-
Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
2468-
StackPtr, Source);
2469-
2470-
MemOpChains2.push_back(
2471-
CreateCopyOfByValArgument(Source, FIN, Chain, Flags, DAG, dl));
2520+
SDValue ByValSrc;
2521+
bool NeedsStackCopy;
2522+
if (auto It = ByValTemporaries.find(OutsIndex);
2523+
It != ByValTemporaries.end()) {
2524+
ByValSrc = It->second;
2525+
NeedsStackCopy = true;
2526+
} else {
2527+
ByValSrc = Arg;
2528+
NeedsStackCopy = false;
2529+
}
2530+
2531+
if (NeedsStackCopy) {
2532+
auto PtrVT = getPointerTy(DAG.getDataLayout());
2533+
SDValue DstAddr = DAG.getFrameIndex(FI, PtrVT);
2534+
2535+
MemOpChains2.push_back(CreateCopyOfByValArgument(
2536+
ByValSrc, DstAddr, Chain, Flags, DAG, dl));
2537+
}
24722538
} else {
24732539
// Store relative to framepointer.
24742540
MemOpChains2.push_back(DAG.getStore(

llvm/test/CodeGen/X86/musttail-inalloca.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,15 @@ define dso_local x86_thiscallcc void @methodWithVtorDisp_thunk(ptr %0, ptr inall
1818
; CHECK-LABEL: methodWithVtorDisp_thunk:
1919
; CHECK: # %bb.0:
2020
; CHECK-NEXT: pushl %esi
21+
; CHECK-NEXT: subl $20, %esp
2122
; CHECK-NEXT: movl %ecx, %esi
2223
; CHECK-NEXT: subl -4(%ecx), %esi
2324
; CHECK-NEXT: pushl {{[0-9]+}}(%esp)
2425
; CHECK-NEXT: pushl $_methodWithVtorDisp_thunk
2526
; CHECK-NEXT: calll ___cyg_profile_func_exit
2627
; CHECK-NEXT: addl $8, %esp
2728
; CHECK-NEXT: movl %esi, %ecx
29+
; CHECK-NEXT: addl $20, %esp
2830
; CHECK-NEXT: popl %esi
2931
; CHECK-NEXT: jmp _methodWithVtorDisp # TAILCALL
3032
%3 = getelementptr inbounds i8, ptr %0, i32 -4

llvm/test/CodeGen/X86/sibcall.ll

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -295,10 +295,15 @@ declare dso_local i32 @foo5(i32, i32, i32, i32, i32)
295295
define dso_local i32 @t12(i32 %x, i32 %y, ptr byval(%struct.t) align 4 %z) nounwind ssp {
296296
; X86-LABEL: t12:
297297
; X86: # %bb.0: # %entry
298+
; X86-NEXT: subl $20, %esp
298299
; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
299-
; X86-NEXT: jne foo6 # TAILCALL
300-
; X86-NEXT: # %bb.1: # %bb2
300+
; X86-NEXT: je .LBB12_1
301+
; X86-NEXT: # %bb.2: # %bb
302+
; X86-NEXT: addl $20, %esp
303+
; X86-NEXT: jmp foo6 # TAILCALL
304+
; X86-NEXT: .LBB12_1: # %bb2
301305
; X86-NEXT: xorl %eax, %eax
306+
; X86-NEXT: addl $20, %esp
302307
; X86-NEXT: retl
303308
;
304309
; X64-LABEL: t12:

0 commit comments

Comments
 (0)