Skip to content

Commit dc1c03f

Browse files
committed
error when musttail cannot tail
1 parent c7eabbd commit dc1c03f

File tree

2 files changed

+35
-65
lines changed

2 files changed

+35
-65
lines changed

llvm/lib/Target/X86/X86ISelLoweringCall.cpp

Lines changed: 23 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -2053,14 +2053,6 @@ X86TargetLowering::ByValCopyKind X86TargetLowering::ByValNeedsCopyForTailCall(
20532053
if (!FixedSrc || (FixedSrc && SrcOffset < 0))
20542054
return CopyOnce;
20552055

2056-
// In the case of byval arguments split between registers and the stack,
2057-
// computeAddrForCallArg returns a FrameIndex which corresponds only to the
2058-
// stack portion, but the Src SDValue will refer to the full value, including
2059-
// the local stack memory that the register portion gets stored into. We only
2060-
// need to compare them for equality, so normalise on the full value version.
2061-
uint64_t RegSize = Flags.getByValSize() - MFI.getObjectSize(DstFI);
2062-
DstOffset -= RegSize;
2063-
20642056
// If the value is already in the correct location, then no copying is
20652057
// needed. If not, then we need to copy via a temporary.
20662058
if (SrcOffset == DstOffset)
@@ -2154,19 +2146,15 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
21542146
IsSibcall = IsEligibleForTailCallOptimization(CLI, CCInfo, ArgLocs,
21552147
IsCalleePopSRet);
21562148

2157-
if (!IsMustTail) {
2158-
isTailCall = IsSibcall;
2159-
2160-
// Sibcalls are automatically detected tailcalls which do not require
2161-
// ABI changes.
2162-
IsSibcall = IsSibcall && !IsGuaranteeTCO;
2163-
}
2149+
if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt &&
2150+
CallConv != CallingConv::Tail && CallConv != CallingConv::SwiftTail)
2151+
IsSibcall = true;
21642152

21652153
if (isTailCall)
21662154
++NumTailCalls;
21672155
}
21682156

2169-
if (IsMustTail && !isTailCall)
2157+
if (!isTailCall && CLI.CB && CLI.CB->isMustTailCall())
21702158
report_fatal_error("failed to perform tail call elimination on a call "
21712159
"site marked musttail");
21722160

@@ -2231,7 +2219,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
22312219

22322220
// Destination: where this byval should live in the callee’s frame
22332221
// after the tail call.
2234-
int32_t Offset = VA.getLocMemOffset() + FPDiff + RetAddrSize;
2222+
int32_t Offset = VA.getLocMemOffset() + FPDiff;
22352223
int Size = VA.getLocVT().getFixedSizeInBits() / 8;
22362224
int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
22372225
SDValue Dst = DAG.getFrameIndex(FI, PtrVT);
@@ -2265,6 +2253,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
22652253
ByValCopyChains.push_back(CopyChain);
22662254
}
22672255
}
2256+
22682257
if (!ByValCopyChains.empty())
22692258
ByValTempChain =
22702259
DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ByValCopyChains);
@@ -2484,7 +2473,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
24842473
// For tail calls lower the arguments to the 'real' stack slots. Sibcalls
24852474
// don't need this because the eligibility check rejects calls that require
24862475
// shuffling arguments passed in memory.
2487-
if (!IsSibcall && isTailCall) {
2476+
if (isTailCall) {
24882477
// Force all the incoming stack arguments to be loaded from the stack
24892478
// before any new outgoing arguments or the return address are stored to the
24902479
// stack, because the outgoing stack slots may alias the incoming argument
@@ -2543,14 +2532,15 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
25432532

25442533
// FIXME: contrary to the arm backend, with the current logic we always
25452534
// seem to need a stack copy.
2546-
(void)NeedsStackCopy;
2535+
if (NeedsStackCopy) {
25472536

2548-
auto PtrVT = getPointerTy(DAG.getDataLayout());
2549-
SDValue DstAddr = DAG.getFrameIndex(FI, PtrVT);
2537+
auto PtrVT = getPointerTy(DAG.getDataLayout());
2538+
SDValue DstAddr = DAG.getFrameIndex(FI, PtrVT);
25502539

2551-
// Copy the struct contents from ByValSrc to DstAddr.
2552-
MemOpChains2.push_back(CreateCopyOfByValArgument(
2553-
ByValSrc, DstAddr, Chain, Flags, DAG, dl));
2540+
// Copy the struct contents from ByValSrc to DstAddr.
2541+
MemOpChains2.push_back(CreateCopyOfByValArgument(
2542+
ByValSrc, DstAddr, Chain, Flags, DAG, dl));
2543+
}
25542544
} else {
25552545
// Store relative to framepointer.
25562546
MemOpChains2.push_back(DAG.getStore(
@@ -2951,9 +2941,10 @@ mayBeSRetTailCallCompatible(const TargetLowering::CallLoweringInfo &CLI,
29512941

29522942
/// Check whether the call is eligible for tail call optimization. Targets
29532943
/// that want to do tail call optimization should implement this function.
2954-
/// Note that the x86 backend does not check musttail calls for eligibility! The
2955-
/// rest of x86 tail call lowering must be prepared to forward arguments of any
2956-
/// type.
2944+
///
2945+
/// Note that this function also processes musttail calls, so when this
2946+
/// function returns false on a valid musttail call, a fatal backend error
2947+
/// occurs.
29572948
bool X86TargetLowering::IsEligibleForTailCallOptimization(
29582949
TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo,
29592950
SmallVectorImpl<CCValAssign> &ArgLocs, bool IsCalleePopSRet) const {
@@ -3080,26 +3071,6 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
30803071
// If the callee takes no arguments then go on to check the results of the
30813072
// call.
30823073
if (!Outs.empty()) {
3083-
if (StackArgsSize > 0) {
3084-
// Check if the arguments are already laid out in the right way as
3085-
// the caller's fixed stack objects.
3086-
MachineFrameInfo &MFI = MF.getFrameInfo();
3087-
const MachineRegisterInfo *MRI = &MF.getRegInfo();
3088-
const X86InstrInfo *TII = Subtarget.getInstrInfo();
3089-
for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
3090-
const CCValAssign &VA = ArgLocs[I];
3091-
SDValue Arg = OutVals[I];
3092-
ISD::ArgFlagsTy Flags = Outs[I].Flags;
3093-
if (VA.getLocInfo() == CCValAssign::Indirect)
3094-
return false;
3095-
if (!VA.isRegLoc()) {
3096-
if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, MFI, MRI,
3097-
TII, VA))
3098-
return false;
3099-
}
3100-
}
3101-
}
3102-
31033074
bool PositionIndependent = isPositionIndependent();
31043075
// If the tailcall address may be in a register, then make sure it's
31053076
// possible to register allocate for it. In 32-bit, the call address can
@@ -3137,6 +3108,11 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
31373108
X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
31383109
MF.getTarget().Options.GuaranteedTailCallOpt);
31393110

3111+
// If the stack arguments for this call do not fit into our own save area then
3112+
// the call cannot be made tail.
3113+
if (CCInfo.getStackSize() > FuncInfo->getArgumentStackSize())
3114+
return false;
3115+
31403116
if (unsigned BytesToPop = FuncInfo->getBytesToPopOnReturn()) {
31413117
// If we have bytes to pop, the callee must pop them.
31423118
bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;

llvm/test/CodeGen/X86/musttail-struct.ll

Lines changed: 12 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ define dso_local i32 @test5(ptr byval(%struct.5xi32) %0) {
4242
define dso_local i32 @testManyArgs(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i8 %6, ptr byval(%struct.5xi32) %7) {
4343
; CHECK-LABEL: testManyArgs:
4444
; CHECK: # %bb.0:
45+
; CHECK-NEXT: movzx eax, byte ptr [rsp + 8]
46+
; CHECK-NEXT: mov byte ptr [rsp + 8], al
4547
; CHECK-NEXT: jmp FuncManyArgs # TAILCALL
4648
%r = musttail call i32 @FuncManyArgs(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i8 %6, ptr byval(%struct.5xi32) %7)
4749
ret i32 %r
@@ -50,6 +52,8 @@ define dso_local i32 @testManyArgs(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %
5052
define dso_local i32 @testRecursion(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i8 %6, ptr byval(%struct.5xi32) %7) {
5153
; CHECK-LABEL: testRecursion:
5254
; CHECK: # %bb.0:
55+
; CHECK-NEXT: movzx eax, byte ptr [rsp + 8]
56+
; CHECK-NEXT: mov byte ptr [rsp + 8], al
5357
; CHECK-NEXT: jmp testRecursion # TAILCALL
5458
%r = musttail call i32 @testRecursion(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i8 %6, ptr byval(%struct.5xi32) %7)
5559
ret i32 %r
@@ -75,10 +79,8 @@ define dso_local i32 @swapByValArguments(ptr byval(%struct.1xi32) %0, ptr byval(
7579
; CHECK: # %bb.0:
7680
; CHECK-NEXT: mov eax, dword ptr [rsp + 8]
7781
; CHECK-NEXT: mov dword ptr [rsp - 16], eax
78-
; CHECK-NEXT: mov ecx, dword ptr [rsp + 16]
79-
; CHECK-NEXT: mov dword ptr [rsp - 8], ecx
80-
; CHECK-NEXT: mov dword ptr [rsp + 8], ecx
81-
; CHECK-NEXT: mov dword ptr [rsp + 16], eax
82+
; CHECK-NEXT: mov eax, dword ptr [rsp + 16]
83+
; CHECK-NEXT: mov dword ptr [rsp - 8], eax
8284
; CHECK-NEXT: jmp swap # TAILCALL
8385

8486

@@ -143,20 +145,12 @@ define void @swap_byvals(%twenty_bytes* byval(%twenty_bytes) align 4 %a, %twenty
143145
; CHECK-NEXT: mov dword ptr [rsp - 8], eax
144146
; CHECK-NEXT: movaps xmm0, xmmword ptr [rsp + 8]
145147
; CHECK-NEXT: movaps xmmword ptr [rsp - 24], xmm0
146-
; CHECK-NEXT: mov ecx, dword ptr [rsp + 48]
147-
; CHECK-NEXT: mov dword ptr [rsp - 32], ecx
148-
; CHECK-NEXT: mov rdx, qword ptr [rsp + 32]
149-
; CHECK-NEXT: mov rsi, qword ptr [rsp + 40]
150-
; CHECK-NEXT: mov qword ptr [rsp - 40], rsi
151-
; CHECK-NEXT: mov qword ptr [rsp - 48], rdx
152-
; CHECK-NEXT: mov qword ptr [rsp + 8], rdx
153-
; CHECK-NEXT: mov qword ptr [rsp + 16], rsi
154-
; CHECK-NEXT: mov dword ptr [rsp + 24], ecx
155-
; CHECK-NEXT: mov rcx, qword ptr [rsp + 8]
156-
; CHECK-NEXT: mov rdx, qword ptr [rsp + 16]
157-
; CHECK-NEXT: mov qword ptr [rsp + 32], rcx
158-
; CHECK-NEXT: mov qword ptr [rsp + 40], rdx
159-
; CHECK-NEXT: mov dword ptr [rsp + 48], eax
148+
; CHECK-NEXT: mov eax, dword ptr [rsp + 48]
149+
; CHECK-NEXT: mov dword ptr [rsp - 32], eax
150+
; CHECK-NEXT: mov rax, qword ptr [rsp + 32]
151+
; CHECK-NEXT: mov rcx, qword ptr [rsp + 40]
152+
; CHECK-NEXT: mov qword ptr [rsp - 40], rcx
153+
; CHECK-NEXT: mov qword ptr [rsp - 48], rax
160154
; CHECK-NEXT: jmp two_byvals_callee@PLT # TAILCALL
161155
entry:
162156
musttail call void @two_byvals_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %b, %twenty_bytes* byval(%twenty_bytes) align 4 %a)

0 commit comments

Comments
 (0)