Skip to content

Commit 114e136

Browse files
committed
copy arm byval musttail tests
1 parent 731bf99 commit 114e136

File tree

2 files changed

+58
-163
lines changed

2 files changed

+58
-163
lines changed

llvm/lib/Target/X86/X86ISelLoweringCall.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2544,6 +2544,10 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
25442544
NeedsStackCopy = !isTailCall;
25452545
}
25462546

2547+
// FIXME: Contrary to the ARM backend, the current logic always seems to
2548+
// need a stack copy.
2549+
(void)NeedsStackCopy;
2550+
25472551
auto PtrVT = getPointerTy(DAG.getDataLayout());
25482552
SDValue DstAddr = DAG.getFrameIndex(FI, PtrVT);
25492553

llvm/test/CodeGen/X86/musttail-struct.ll

Lines changed: 54 additions & 163 deletions
Original file line numberDiff line numberDiff line change
@@ -72,17 +72,17 @@ entry:
7272
}
7373

7474
define dso_local i32 @swapByValArguments(ptr byval(%struct.1xi32) %0, ptr byval(%struct.1xi32) %1) {
75-
; CHECK-LABEL: swapArguments:
75+
; CHECK-LABEL: swapByValArguments:
7676
; CHECK: # %bb.0:
77-
7877
; CHECK-NEXT: mov eax, dword ptr [rsp + 8]
7978
; CHECK-NEXT: mov dword ptr [rsp - 16], eax
8079
; CHECK-NEXT: mov ecx, dword ptr [rsp + 16]
8180
; CHECK-NEXT: mov dword ptr [rsp - 8], ecx
82-
8381
; CHECK-NEXT: mov dword ptr [rsp + 8], ecx
8482
; CHECK-NEXT: mov dword ptr [rsp + 16], eax
8583
; CHECK-NEXT: jmp swap # TAILCALL
84+
85+
8686
%r = musttail call i32 @swap(ptr byval(%struct.1xi32) %1, ptr byval(%struct.1xi32) %0)
8787
ret i32 %r
8888
}
@@ -95,37 +95,12 @@ declare void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4)
9595

9696
; Functions with byval parameters can be tail-called, because the value is
9797
; actually passed in registers and the stack in the same way for the caller and
98-
; callee. Within @large_caller the first 16 bytes of the argument are spilled
99-
; to the local stack frame, but for the tail-call they are passed in r0-r3, so
100-
; it's safe to de-allocate that memory before the call.
101-
; TODO: The SUB and STM instructions are unnecessary and could be optimised
102-
; out, but the behaviour of this is still correct.
98+
; callee. On x86, byval arguments are never passed in registers, not even partially.
10399
define void @large_caller(%twenty_bytes* byval(%twenty_bytes) align 4 %a) {
104100
; CHECK-LABEL: large_caller:
105-
; CHECK: @ %bb.0: @ %entry
106-
; CHECK-NEXT: .pad #16
107-
; CHECK-NEXT: sub sp, sp, #16
108-
; CHECK-NEXT: stm sp!, {r0, r1, r2, r3}
109-
; CHECK-NEXT: b large_callee
110-
entry:
111-
musttail call void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %a)
112-
ret void
113-
}
114-
115-
; As above, but with some inline asm to test that the arguments in r0-r3 are
116-
; re-loaded before the call.
117-
define void @large_caller_check_regs(%twenty_bytes* byval(%twenty_bytes) align 4 %a) {
118-
; CHECK-LABEL: large_caller_check_regs:
119-
; CHECK: @ %bb.0: @ %entry
120-
; CHECK-NEXT: .pad #16
121-
; CHECK-NEXT: sub sp, sp, #16
122-
; CHECK-NEXT: stm sp, {r0, r1, r2, r3}
123-
; CHECK-NEXT: @APP
124-
; CHECK-NEXT: @NO_APP
125-
; CHECK-NEXT: pop {r0, r1, r2, r3}
126-
; CHECK-NEXT: b large_callee
101+
; CHECK: # %bb.0: # %entry
102+
; CHECK-NEXT: jmp large_callee@PLT # TAILCALL
127103
entry:
128-
tail call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3}"()
129104
musttail call void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %a)
130105
ret void
131106
}
@@ -136,32 +111,19 @@ entry:
136111
; valid.
137112
define void @large_caller_new_value(%twenty_bytes* byval(%twenty_bytes) align 4 %a) {
138113
; CHECK-LABEL: large_caller_new_value:
139-
; CHECK: @ %bb.0: @ %entry
140-
; CHECK-NEXT: .pad #36
141-
; CHECK-NEXT: sub sp, sp, #36
142-
; CHECK-NEXT: add r12, sp, #20
143-
; CHECK-NEXT: stm r12, {r0, r1, r2, r3}
144-
; CHECK-NEXT: mov r0, #4
145-
; CHECK-NEXT: add r1, sp, #36
146-
; CHECK-NEXT: str r0, [sp, #16]
147-
; CHECK-NEXT: mov r0, #3
148-
; CHECK-NEXT: str r0, [sp, #12]
149-
; CHECK-NEXT: mov r0, #2
150-
; CHECK-NEXT: str r0, [sp, #8]
151-
; CHECK-NEXT: mov r0, #1
152-
; CHECK-NEXT: str r0, [sp, #4]
153-
; CHECK-NEXT: mov r0, #0
154-
; CHECK-NEXT: str r0, [sp]
155-
; CHECK-NEXT: mov r0, sp
156-
; CHECK-NEXT: add r0, r0, #16
157-
; CHECK-NEXT: mov r3, #3
158-
; CHECK-NEXT: ldr r2, [r0], #4
159-
; CHECK-NEXT: str r2, [r1], #4
160-
; CHECK-NEXT: mov r0, #0
161-
; CHECK-NEXT: mov r1, #1
162-
; CHECK-NEXT: mov r2, #2
163-
; CHECK-NEXT: add sp, sp, #36
164-
; CHECK-NEXT: b large_callee
114+
; CHECK: # %bb.0: # %entry
115+
; CHECK-NEXT: movabs rax, 4294967296
116+
; CHECK-NEXT: mov qword ptr [rsp - 20], rax
117+
; CHECK-NEXT: movabs rcx, 12884901890
118+
; CHECK-NEXT: mov qword ptr [rsp - 12], rcx
119+
; CHECK-NEXT: mov dword ptr [rsp - 4], 4
120+
; CHECK-NEXT: mov qword ptr [rsp - 40], rax
121+
; CHECK-NEXT: mov qword ptr [rsp - 32], rcx
122+
; CHECK-NEXT: mov qword ptr [rsp + 8], rax
123+
; CHECK-NEXT: mov qword ptr [rsp + 16], rcx
124+
; CHECK-NEXT: mov dword ptr [rsp - 24], 4
125+
; CHECK-NEXT: mov dword ptr [rsp + 24], 4
126+
; CHECK-NEXT: jmp large_callee@PLT # TAILCALL
165127
entry:
166128
%y = alloca %twenty_bytes, align 4
167129
store i32 0, ptr %y, align 4
@@ -180,103 +142,40 @@ entry:
180142
declare void @two_byvals_callee(%twenty_bytes* byval(%twenty_bytes) align 4, %twenty_bytes* byval(%twenty_bytes) align 4)
181143
define void @swap_byvals(%twenty_bytes* byval(%twenty_bytes) align 4 %a, %twenty_bytes* byval(%twenty_bytes) align 4 %b) {
182144
; CHECK-LABEL: swap_byvals:
183-
; CHECK: @ %bb.0: @ %entry
184-
; CHECK-NEXT: .pad #16
185-
; CHECK-NEXT: sub sp, sp, #16
186-
; CHECK-NEXT: .save {r4, r5, r11, lr}
187-
; CHECK-NEXT: push {r4, r5, r11, lr}
188-
; CHECK-NEXT: .pad #40
189-
; CHECK-NEXT: sub sp, sp, #40
190-
; CHECK-NEXT: add r12, sp, #56
191-
; CHECK-NEXT: add lr, sp, #20
192-
; CHECK-NEXT: stm r12, {r0, r1, r2, r3}
193-
; CHECK-NEXT: add r0, sp, #56
194-
; CHECK-NEXT: mov r12, sp
195-
; CHECK-NEXT: ldr r1, [r0], #4
196-
; CHECK-NEXT: mov r2, r12
197-
; CHECK-NEXT: str r1, [r2], #4
198-
; CHECK-NEXT: add r3, sp, #20
199-
; CHECK-NEXT: ldr r1, [r0], #4
200-
; CHECK-NEXT: add r4, sp, #76
201-
; CHECK-NEXT: str r1, [r2], #4
202-
; CHECK-NEXT: ldr r1, [r0], #4
203-
; CHECK-NEXT: str r1, [r2], #4
204-
; CHECK-NEXT: ldr r1, [r0], #4
205-
; CHECK-NEXT: str r1, [r2], #4
206-
; CHECK-NEXT: ldr r1, [r0], #4
207-
; CHECK-NEXT: add r0, sp, #76
208-
; CHECK-NEXT: str r1, [r2], #4
209-
; CHECK-NEXT: mov r2, lr
210-
; CHECK-NEXT: ldr r1, [r0], #4
211-
; CHECK-NEXT: str r1, [r2], #4
212-
; CHECK-NEXT: ldr r1, [r0], #4
213-
; CHECK-NEXT: str r1, [r2], #4
214-
; CHECK-NEXT: ldr r1, [r0], #4
215-
; CHECK-NEXT: str r1, [r2], #4
216-
; CHECK-NEXT: ldr r1, [r0], #4
217-
; CHECK-NEXT: str r1, [r2], #4
218-
; CHECK-NEXT: ldr r1, [r0], #4
219-
; CHECK-NEXT: str r1, [r2], #4
220-
; CHECK-NEXT: ldm r3, {r0, r1, r2, r3}
221-
; CHECK-NEXT: ldr r5, [r12], #4
222-
; CHECK-NEXT: str r5, [r4], #4
223-
; CHECK-NEXT: ldr r5, [r12], #4
224-
; CHECK-NEXT: str r5, [r4], #4
225-
; CHECK-NEXT: ldr r5, [r12], #4
226-
; CHECK-NEXT: str r5, [r4], #4
227-
; CHECK-NEXT: ldr r5, [r12], #4
228-
; CHECK-NEXT: str r5, [r4], #4
229-
; CHECK-NEXT: ldr r5, [r12], #4
230-
; CHECK-NEXT: str r5, [r4], #4
231-
; CHECK-NEXT: add r5, lr, #16
232-
; CHECK-NEXT: add r12, sp, #72
233-
; CHECK-NEXT: ldr r4, [r5], #4
234-
; CHECK-NEXT: str r4, [r12], #4
235-
; CHECK-NEXT: add sp, sp, #40
236-
; CHECK-NEXT: pop {r4, r5, r11, lr}
237-
; CHECK-NEXT: add sp, sp, #16
238-
; CHECK-NEXT: b two_byvals_callee
145+
; CHECK: # %bb.0: # %entry
146+
; CHECK-NEXT: mov eax, dword ptr [rsp + 24]
147+
; CHECK-NEXT: mov dword ptr [rsp - 8], eax
148+
; CHECK-NEXT: movaps xmm0, xmmword ptr [rsp + 8]
149+
; CHECK-NEXT: movaps xmmword ptr [rsp - 24], xmm0
150+
; CHECK-NEXT: mov ecx, dword ptr [rsp + 48]
151+
; CHECK-NEXT: mov dword ptr [rsp - 32], ecx
152+
; CHECK-NEXT: mov rdx, qword ptr [rsp + 32]
153+
; CHECK-NEXT: mov rsi, qword ptr [rsp + 40]
154+
; CHECK-NEXT: mov qword ptr [rsp - 40], rsi
155+
; CHECK-NEXT: mov qword ptr [rsp - 48], rdx
156+
; CHECK-NEXT: mov qword ptr [rsp + 8], rdx
157+
; CHECK-NEXT: mov qword ptr [rsp + 16], rsi
158+
; CHECK-NEXT: mov dword ptr [rsp + 24], ecx
159+
; CHECK-NEXT: mov rcx, qword ptr [rsp + 8]
160+
; CHECK-NEXT: mov rdx, qword ptr [rsp + 16]
161+
; CHECK-NEXT: mov qword ptr [rsp + 32], rcx
162+
; CHECK-NEXT: mov qword ptr [rsp + 40], rdx
163+
; CHECK-NEXT: mov dword ptr [rsp + 48], eax
164+
; CHECK-NEXT: jmp two_byvals_callee@PLT # TAILCALL
239165
entry:
240166
musttail call void @two_byvals_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %b, %twenty_bytes* byval(%twenty_bytes) align 4 %a)
241167
ret void
242168
}
243169

244-
; A forwarded byval arg, but at a different offset on the stack, so it needs to
245-
; be copied to the local stack frame first. This can't be musttail because of
246-
; the different signatures, but is still tail-called as an optimisation.
170+
; A forwarded byval arg, but at a different argument position. Because
171+
; x86 does not (partially) pass byval arguments in registers, the byval
172+
; arg is in the correct position already, so this is not a sibcall but
173+
; can be tail-call optimized.
247174
declare void @shift_byval_callee(%twenty_bytes* byval(%twenty_bytes) align 4)
248175
define void @shift_byval(i32 %a, %twenty_bytes* byval(%twenty_bytes) align 4 %b) {
249176
; CHECK-LABEL: shift_byval:
250-
; CHECK: @ %bb.0: @ %entry
251-
; CHECK-NEXT: .pad #12
252-
; CHECK-NEXT: sub sp, sp, #12
253-
; CHECK-NEXT: .save {r4, lr}
254-
; CHECK-NEXT: push {r4, lr}
255-
; CHECK-NEXT: .pad #20
256-
; CHECK-NEXT: sub sp, sp, #20
257-
; CHECK-NEXT: add r0, sp, #28
258-
; CHECK-NEXT: add lr, sp, #40
259-
; CHECK-NEXT: stm r0, {r1, r2, r3}
260-
; CHECK-NEXT: add r0, sp, #28
261-
; CHECK-NEXT: mov r1, sp
262-
; CHECK-NEXT: ldr r2, [r0], #4
263-
; CHECK-NEXT: add r12, r1, #16
264-
; CHECK-NEXT: str r2, [r1], #4
265-
; CHECK-NEXT: ldr r2, [r0], #4
266-
; CHECK-NEXT: str r2, [r1], #4
267-
; CHECK-NEXT: ldr r2, [r0], #4
268-
; CHECK-NEXT: str r2, [r1], #4
269-
; CHECK-NEXT: ldr r2, [r0], #4
270-
; CHECK-NEXT: str r2, [r1], #4
271-
; CHECK-NEXT: ldr r2, [r0], #4
272-
; CHECK-NEXT: str r2, [r1], #4
273-
; CHECK-NEXT: ldm sp, {r0, r1, r2, r3}
274-
; CHECK-NEXT: ldr r4, [r12], #4
275-
; CHECK-NEXT: str r4, [lr], #4
276-
; CHECK-NEXT: add sp, sp, #20
277-
; CHECK-NEXT: pop {r4, lr}
278-
; CHECK-NEXT: add sp, sp, #12
279-
; CHECK-NEXT: b shift_byval_callee
177+
; CHECK: # %bb.0: # %entry
178+
; CHECK-NEXT: jmp shift_byval_callee@PLT # TAILCALL
280179
entry:
281180
tail call void @shift_byval_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %b)
282181
ret void
@@ -287,23 +186,15 @@ entry:
287186
@large_global = external global %twenty_bytes
288187
define void @large_caller_from_global(%twenty_bytes* byval(%twenty_bytes) align 4 %a) {
289188
; CHECK-LABEL: large_caller_from_global:
290-
; CHECK: @ %bb.0: @ %entry
291-
; CHECK-NEXT: .pad #16
292-
; CHECK-NEXT: sub sp, sp, #16
293-
; CHECK-NEXT: .save {r4, lr}
294-
; CHECK-NEXT: push {r4, lr}
295-
; CHECK-NEXT: add r12, sp, #8
296-
; CHECK-NEXT: add lr, sp, #24
297-
; CHECK-NEXT: stm r12, {r0, r1, r2, r3}
298-
; CHECK-NEXT: movw r3, :lower16:large_global
299-
; CHECK-NEXT: movt r3, :upper16:large_global
300-
; CHECK-NEXT: add r12, r3, #16
301-
; CHECK-NEXT: ldm r3, {r0, r1, r2, r3}
302-
; CHECK-NEXT: ldr r4, [r12], #4
303-
; CHECK-NEXT: str r4, [lr], #4
304-
; CHECK-NEXT: pop {r4, lr}
305-
; CHECK-NEXT: add sp, sp, #16
306-
; CHECK-NEXT: b large_callee
189+
; CHECK: # %bb.0: # %entry
190+
; CHECK-NEXT: mov rax, qword ptr [rip + large_global@GOTPCREL]
191+
; CHECK-NEXT: mov ecx, dword ptr [rax + 16]
192+
; CHECK-NEXT: mov dword ptr [rsp + 24], ecx
193+
; CHECK-NEXT: mov rcx, qword ptr [rax]
194+
; CHECK-NEXT: mov rax, qword ptr [rax + 8]
195+
; CHECK-NEXT: mov qword ptr [rsp + 16], rax
196+
; CHECK-NEXT: mov qword ptr [rsp + 8], rcx
197+
; CHECK-NEXT: jmp large_callee@PLT # TAILCALL
307198
entry:
308199
musttail call void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4 @large_global)
309200
ret void

0 commit comments

Comments
 (0)