@@ -72,17 +72,17 @@ entry:
7272}
7373
7474define dso_local i32 @swapByValArguments (ptr byval (%struct.1xi32 ) %0 , ptr byval (%struct.1xi32 ) %1 ) {
75- ; CHECK-LABEL: swapArguments :
75+ ; CHECK-LABEL: swapByValArguments :
7676; CHECK: # %bb.0:
77-
7877; CHECK-NEXT: mov eax, dword ptr [rsp + 8]
7978; CHECK-NEXT: mov dword ptr [rsp - 16], eax
8079; CHECK-NEXT: mov ecx, dword ptr [rsp + 16]
8180; CHECK-NEXT: mov dword ptr [rsp - 8], ecx
82-
8381; CHECK-NEXT: mov dword ptr [rsp + 8], ecx
8482; CHECK-NEXT: mov dword ptr [rsp + 16], eax
8583; CHECK-NEXT: jmp swap # TAILCALL
84+
85+
8686 %r = musttail call i32 @swap (ptr byval (%struct.1xi32 ) %1 , ptr byval (%struct.1xi32 ) %0 )
8787 ret i32 %r
8888}
@@ -95,37 +95,12 @@ declare void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4)
9595
9696; Functions with byval parameters can be tail-called, because the value is
9797; actually passed in registers and the stack in the same way for the caller and
98- ; callee. Within @large_caller the first 16 bytes of the argument are spilled
99- ; to the local stack frame, but for the tail-call they are passed in r0-r3, so
100- ; it's safe to de-allocate that memory before the call.
101- ; TODO: The SUB and STM instructions are unnecessary and could be optimised
102- ; out, but the behaviour of this is still correct.
98+ ; callee. On x86, byval arguments are never passed in registers, even partially.
10399define void @large_caller (%twenty_bytes* byval (%twenty_bytes ) align 4 %a ) {
104100; CHECK-LABEL: large_caller:
105- ; CHECK: @ %bb.0: @ %entry
106- ; CHECK-NEXT: .pad #16
107- ; CHECK-NEXT: sub sp, sp, #16
108- ; CHECK-NEXT: stm sp!, {r0, r1, r2, r3}
109- ; CHECK-NEXT: b large_callee
110- entry:
111- musttail call void @large_callee (%twenty_bytes* byval (%twenty_bytes ) align 4 %a )
112- ret void
113- }
114-
115- ; As above, but with some inline asm to test that the arguments in r0-r3 are
116- ; re-loaded before the call.
117- define void @large_caller_check_regs (%twenty_bytes* byval (%twenty_bytes ) align 4 %a ) {
118- ; CHECK-LABEL: large_caller_check_regs:
119- ; CHECK: @ %bb.0: @ %entry
120- ; CHECK-NEXT: .pad #16
121- ; CHECK-NEXT: sub sp, sp, #16
122- ; CHECK-NEXT: stm sp, {r0, r1, r2, r3}
123- ; CHECK-NEXT: @APP
124- ; CHECK-NEXT: @NO_APP
125- ; CHECK-NEXT: pop {r0, r1, r2, r3}
126- ; CHECK-NEXT: b large_callee
101+ ; CHECK: # %bb.0: # %entry
102+ ; CHECK-NEXT: jmp large_callee@PLT # TAILCALL
127103entry:
128- tail call void asm sideeffect "" , "~{r0},~{r1},~{r2},~{r3}" ()
129104 musttail call void @large_callee (%twenty_bytes* byval (%twenty_bytes ) align 4 %a )
130105 ret void
131106}
@@ -136,32 +111,19 @@ entry:
136111; valid.
137112define void @large_caller_new_value (%twenty_bytes* byval (%twenty_bytes ) align 4 %a ) {
138113; CHECK-LABEL: large_caller_new_value:
139- ; CHECK: @ %bb.0: @ %entry
140- ; CHECK-NEXT: .pad #36
141- ; CHECK-NEXT: sub sp, sp, #36
142- ; CHECK-NEXT: add r12, sp, #20
143- ; CHECK-NEXT: stm r12, {r0, r1, r2, r3}
144- ; CHECK-NEXT: mov r0, #4
145- ; CHECK-NEXT: add r1, sp, #36
146- ; CHECK-NEXT: str r0, [sp, #16]
147- ; CHECK-NEXT: mov r0, #3
148- ; CHECK-NEXT: str r0, [sp, #12]
149- ; CHECK-NEXT: mov r0, #2
150- ; CHECK-NEXT: str r0, [sp, #8]
151- ; CHECK-NEXT: mov r0, #1
152- ; CHECK-NEXT: str r0, [sp, #4]
153- ; CHECK-NEXT: mov r0, #0
154- ; CHECK-NEXT: str r0, [sp]
155- ; CHECK-NEXT: mov r0, sp
156- ; CHECK-NEXT: add r0, r0, #16
157- ; CHECK-NEXT: mov r3, #3
158- ; CHECK-NEXT: ldr r2, [r0], #4
159- ; CHECK-NEXT: str r2, [r1], #4
160- ; CHECK-NEXT: mov r0, #0
161- ; CHECK-NEXT: mov r1, #1
162- ; CHECK-NEXT: mov r2, #2
163- ; CHECK-NEXT: add sp, sp, #36
164- ; CHECK-NEXT: b large_callee
114+ ; CHECK: # %bb.0: # %entry
115+ ; CHECK-NEXT: movabs rax, 4294967296
116+ ; CHECK-NEXT: mov qword ptr [rsp - 20], rax
117+ ; CHECK-NEXT: movabs rcx, 12884901890
118+ ; CHECK-NEXT: mov qword ptr [rsp - 12], rcx
119+ ; CHECK-NEXT: mov dword ptr [rsp - 4], 4
120+ ; CHECK-NEXT: mov qword ptr [rsp - 40], rax
121+ ; CHECK-NEXT: mov qword ptr [rsp - 32], rcx
122+ ; CHECK-NEXT: mov qword ptr [rsp + 8], rax
123+ ; CHECK-NEXT: mov qword ptr [rsp + 16], rcx
124+ ; CHECK-NEXT: mov dword ptr [rsp - 24], 4
125+ ; CHECK-NEXT: mov dword ptr [rsp + 24], 4
126+ ; CHECK-NEXT: jmp large_callee@PLT # TAILCALL
165127entry:
166128 %y = alloca %twenty_bytes , align 4
167129 store i32 0 , ptr %y , align 4
@@ -180,103 +142,40 @@ entry:
180142declare void @two_byvals_callee (%twenty_bytes* byval (%twenty_bytes ) align 4 , %twenty_bytes* byval (%twenty_bytes ) align 4 )
181143define void @swap_byvals (%twenty_bytes* byval (%twenty_bytes ) align 4 %a , %twenty_bytes* byval (%twenty_bytes ) align 4 %b ) {
182144; CHECK-LABEL: swap_byvals:
183- ; CHECK: @ %bb.0: @ %entry
184- ; CHECK-NEXT: .pad #16
185- ; CHECK-NEXT: sub sp, sp, #16
186- ; CHECK-NEXT: .save {r4, r5, r11, lr}
187- ; CHECK-NEXT: push {r4, r5, r11, lr}
188- ; CHECK-NEXT: .pad #40
189- ; CHECK-NEXT: sub sp, sp, #40
190- ; CHECK-NEXT: add r12, sp, #56
191- ; CHECK-NEXT: add lr, sp, #20
192- ; CHECK-NEXT: stm r12, {r0, r1, r2, r3}
193- ; CHECK-NEXT: add r0, sp, #56
194- ; CHECK-NEXT: mov r12, sp
195- ; CHECK-NEXT: ldr r1, [r0], #4
196- ; CHECK-NEXT: mov r2, r12
197- ; CHECK-NEXT: str r1, [r2], #4
198- ; CHECK-NEXT: add r3, sp, #20
199- ; CHECK-NEXT: ldr r1, [r0], #4
200- ; CHECK-NEXT: add r4, sp, #76
201- ; CHECK-NEXT: str r1, [r2], #4
202- ; CHECK-NEXT: ldr r1, [r0], #4
203- ; CHECK-NEXT: str r1, [r2], #4
204- ; CHECK-NEXT: ldr r1, [r0], #4
205- ; CHECK-NEXT: str r1, [r2], #4
206- ; CHECK-NEXT: ldr r1, [r0], #4
207- ; CHECK-NEXT: add r0, sp, #76
208- ; CHECK-NEXT: str r1, [r2], #4
209- ; CHECK-NEXT: mov r2, lr
210- ; CHECK-NEXT: ldr r1, [r0], #4
211- ; CHECK-NEXT: str r1, [r2], #4
212- ; CHECK-NEXT: ldr r1, [r0], #4
213- ; CHECK-NEXT: str r1, [r2], #4
214- ; CHECK-NEXT: ldr r1, [r0], #4
215- ; CHECK-NEXT: str r1, [r2], #4
216- ; CHECK-NEXT: ldr r1, [r0], #4
217- ; CHECK-NEXT: str r1, [r2], #4
218- ; CHECK-NEXT: ldr r1, [r0], #4
219- ; CHECK-NEXT: str r1, [r2], #4
220- ; CHECK-NEXT: ldm r3, {r0, r1, r2, r3}
221- ; CHECK-NEXT: ldr r5, [r12], #4
222- ; CHECK-NEXT: str r5, [r4], #4
223- ; CHECK-NEXT: ldr r5, [r12], #4
224- ; CHECK-NEXT: str r5, [r4], #4
225- ; CHECK-NEXT: ldr r5, [r12], #4
226- ; CHECK-NEXT: str r5, [r4], #4
227- ; CHECK-NEXT: ldr r5, [r12], #4
228- ; CHECK-NEXT: str r5, [r4], #4
229- ; CHECK-NEXT: ldr r5, [r12], #4
230- ; CHECK-NEXT: str r5, [r4], #4
231- ; CHECK-NEXT: add r5, lr, #16
232- ; CHECK-NEXT: add r12, sp, #72
233- ; CHECK-NEXT: ldr r4, [r5], #4
234- ; CHECK-NEXT: str r4, [r12], #4
235- ; CHECK-NEXT: add sp, sp, #40
236- ; CHECK-NEXT: pop {r4, r5, r11, lr}
237- ; CHECK-NEXT: add sp, sp, #16
238- ; CHECK-NEXT: b two_byvals_callee
145+ ; CHECK: # %bb.0: # %entry
146+ ; CHECK-NEXT: mov eax, dword ptr [rsp + 24]
147+ ; CHECK-NEXT: mov dword ptr [rsp - 8], eax
148+ ; CHECK-NEXT: movaps xmm0, xmmword ptr [rsp + 8]
149+ ; CHECK-NEXT: movaps xmmword ptr [rsp - 24], xmm0
150+ ; CHECK-NEXT: mov ecx, dword ptr [rsp + 48]
151+ ; CHECK-NEXT: mov dword ptr [rsp - 32], ecx
152+ ; CHECK-NEXT: mov rdx, qword ptr [rsp + 32]
153+ ; CHECK-NEXT: mov rsi, qword ptr [rsp + 40]
154+ ; CHECK-NEXT: mov qword ptr [rsp - 40], rsi
155+ ; CHECK-NEXT: mov qword ptr [rsp - 48], rdx
156+ ; CHECK-NEXT: mov qword ptr [rsp + 8], rdx
157+ ; CHECK-NEXT: mov qword ptr [rsp + 16], rsi
158+ ; CHECK-NEXT: mov dword ptr [rsp + 24], ecx
159+ ; CHECK-NEXT: mov rcx, qword ptr [rsp + 8]
160+ ; CHECK-NEXT: mov rdx, qword ptr [rsp + 16]
161+ ; CHECK-NEXT: mov qword ptr [rsp + 32], rcx
162+ ; CHECK-NEXT: mov qword ptr [rsp + 40], rdx
163+ ; CHECK-NEXT: mov dword ptr [rsp + 48], eax
164+ ; CHECK-NEXT: jmp two_byvals_callee@PLT # TAILCALL
239165entry:
240166 musttail call void @two_byvals_callee (%twenty_bytes* byval (%twenty_bytes ) align 4 %b , %twenty_bytes* byval (%twenty_bytes ) align 4 %a )
241167 ret void
242168}
243169
244- ; A forwarded byval arg, but at a different offset on the stack, so it needs to
245- ; be copied to the local stack frame first. This can't be musttail because of
246- ; the different signatures, but is still tail-called as an optimisation.
170+ ; A forwarded byval arg, but at a different argument position. Because
171+ ; x86 never passes byval arguments in registers (not even partially), the
172+ ; byval arg is already at the correct stack position, so this is not a
173+ ; sibcall but can still be tail-call optimized.
247174declare void @shift_byval_callee (%twenty_bytes* byval (%twenty_bytes ) align 4 )
248175define void @shift_byval (i32 %a , %twenty_bytes* byval (%twenty_bytes ) align 4 %b ) {
249176; CHECK-LABEL: shift_byval:
250- ; CHECK: @ %bb.0: @ %entry
251- ; CHECK-NEXT: .pad #12
252- ; CHECK-NEXT: sub sp, sp, #12
253- ; CHECK-NEXT: .save {r4, lr}
254- ; CHECK-NEXT: push {r4, lr}
255- ; CHECK-NEXT: .pad #20
256- ; CHECK-NEXT: sub sp, sp, #20
257- ; CHECK-NEXT: add r0, sp, #28
258- ; CHECK-NEXT: add lr, sp, #40
259- ; CHECK-NEXT: stm r0, {r1, r2, r3}
260- ; CHECK-NEXT: add r0, sp, #28
261- ; CHECK-NEXT: mov r1, sp
262- ; CHECK-NEXT: ldr r2, [r0], #4
263- ; CHECK-NEXT: add r12, r1, #16
264- ; CHECK-NEXT: str r2, [r1], #4
265- ; CHECK-NEXT: ldr r2, [r0], #4
266- ; CHECK-NEXT: str r2, [r1], #4
267- ; CHECK-NEXT: ldr r2, [r0], #4
268- ; CHECK-NEXT: str r2, [r1], #4
269- ; CHECK-NEXT: ldr r2, [r0], #4
270- ; CHECK-NEXT: str r2, [r1], #4
271- ; CHECK-NEXT: ldr r2, [r0], #4
272- ; CHECK-NEXT: str r2, [r1], #4
273- ; CHECK-NEXT: ldm sp, {r0, r1, r2, r3}
274- ; CHECK-NEXT: ldr r4, [r12], #4
275- ; CHECK-NEXT: str r4, [lr], #4
276- ; CHECK-NEXT: add sp, sp, #20
277- ; CHECK-NEXT: pop {r4, lr}
278- ; CHECK-NEXT: add sp, sp, #12
279- ; CHECK-NEXT: b shift_byval_callee
177+ ; CHECK: # %bb.0: # %entry
178+ ; CHECK-NEXT: jmp shift_byval_callee@PLT # TAILCALL
280179entry:
281180 tail call void @shift_byval_callee (%twenty_bytes* byval (%twenty_bytes ) align 4 %b )
282181 ret void
@@ -287,23 +186,15 @@ entry:
287186@large_global = external global %twenty_bytes
288187define void @large_caller_from_global (%twenty_bytes* byval (%twenty_bytes ) align 4 %a ) {
289188; CHECK-LABEL: large_caller_from_global:
290- ; CHECK: @ %bb.0: @ %entry
291- ; CHECK-NEXT: .pad #16
292- ; CHECK-NEXT: sub sp, sp, #16
293- ; CHECK-NEXT: .save {r4, lr}
294- ; CHECK-NEXT: push {r4, lr}
295- ; CHECK-NEXT: add r12, sp, #8
296- ; CHECK-NEXT: add lr, sp, #24
297- ; CHECK-NEXT: stm r12, {r0, r1, r2, r3}
298- ; CHECK-NEXT: movw r3, :lower16:large_global
299- ; CHECK-NEXT: movt r3, :upper16:large_global
300- ; CHECK-NEXT: add r12, r3, #16
301- ; CHECK-NEXT: ldm r3, {r0, r1, r2, r3}
302- ; CHECK-NEXT: ldr r4, [r12], #4
303- ; CHECK-NEXT: str r4, [lr], #4
304- ; CHECK-NEXT: pop {r4, lr}
305- ; CHECK-NEXT: add sp, sp, #16
306- ; CHECK-NEXT: b large_callee
189+ ; CHECK: # %bb.0: # %entry
190+ ; CHECK-NEXT: mov rax, qword ptr [rip + large_global@GOTPCREL]
191+ ; CHECK-NEXT: mov ecx, dword ptr [rax + 16]
192+ ; CHECK-NEXT: mov dword ptr [rsp + 24], ecx
193+ ; CHECK-NEXT: mov rcx, qword ptr [rax]
194+ ; CHECK-NEXT: mov rax, qword ptr [rax + 8]
195+ ; CHECK-NEXT: mov qword ptr [rsp + 16], rax
196+ ; CHECK-NEXT: mov qword ptr [rsp + 8], rcx
197+ ; CHECK-NEXT: jmp large_callee@PLT # TAILCALL
307198entry:
308199 musttail call void @large_callee (%twenty_bytes* byval (%twenty_bytes ) align 4 @large_global )
309200 ret void
0 commit comments