@@ -86,3 +86,225 @@ define dso_local i32 @swapByValArguments(ptr byval(%struct.1xi32) %0, ptr byval(
8686 %r = musttail call i32 @swap (ptr byval (%struct.1xi32 ) %1 , ptr byval (%struct.1xi32 ) %0 )
8787 ret i32 %r
8888}
89+
90+ ; Clang only uses byval for arguments of 65 bytes or larger, but e.g. rustc
91+ ; does use byval for smaller types. Here we use a 20 byte struct to keep
92+ ; the tests more readable.
; 20-byte aggregate (five i32 words) used as the byval payload by these tests.
93+ %twenty_bytes = type { [5 x i32 ] }
; External callee taking a single 20-byte byval argument. The pointer is
; spelled with the opaque `ptr` type, matching the `ptr byval(...)` form
; already used by the other tests in this file.
94+ declare void @large_callee (ptr byval (%twenty_bytes ) align 4 )
95+
96+ ; Functions with byval parameters can be tail-called, because the value is
97+ ; actually passed in registers and the stack in the same way for the caller and
98+ ; callee. Within @large_caller the first 16 bytes of the argument are spilled
99+ ; to the local stack frame, but for the tail-call they are passed in r0-r3, so
100+ ; it's safe to de-allocate that memory before the call.
101+ ; TODO: The SUB and STM instructions are unnecessary and could be optimised
102+ ; out, but the behaviour of this is still correct.
103+ define void @large_caller (ptr byval (%twenty_bytes ) align 4 %a ) {
104+ ; CHECK-LABEL: large_caller:
105+ ; CHECK: @ %bb.0: @ %entry
106+ ; CHECK-NEXT: .pad #16
107+ ; CHECK-NEXT: sub sp, sp, #16
108+ ; CHECK-NEXT: stm sp!, {r0, r1, r2, r3}
109+ ; CHECK-NEXT: b large_callee
110+ entry:
; Forward the incoming byval argument unchanged; caller and callee have the
; same prototype, as musttail requires. The parameter uses the opaque `ptr`
; spelling, which denotes the same type as the old `%twenty_bytes*` form.
111+ musttail call void @large_callee (ptr byval (%twenty_bytes ) align 4 %a )
112+ ret void
113+ }
114+
115+ ; As above, but with some inline asm to test that the arguments in r0-r3 are
116+ ; re-loaded before the call.
117+ define void @large_caller_check_regs (ptr byval (%twenty_bytes ) align 4 %a ) {
118+ ; CHECK-LABEL: large_caller_check_regs:
119+ ; CHECK: @ %bb.0: @ %entry
120+ ; CHECK-NEXT: .pad #16
121+ ; CHECK-NEXT: sub sp, sp, #16
122+ ; CHECK-NEXT: stm sp, {r0, r1, r2, r3}
123+ ; CHECK-NEXT: @APP
124+ ; CHECK-NEXT: @NO_APP
125+ ; CHECK-NEXT: pop {r0, r1, r2, r3}
126+ ; CHECK-NEXT: b large_callee
127+ entry:
; Empty inline asm whose only effect is to clobber r0-r3, so the argument
; words that arrived in those registers have to be restored (the `pop` in the
; expected output) before the tail call.
128+ tail call void asm sideeffect "" , "~{r0},~{r1},~{r2},~{r3}" ()
; Forward the same byval argument; `ptr` is the opaque-pointer spelling.
129+ musttail call void @large_callee (ptr byval (%twenty_bytes ) align 4 %a )
130+ ret void
131+ }
132+
133+ ; The IR for this one looks dodgy, because it has an alloca passed to a
134+ ; musttail function, but it is passed as a byval argument, so will be copied
135+ ; into the stack space allocated by @large_caller_new_value's caller, so is
136+ ; valid.
137+ define void @large_caller_new_value (ptr byval (%twenty_bytes ) align 4 %a ) {
138+ ; CHECK-LABEL: large_caller_new_value:
139+ ; CHECK: @ %bb.0: @ %entry
140+ ; CHECK-NEXT: .pad #36
141+ ; CHECK-NEXT: sub sp, sp, #36
142+ ; CHECK-NEXT: add r12, sp, #20
143+ ; CHECK-NEXT: stm r12, {r0, r1, r2, r3}
144+ ; CHECK-NEXT: mov r0, #4
145+ ; CHECK-NEXT: add r1, sp, #36
146+ ; CHECK-NEXT: str r0, [sp, #16]
147+ ; CHECK-NEXT: mov r0, #3
148+ ; CHECK-NEXT: str r0, [sp, #12]
149+ ; CHECK-NEXT: mov r0, #2
150+ ; CHECK-NEXT: str r0, [sp, #8]
151+ ; CHECK-NEXT: mov r0, #1
152+ ; CHECK-NEXT: str r0, [sp, #4]
153+ ; CHECK-NEXT: mov r0, #0
154+ ; CHECK-NEXT: str r0, [sp]
155+ ; CHECK-NEXT: mov r0, sp
156+ ; CHECK-NEXT: add r0, r0, #16
157+ ; CHECK-NEXT: mov r3, #3
158+ ; CHECK-NEXT: ldr r2, [r0], #4
159+ ; CHECK-NEXT: str r2, [r1], #4
160+ ; CHECK-NEXT: mov r0, #0
161+ ; CHECK-NEXT: mov r1, #1
162+ ; CHECK-NEXT: mov r2, #2
163+ ; CHECK-NEXT: add sp, sp, #36
164+ ; CHECK-NEXT: b large_callee
165+ entry:
; Build a fresh 20-byte value in a local alloca: the five i32 words are set
; to 0, 1, 2, 3 and 4 at byte offsets 0, 4, 8, 12 and 16.
166+ %y = alloca %twenty_bytes , align 4
167+ store i32 0 , ptr %y , align 4
168+ %0 = getelementptr inbounds i8 , ptr %y , i32 4
169+ store i32 1 , ptr %0 , align 4
170+ %1 = getelementptr inbounds i8 , ptr %y , i32 8
171+ store i32 2 , ptr %1 , align 4
172+ %2 = getelementptr inbounds i8 , ptr %y , i32 12
173+ store i32 3 , ptr %2 , align 4
174+ %3 = getelementptr inbounds i8 , ptr %y , i32 16
175+ store i32 4 , ptr %3 , align 4
; Pass the new local value (not the incoming %a) as the byval argument. The
; pointer uses the same opaque `ptr` spelling as the stores above.
176+ musttail call void @large_callee (ptr byval (%twenty_bytes ) align 4 %y )
177+ ret void
178+ }
179+
; Two byval arguments forwarded to the callee in swapped order (%b first,
; then %a). The expected code copies both arguments into local temporaries
; before writing them to their new argument positions - presumably so that
; neither source is overwritten before it has been read.
180+ declare void @two_byvals_callee (ptr byval (%twenty_bytes ) align 4 , ptr byval (%twenty_bytes ) align 4 )
181+ define void @swap_byvals (ptr byval (%twenty_bytes ) align 4 %a , ptr byval (%twenty_bytes ) align 4 %b ) {
182+ ; CHECK-LABEL: swap_byvals:
183+ ; CHECK: @ %bb.0: @ %entry
184+ ; CHECK-NEXT: .pad #16
185+ ; CHECK-NEXT: sub sp, sp, #16
186+ ; CHECK-NEXT: .save {r4, r5, r11, lr}
187+ ; CHECK-NEXT: push {r4, r5, r11, lr}
188+ ; CHECK-NEXT: .pad #40
189+ ; CHECK-NEXT: sub sp, sp, #40
190+ ; CHECK-NEXT: add r12, sp, #56
191+ ; CHECK-NEXT: add lr, sp, #20
192+ ; CHECK-NEXT: stm r12, {r0, r1, r2, r3}
193+ ; CHECK-NEXT: add r0, sp, #56
194+ ; CHECK-NEXT: mov r12, sp
195+ ; CHECK-NEXT: ldr r1, [r0], #4
196+ ; CHECK-NEXT: mov r2, r12
197+ ; CHECK-NEXT: str r1, [r2], #4
198+ ; CHECK-NEXT: add r3, sp, #20
199+ ; CHECK-NEXT: ldr r1, [r0], #4
200+ ; CHECK-NEXT: add r4, sp, #76
201+ ; CHECK-NEXT: str r1, [r2], #4
202+ ; CHECK-NEXT: ldr r1, [r0], #4
203+ ; CHECK-NEXT: str r1, [r2], #4
204+ ; CHECK-NEXT: ldr r1, [r0], #4
205+ ; CHECK-NEXT: str r1, [r2], #4
206+ ; CHECK-NEXT: ldr r1, [r0], #4
207+ ; CHECK-NEXT: add r0, sp, #76
208+ ; CHECK-NEXT: str r1, [r2], #4
209+ ; CHECK-NEXT: mov r2, lr
210+ ; CHECK-NEXT: ldr r1, [r0], #4
211+ ; CHECK-NEXT: str r1, [r2], #4
212+ ; CHECK-NEXT: ldr r1, [r0], #4
213+ ; CHECK-NEXT: str r1, [r2], #4
214+ ; CHECK-NEXT: ldr r1, [r0], #4
215+ ; CHECK-NEXT: str r1, [r2], #4
216+ ; CHECK-NEXT: ldr r1, [r0], #4
217+ ; CHECK-NEXT: str r1, [r2], #4
218+ ; CHECK-NEXT: ldr r1, [r0], #4
219+ ; CHECK-NEXT: str r1, [r2], #4
220+ ; CHECK-NEXT: ldm r3, {r0, r1, r2, r3}
221+ ; CHECK-NEXT: ldr r5, [r12], #4
222+ ; CHECK-NEXT: str r5, [r4], #4
223+ ; CHECK-NEXT: ldr r5, [r12], #4
224+ ; CHECK-NEXT: str r5, [r4], #4
225+ ; CHECK-NEXT: ldr r5, [r12], #4
226+ ; CHECK-NEXT: str r5, [r4], #4
227+ ; CHECK-NEXT: ldr r5, [r12], #4
228+ ; CHECK-NEXT: str r5, [r4], #4
229+ ; CHECK-NEXT: ldr r5, [r12], #4
230+ ; CHECK-NEXT: str r5, [r4], #4
231+ ; CHECK-NEXT: add r5, lr, #16
232+ ; CHECK-NEXT: add r12, sp, #72
233+ ; CHECK-NEXT: ldr r4, [r5], #4
234+ ; CHECK-NEXT: str r4, [r12], #4
235+ ; CHECK-NEXT: add sp, sp, #40
236+ ; CHECK-NEXT: pop {r4, r5, r11, lr}
237+ ; CHECK-NEXT: add sp, sp, #16
238+ ; CHECK-NEXT: b two_byvals_callee
239+ entry:
; Note the argument order: %b is passed first and %a second.
240+ musttail call void @two_byvals_callee (ptr byval (%twenty_bytes ) align 4 %b , ptr byval (%twenty_bytes ) align 4 %a )
241+ ret void
242+ }
243+
244+ ; A forwarded byval arg, but at a different offset on the stack, so it needs to
245+ ; be copied to the local stack frame first. This can't be musttail because of
246+ ; the different signatures, but is still tail-called as an optimisation.
247+ declare void @shift_byval_callee (ptr byval (%twenty_bytes ) align 4 )
248+ define void @shift_byval (i32 %a , ptr byval (%twenty_bytes ) align 4 %b ) {
249+ ; CHECK-LABEL: shift_byval:
250+ ; CHECK: @ %bb.0: @ %entry
251+ ; CHECK-NEXT: .pad #12
252+ ; CHECK-NEXT: sub sp, sp, #12
253+ ; CHECK-NEXT: .save {r4, lr}
254+ ; CHECK-NEXT: push {r4, lr}
255+ ; CHECK-NEXT: .pad #20
256+ ; CHECK-NEXT: sub sp, sp, #20
257+ ; CHECK-NEXT: add r0, sp, #28
258+ ; CHECK-NEXT: add lr, sp, #40
259+ ; CHECK-NEXT: stm r0, {r1, r2, r3}
260+ ; CHECK-NEXT: add r0, sp, #28
261+ ; CHECK-NEXT: mov r1, sp
262+ ; CHECK-NEXT: ldr r2, [r0], #4
263+ ; CHECK-NEXT: add r12, r1, #16
264+ ; CHECK-NEXT: str r2, [r1], #4
265+ ; CHECK-NEXT: ldr r2, [r0], #4
266+ ; CHECK-NEXT: str r2, [r1], #4
267+ ; CHECK-NEXT: ldr r2, [r0], #4
268+ ; CHECK-NEXT: str r2, [r1], #4
269+ ; CHECK-NEXT: ldr r2, [r0], #4
270+ ; CHECK-NEXT: str r2, [r1], #4
271+ ; CHECK-NEXT: ldr r2, [r0], #4
272+ ; CHECK-NEXT: str r2, [r1], #4
273+ ; CHECK-NEXT: ldm sp, {r0, r1, r2, r3}
274+ ; CHECK-NEXT: ldr r4, [r12], #4
275+ ; CHECK-NEXT: str r4, [lr], #4
276+ ; CHECK-NEXT: add sp, sp, #20
277+ ; CHECK-NEXT: pop {r4, lr}
278+ ; CHECK-NEXT: add sp, sp, #12
279+ ; CHECK-NEXT: b shift_byval_callee
280+ entry:
; Plain `tail` rather than `musttail`: the caller takes (i32, byval) while the
; callee takes only the byval, so %b is the second argument here but the
; first argument of the call. %a is unused.
281+ tail call void @shift_byval_callee (ptr byval (%twenty_bytes ) align 4 %b )
282+ ret void
283+ }
284+
285+ ; A global object passed to a byval argument, so it must be copied, but doesn't
286+ ; need a stack temporary.
287+ @large_global = external global %twenty_bytes
288+ define void @large_caller_from_global (ptr byval (%twenty_bytes ) align 4 %a ) {
289+ ; CHECK-LABEL: large_caller_from_global:
290+ ; CHECK: @ %bb.0: @ %entry
291+ ; CHECK-NEXT: .pad #16
292+ ; CHECK-NEXT: sub sp, sp, #16
293+ ; CHECK-NEXT: .save {r4, lr}
294+ ; CHECK-NEXT: push {r4, lr}
295+ ; CHECK-NEXT: add r12, sp, #8
296+ ; CHECK-NEXT: add lr, sp, #24
297+ ; CHECK-NEXT: stm r12, {r0, r1, r2, r3}
298+ ; CHECK-NEXT: movw r3, :lower16:large_global
299+ ; CHECK-NEXT: movt r3, :upper16:large_global
300+ ; CHECK-NEXT: add r12, r3, #16
301+ ; CHECK-NEXT: ldm r3, {r0, r1, r2, r3}
302+ ; CHECK-NEXT: ldr r4, [r12], #4
303+ ; CHECK-NEXT: str r4, [lr], #4
304+ ; CHECK-NEXT: pop {r4, lr}
305+ ; CHECK-NEXT: add sp, sp, #16
306+ ; CHECK-NEXT: b large_callee
307+ entry:
; The byval source is the external global, not the incoming %a. The expected
; code loads the words directly from @large_global.
308+ musttail call void @large_callee (ptr byval (%twenty_bytes ) align 4 @large_global )
309+ ret void
310+ }
0 commit comments