Skip to content

Commit 731bf99

Browse files
committed
add byval arm tests
1 parent 4636011 commit 731bf99

File tree

1 file changed

+222
-0
lines changed

1 file changed

+222
-0
lines changed

llvm/test/CodeGen/X86/musttail-struct.ll

Lines changed: 222 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,3 +86,225 @@ define dso_local i32 @swapByValArguments(ptr byval(%struct.1xi32) %0, ptr byval(
8686
%r = musttail call i32 @swap(ptr byval(%struct.1xi32) %1, ptr byval(%struct.1xi32) %0)
8787
ret i32 %r
8888
}
89+
90+
; Clang only uses byval for arguments of 65 bytes or larger, but e.g. rustc
91+
; does use byval for smaller types. Here we use a 20 byte struct to keep
92+
; the tests more readable.
; At 20 bytes (5 x i32) the struct is still larger than the 16 bytes that the
; tests below describe as being passed in r0-r3, so part of a byval argument
; always ends up on the stack as well.
93+
%twenty_bytes = type { [5 x i32] }
94+
declare void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4)
95+
96+
; Functions with byval parameters can be tail-called, because the value is
97+
; actually passed in registers and the stack in the same way for the caller and
98+
; callee. Within @large_caller the first 16 bytes of the argument are spilled
99+
; to the local stack frame, but for the tail-call they are passed in r0-r3, so
100+
; it's safe to de-allocate that memory before the call.
; In the expected output the write-back form `stm sp!` both spills r0-r3 and
; moves sp back up by 16 bytes, which is why no separate `add sp` appears
; before the branch to large_callee.
101+
; TODO: The SUB and STM instructions are unnecessary and could be optimised
102+
; out, but the behaviour of this is still correct.
103+
define void @large_caller(%twenty_bytes* byval(%twenty_bytes) align 4 %a) {
104+
; CHECK-LABEL: large_caller:
105+
; CHECK: @ %bb.0: @ %entry
106+
; CHECK-NEXT: .pad #16
107+
; CHECK-NEXT: sub sp, sp, #16
108+
; CHECK-NEXT: stm sp!, {r0, r1, r2, r3}
109+
; CHECK-NEXT: b large_callee
110+
entry:
111+
musttail call void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %a)
112+
ret void
113+
}
114+
115+
; As above, but with some inline asm to test that the arguments in r0-r3 are
116+
; re-loaded before the call.
; The inline asm is empty but lists r0-r3 as clobbers. The expected output
; therefore spills r0-r3 with a plain `stm sp` (no write-back) before the asm
; region and uses `pop` afterwards, which reloads the four registers and
; releases the 16 bytes of stack in one instruction.
117+
define void @large_caller_check_regs(%twenty_bytes* byval(%twenty_bytes) align 4 %a) {
118+
; CHECK-LABEL: large_caller_check_regs:
119+
; CHECK: @ %bb.0: @ %entry
120+
; CHECK-NEXT: .pad #16
121+
; CHECK-NEXT: sub sp, sp, #16
122+
; CHECK-NEXT: stm sp, {r0, r1, r2, r3}
123+
; CHECK-NEXT: @APP
124+
; CHECK-NEXT: @NO_APP
125+
; CHECK-NEXT: pop {r0, r1, r2, r3}
126+
; CHECK-NEXT: b large_callee
127+
entry:
128+
tail call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3}"()
129+
musttail call void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %a)
130+
ret void
131+
}
132+
133+
; The IR for this one looks dodgy, because it has an alloca passed to a
134+
; musttail function, but it is passed as a byval argument, so will be copied
135+
; into the stack space allocated by @large_caller_new_value's caller, so is
136+
; valid.
; The alloca %y is filled with the words 0, 1, 2, 3, 4. In the expected output
; the first four words are materialised directly as immediates in r0-r3, and
; only the fifth word is copied from the local copy at [sp, #16] to sp+36,
; i.e. the first address above this function's 36-byte frame, before the frame
; is released and the callee is branched to.
137+
define void @large_caller_new_value(%twenty_bytes* byval(%twenty_bytes) align 4 %a) {
138+
; CHECK-LABEL: large_caller_new_value:
139+
; CHECK: @ %bb.0: @ %entry
140+
; CHECK-NEXT: .pad #36
141+
; CHECK-NEXT: sub sp, sp, #36
142+
; CHECK-NEXT: add r12, sp, #20
143+
; CHECK-NEXT: stm r12, {r0, r1, r2, r3}
144+
; CHECK-NEXT: mov r0, #4
145+
; CHECK-NEXT: add r1, sp, #36
146+
; CHECK-NEXT: str r0, [sp, #16]
147+
; CHECK-NEXT: mov r0, #3
148+
; CHECK-NEXT: str r0, [sp, #12]
149+
; CHECK-NEXT: mov r0, #2
150+
; CHECK-NEXT: str r0, [sp, #8]
151+
; CHECK-NEXT: mov r0, #1
152+
; CHECK-NEXT: str r0, [sp, #4]
153+
; CHECK-NEXT: mov r0, #0
154+
; CHECK-NEXT: str r0, [sp]
155+
; CHECK-NEXT: mov r0, sp
156+
; CHECK-NEXT: add r0, r0, #16
157+
; CHECK-NEXT: mov r3, #3
158+
; CHECK-NEXT: ldr r2, [r0], #4
159+
; CHECK-NEXT: str r2, [r1], #4
160+
; CHECK-NEXT: mov r0, #0
161+
; CHECK-NEXT: mov r1, #1
162+
; CHECK-NEXT: mov r2, #2
163+
; CHECK-NEXT: add sp, sp, #36
164+
; CHECK-NEXT: b large_callee
165+
entry:
166+
%y = alloca %twenty_bytes, align 4
167+
store i32 0, ptr %y, align 4
168+
%0 = getelementptr inbounds i8, ptr %y, i32 4
169+
store i32 1, ptr %0, align 4
170+
%1 = getelementptr inbounds i8, ptr %y, i32 8
171+
store i32 2, ptr %1, align 4
172+
%2 = getelementptr inbounds i8, ptr %y, i32 12
173+
store i32 3, ptr %2, align 4
174+
%3 = getelementptr inbounds i8, ptr %y, i32 16
175+
store i32 4, ptr %3, align 4
176+
musttail call void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %y)
177+
ret void
178+
}
179+
180+
; Forwards both byval arguments to the callee in swapped order (%b first, then
; %a). In the expected output each argument is first copied word by word into
; a temporary in the local frame (one at sp, one at sp+20); r0-r3 and the
; outgoing stack slots are only written from those temporaries afterwards.
; NOTE(review): presumably the temporaries are required because each
; argument's destination overlaps the other argument's source - confirm.
declare void @two_byvals_callee(%twenty_bytes* byval(%twenty_bytes) align 4, %twenty_bytes* byval(%twenty_bytes) align 4)
181+
define void @swap_byvals(%twenty_bytes* byval(%twenty_bytes) align 4 %a, %twenty_bytes* byval(%twenty_bytes) align 4 %b) {
182+
; CHECK-LABEL: swap_byvals:
183+
; CHECK: @ %bb.0: @ %entry
184+
; CHECK-NEXT: .pad #16
185+
; CHECK-NEXT: sub sp, sp, #16
186+
; CHECK-NEXT: .save {r4, r5, r11, lr}
187+
; CHECK-NEXT: push {r4, r5, r11, lr}
188+
; CHECK-NEXT: .pad #40
189+
; CHECK-NEXT: sub sp, sp, #40
190+
; CHECK-NEXT: add r12, sp, #56
191+
; CHECK-NEXT: add lr, sp, #20
192+
; CHECK-NEXT: stm r12, {r0, r1, r2, r3}
193+
; CHECK-NEXT: add r0, sp, #56
194+
; CHECK-NEXT: mov r12, sp
195+
; CHECK-NEXT: ldr r1, [r0], #4
196+
; CHECK-NEXT: mov r2, r12
197+
; CHECK-NEXT: str r1, [r2], #4
198+
; CHECK-NEXT: add r3, sp, #20
199+
; CHECK-NEXT: ldr r1, [r0], #4
200+
; CHECK-NEXT: add r4, sp, #76
201+
; CHECK-NEXT: str r1, [r2], #4
202+
; CHECK-NEXT: ldr r1, [r0], #4
203+
; CHECK-NEXT: str r1, [r2], #4
204+
; CHECK-NEXT: ldr r1, [r0], #4
205+
; CHECK-NEXT: str r1, [r2], #4
206+
; CHECK-NEXT: ldr r1, [r0], #4
207+
; CHECK-NEXT: add r0, sp, #76
208+
; CHECK-NEXT: str r1, [r2], #4
209+
; CHECK-NEXT: mov r2, lr
210+
; CHECK-NEXT: ldr r1, [r0], #4
211+
; CHECK-NEXT: str r1, [r2], #4
212+
; CHECK-NEXT: ldr r1, [r0], #4
213+
; CHECK-NEXT: str r1, [r2], #4
214+
; CHECK-NEXT: ldr r1, [r0], #4
215+
; CHECK-NEXT: str r1, [r2], #4
216+
; CHECK-NEXT: ldr r1, [r0], #4
217+
; CHECK-NEXT: str r1, [r2], #4
218+
; CHECK-NEXT: ldr r1, [r0], #4
219+
; CHECK-NEXT: str r1, [r2], #4
220+
; CHECK-NEXT: ldm r3, {r0, r1, r2, r3}
221+
; CHECK-NEXT: ldr r5, [r12], #4
222+
; CHECK-NEXT: str r5, [r4], #4
223+
; CHECK-NEXT: ldr r5, [r12], #4
224+
; CHECK-NEXT: str r5, [r4], #4
225+
; CHECK-NEXT: ldr r5, [r12], #4
226+
; CHECK-NEXT: str r5, [r4], #4
227+
; CHECK-NEXT: ldr r5, [r12], #4
228+
; CHECK-NEXT: str r5, [r4], #4
229+
; CHECK-NEXT: ldr r5, [r12], #4
230+
; CHECK-NEXT: str r5, [r4], #4
231+
; CHECK-NEXT: add r5, lr, #16
232+
; CHECK-NEXT: add r12, sp, #72
233+
; CHECK-NEXT: ldr r4, [r5], #4
234+
; CHECK-NEXT: str r4, [r12], #4
235+
; CHECK-NEXT: add sp, sp, #40
236+
; CHECK-NEXT: pop {r4, r5, r11, lr}
237+
; CHECK-NEXT: add sp, sp, #16
238+
; CHECK-NEXT: b two_byvals_callee
239+
entry:
240+
musttail call void @two_byvals_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %b, %twenty_bytes* byval(%twenty_bytes) align 4 %a)
241+
ret void
242+
}
243+
244+
; A forwarded byval arg, but at a different offset on the stack, so it needs to
245+
; be copied to the local stack frame first. This can't be musttail because of
246+
; the different signatures, but is still tail-called as an optimisation.
; Because the leading i32 %a takes r0, the register part of %b arrives in
; r1-r3 (12 bytes, see the `stm r0, {r1, r2, r3}` spill). The callee takes the
; struct as its only argument, so the expected output reloads the first four
; words into r0-r3 from the temporary at sp and copies the fifth word to the
; outgoing stack slot.
247+
declare void @shift_byval_callee(%twenty_bytes* byval(%twenty_bytes) align 4)
248+
define void @shift_byval(i32 %a, %twenty_bytes* byval(%twenty_bytes) align 4 %b) {
249+
; CHECK-LABEL: shift_byval:
250+
; CHECK: @ %bb.0: @ %entry
251+
; CHECK-NEXT: .pad #12
252+
; CHECK-NEXT: sub sp, sp, #12
253+
; CHECK-NEXT: .save {r4, lr}
254+
; CHECK-NEXT: push {r4, lr}
255+
; CHECK-NEXT: .pad #20
256+
; CHECK-NEXT: sub sp, sp, #20
257+
; CHECK-NEXT: add r0, sp, #28
258+
; CHECK-NEXT: add lr, sp, #40
259+
; CHECK-NEXT: stm r0, {r1, r2, r3}
260+
; CHECK-NEXT: add r0, sp, #28
261+
; CHECK-NEXT: mov r1, sp
262+
; CHECK-NEXT: ldr r2, [r0], #4
263+
; CHECK-NEXT: add r12, r1, #16
264+
; CHECK-NEXT: str r2, [r1], #4
265+
; CHECK-NEXT: ldr r2, [r0], #4
266+
; CHECK-NEXT: str r2, [r1], #4
267+
; CHECK-NEXT: ldr r2, [r0], #4
268+
; CHECK-NEXT: str r2, [r1], #4
269+
; CHECK-NEXT: ldr r2, [r0], #4
270+
; CHECK-NEXT: str r2, [r1], #4
271+
; CHECK-NEXT: ldr r2, [r0], #4
272+
; CHECK-NEXT: str r2, [r1], #4
273+
; CHECK-NEXT: ldm sp, {r0, r1, r2, r3}
274+
; CHECK-NEXT: ldr r4, [r12], #4
275+
; CHECK-NEXT: str r4, [lr], #4
276+
; CHECK-NEXT: add sp, sp, #20
277+
; CHECK-NEXT: pop {r4, lr}
278+
; CHECK-NEXT: add sp, sp, #12
279+
; CHECK-NEXT: b shift_byval_callee
280+
entry:
281+
tail call void @shift_byval_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %b)
282+
ret void
283+
}
284+
285+
; A global object passed to a byval argument, so it must be copied, but doesn't
286+
; need a stack temporary.
; In the expected output the first four words are loaded from @large_global
; straight into r0-r3 with a single `ldm`, and the fifth word (at offset 16 of
; the global) is stored through lr, which was set to sp+24 after the `sub` and
; `push`, i.e. the first address above the 16-byte pad and the two saved
; registers.
287+
@large_global = external global %twenty_bytes
288+
define void @large_caller_from_global(%twenty_bytes* byval(%twenty_bytes) align 4 %a) {
289+
; CHECK-LABEL: large_caller_from_global:
290+
; CHECK: @ %bb.0: @ %entry
291+
; CHECK-NEXT: .pad #16
292+
; CHECK-NEXT: sub sp, sp, #16
293+
; CHECK-NEXT: .save {r4, lr}
294+
; CHECK-NEXT: push {r4, lr}
295+
; CHECK-NEXT: add r12, sp, #8
296+
; CHECK-NEXT: add lr, sp, #24
297+
; CHECK-NEXT: stm r12, {r0, r1, r2, r3}
298+
; CHECK-NEXT: movw r3, :lower16:large_global
299+
; CHECK-NEXT: movt r3, :upper16:large_global
300+
; CHECK-NEXT: add r12, r3, #16
301+
; CHECK-NEXT: ldm r3, {r0, r1, r2, r3}
302+
; CHECK-NEXT: ldr r4, [r12], #4
303+
; CHECK-NEXT: str r4, [lr], #4
304+
; CHECK-NEXT: pop {r4, lr}
305+
; CHECK-NEXT: add sp, sp, #16
306+
; CHECK-NEXT: b large_callee
307+
entry:
308+
musttail call void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4 @large_global)
309+
ret void
310+
}

0 commit comments

Comments
 (0)