Skip to content

Commit 6c75b24

Browse files
committed
riscv: add musttail test
1 parent 2945d99 commit 6c75b24

File tree

1 file changed

+395
-0
lines changed

1 file changed

+395
-0
lines changed
Lines changed: 395 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,395 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=riscv32 %s -o - | FileCheck %s --check-prefix=RV32
3+
; RUN: llc -mtriple=riscv64 %s -o - | FileCheck %s --check-prefix=RV64
4+
5+
; External callee with ten i32 parameters; it is the call target of the
; @many_args_*, @more_args_tail, @different_args_* and @fewer_args_tail tests.
declare i32 @many_args_callee(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9)
6+
7+
; Plain `tail call` of @many_args_callee from a caller with the same ten i32
; parameters. The assertions for both triples expect the constants 8 and 9 to be
; stored to the two lowest stack slots at sp and the function to end in a `tail`
; jump, with no frame setup and no `ret`.
define i32 @many_args_tail(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9) {
8+
; RV32-LABEL: many_args_tail:
9+
; RV32: # %bb.0:
10+
; RV32-NEXT: li a0, 9
11+
; RV32-NEXT: li t0, 8
12+
; RV32-NEXT: li a1, 1
13+
; RV32-NEXT: li a2, 2
14+
; RV32-NEXT: li a3, 3
15+
; RV32-NEXT: li a4, 4
16+
; RV32-NEXT: li a5, 5
17+
; RV32-NEXT: li a6, 6
18+
; RV32-NEXT: sw a0, 4(sp)
19+
; RV32-NEXT: li a7, 7
20+
; RV32-NEXT: sw t0, 0(sp)
21+
; RV32-NEXT: li a0, 0
22+
; RV32-NEXT: tail many_args_callee
23+
;
24+
; RV64-LABEL: many_args_tail:
25+
; RV64: # %bb.0:
26+
; RV64-NEXT: li a0, 9
27+
; RV64-NEXT: li t0, 8
28+
; RV64-NEXT: li a1, 1
29+
; RV64-NEXT: li a2, 2
30+
; RV64-NEXT: li a3, 3
31+
; RV64-NEXT: li a4, 4
32+
; RV64-NEXT: li a5, 5
33+
; RV64-NEXT: li a6, 6
34+
; RV64-NEXT: sd a0, 8(sp)
35+
; RV64-NEXT: li a7, 7
36+
; RV64-NEXT: sd t0, 0(sp)
37+
; RV64-NEXT: li a0, 0
38+
; RV64-NEXT: tail many_args_callee
39+
%ret = tail call i32 @many_args_callee(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9)
40+
ret i32 %ret
41+
}
42+
43+
; Same caller/callee shape as @many_args_tail, but the call is marked
; `musttail`. The expected instruction sequence for both triples is the same as
; for the plain `tail call` variant and ends in `tail many_args_callee`.
define i32 @many_args_musttail(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9) {
44+
; RV32-LABEL: many_args_musttail:
45+
; RV32: # %bb.0:
46+
; RV32-NEXT: li a0, 9
47+
; RV32-NEXT: li t0, 8
48+
; RV32-NEXT: li a1, 1
49+
; RV32-NEXT: li a2, 2
50+
; RV32-NEXT: li a3, 3
51+
; RV32-NEXT: li a4, 4
52+
; RV32-NEXT: li a5, 5
53+
; RV32-NEXT: li a6, 6
54+
; RV32-NEXT: sw a0, 4(sp)
55+
; RV32-NEXT: li a7, 7
56+
; RV32-NEXT: sw t0, 0(sp)
57+
; RV32-NEXT: li a0, 0
58+
; RV32-NEXT: tail many_args_callee
59+
;
60+
; RV64-LABEL: many_args_musttail:
61+
; RV64: # %bb.0:
62+
; RV64-NEXT: li a0, 9
63+
; RV64-NEXT: li t0, 8
64+
; RV64-NEXT: li a1, 1
65+
; RV64-NEXT: li a2, 2
66+
; RV64-NEXT: li a3, 3
67+
; RV64-NEXT: li a4, 4
68+
; RV64-NEXT: li a5, 5
69+
; RV64-NEXT: li a6, 6
70+
; RV64-NEXT: sd a0, 8(sp)
71+
; RV64-NEXT: li a7, 7
72+
; RV64-NEXT: sd t0, 0(sp)
73+
; RV64-NEXT: li a0, 0
74+
; RV64-NEXT: tail many_args_callee
75+
%ret = musttail call i32 @many_args_callee(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9)
76+
ret i32 %ret
77+
}
78+
79+
; This function has more arguments than its tail-callee. This isn't valid for
80+
; the musttail attribute, but can still be tail-called as a non-guaranteed
81+
; optimisation, because the outgoing arguments to @many_args_callee fit in the
82+
; stack space allocated by the caller of @more_args_tail.
83+
; NOTE(review): as written, this signature has the same ten i32 parameters as
; @many_args_callee, so it does not visibly have "more arguments" than its
; callee - confirm whether an extra parameter was intended here.
; The assertions expect a `tail` jump to @many_args_callee on both triples.
define i32 @more_args_tail(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9) {
84+
; RV32-LABEL: more_args_tail:
85+
; RV32: # %bb.0:
86+
; RV32-NEXT: li a0, 9
87+
; RV32-NEXT: li t0, 8
88+
; RV32-NEXT: li a1, 1
89+
; RV32-NEXT: li a2, 2
90+
; RV32-NEXT: li a3, 3
91+
; RV32-NEXT: li a4, 4
92+
; RV32-NEXT: li a5, 5
93+
; RV32-NEXT: li a6, 6
94+
; RV32-NEXT: sw a0, 4(sp)
95+
; RV32-NEXT: li a7, 7
96+
; RV32-NEXT: sw t0, 0(sp)
97+
; RV32-NEXT: li a0, 0
98+
; RV32-NEXT: tail many_args_callee
99+
;
100+
; RV64-LABEL: more_args_tail:
101+
; RV64: # %bb.0:
102+
; RV64-NEXT: li a0, 9
103+
; RV64-NEXT: li t0, 8
104+
; RV64-NEXT: li a1, 1
105+
; RV64-NEXT: li a2, 2
106+
; RV64-NEXT: li a3, 3
107+
; RV64-NEXT: li a4, 4
108+
; RV64-NEXT: li a5, 5
109+
; RV64-NEXT: li a6, 6
110+
; RV64-NEXT: sd a0, 8(sp)
111+
; RV64-NEXT: li a7, 7
112+
; RV64-NEXT: sd t0, 0(sp)
113+
; RV64-NEXT: li a0, 0
114+
; RV64-NEXT: tail many_args_callee
115+
%ret = tail call i32 @many_args_callee(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9)
116+
ret i32 %ret
117+
}
118+
119+
; Again, this isn't valid for musttail, but can be tail-called in practice
120+
; because the stack size is the same.
121+
; Caller takes five i64 parameters and tail-calls the ten-i32 callee.
; Expected output: riscv32 emits a `tail` jump with the last two arguments
; stored at 0(sp) and 4(sp); riscv64 instead sets up a 32-byte frame, spills
; ra, and emits an ordinary `call` followed by `ret`.
define i32 @different_args_tail_32bit(i64 %0, i64 %1, i64 %2, i64 %3, i64 %4) {
122+
; RV32-LABEL: different_args_tail_32bit:
123+
; RV32: # %bb.0:
124+
; RV32-NEXT: li a0, 9
125+
; RV32-NEXT: li t0, 8
126+
; RV32-NEXT: li a1, 1
127+
; RV32-NEXT: li a2, 2
128+
; RV32-NEXT: li a3, 3
129+
; RV32-NEXT: li a4, 4
130+
; RV32-NEXT: li a5, 5
131+
; RV32-NEXT: li a6, 6
132+
; RV32-NEXT: sw a0, 4(sp)
133+
; RV32-NEXT: li a7, 7
134+
; RV32-NEXT: sw t0, 0(sp)
135+
; RV32-NEXT: li a0, 0
136+
; RV32-NEXT: tail many_args_callee
137+
;
138+
; RV64-LABEL: different_args_tail_32bit:
139+
; RV64: # %bb.0:
140+
; RV64-NEXT: addi sp, sp, -32
141+
; RV64-NEXT: .cfi_def_cfa_offset 32
142+
; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
143+
; RV64-NEXT: .cfi_offset ra, -8
144+
; RV64-NEXT: li a0, 9
145+
; RV64-NEXT: li t0, 8
146+
; RV64-NEXT: li a1, 1
147+
; RV64-NEXT: li a2, 2
148+
; RV64-NEXT: li a3, 3
149+
; RV64-NEXT: li a4, 4
150+
; RV64-NEXT: li a5, 5
151+
; RV64-NEXT: li a6, 6
152+
; RV64-NEXT: li a7, 7
153+
; RV64-NEXT: sd t0, 0(sp)
154+
; RV64-NEXT: sd a0, 8(sp)
155+
; RV64-NEXT: li a0, 0
156+
; RV64-NEXT: call many_args_callee
157+
; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
158+
; RV64-NEXT: .cfi_restore ra
159+
; RV64-NEXT: addi sp, sp, 32
160+
; RV64-NEXT: .cfi_def_cfa_offset 0
161+
; RV64-NEXT: ret
162+
%ret = tail call i32 @many_args_callee(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9)
163+
ret i32 %ret
164+
}
165+
166+
; Mirror of @different_args_tail_32bit using five i128 parameters.
; Expected output: riscv64 emits a `tail` jump with the last two arguments
; stored at 0(sp) and 8(sp); riscv32 instead sets up a 16-byte frame, spills
; ra, and emits an ordinary `call` followed by `ret`.
define i32 @different_args_tail_64bit(i128 %0, i128 %1, i128 %2, i128 %3, i128 %4) {
167+
; RV32-LABEL: different_args_tail_64bit:
168+
; RV32: # %bb.0:
169+
; RV32-NEXT: addi sp, sp, -16
170+
; RV32-NEXT: .cfi_def_cfa_offset 16
171+
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
172+
; RV32-NEXT: .cfi_offset ra, -4
173+
; RV32-NEXT: li a0, 9
174+
; RV32-NEXT: li t0, 8
175+
; RV32-NEXT: li a1, 1
176+
; RV32-NEXT: li a2, 2
177+
; RV32-NEXT: li a3, 3
178+
; RV32-NEXT: li a4, 4
179+
; RV32-NEXT: li a5, 5
180+
; RV32-NEXT: li a6, 6
181+
; RV32-NEXT: li a7, 7
182+
; RV32-NEXT: sw t0, 0(sp)
183+
; RV32-NEXT: sw a0, 4(sp)
184+
; RV32-NEXT: li a0, 0
185+
; RV32-NEXT: call many_args_callee
186+
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
187+
; RV32-NEXT: .cfi_restore ra
188+
; RV32-NEXT: addi sp, sp, 16
189+
; RV32-NEXT: .cfi_def_cfa_offset 0
190+
; RV32-NEXT: ret
191+
;
192+
; RV64-LABEL: different_args_tail_64bit:
193+
; RV64: # %bb.0:
194+
; RV64-NEXT: li a0, 9
195+
; RV64-NEXT: li t0, 8
196+
; RV64-NEXT: li a1, 1
197+
; RV64-NEXT: li a2, 2
198+
; RV64-NEXT: li a3, 3
199+
; RV64-NEXT: li a4, 4
200+
; RV64-NEXT: li a5, 5
201+
; RV64-NEXT: li a6, 6
202+
; RV64-NEXT: sd a0, 8(sp)
203+
; RV64-NEXT: li a7, 7
204+
; RV64-NEXT: sd t0, 0(sp)
205+
; RV64-NEXT: li a0, 0
206+
; RV64-NEXT: tail many_args_callee
207+
%ret = tail call i32 @many_args_callee(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9)
208+
ret i32 %ret
209+
}
210+
211+
; Here, the caller requires less stack space for its arguments than the
212+
; callee, so it would not be valid to do a tail-call.
213+
; Caller takes only five i32 parameters while the callee takes ten.
; Both triples are expected to fall back to an ordinary `call`: a frame is
; allocated (16 bytes on riscv32, 32 bytes on riscv64), ra is spilled, the
; constants 8 and 9 are stored at the bottom of that frame, and the function
; returns with `ret`.
define i32 @fewer_args_tail(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) {
214+
; RV32-LABEL: fewer_args_tail:
215+
; RV32: # %bb.0:
216+
; RV32-NEXT: addi sp, sp, -16
217+
; RV32-NEXT: .cfi_def_cfa_offset 16
218+
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
219+
; RV32-NEXT: .cfi_offset ra, -4
220+
; RV32-NEXT: li a0, 9
221+
; RV32-NEXT: li t0, 8
222+
; RV32-NEXT: li a1, 1
223+
; RV32-NEXT: li a2, 2
224+
; RV32-NEXT: li a3, 3
225+
; RV32-NEXT: li a4, 4
226+
; RV32-NEXT: li a5, 5
227+
; RV32-NEXT: li a6, 6
228+
; RV32-NEXT: li a7, 7
229+
; RV32-NEXT: sw t0, 0(sp)
230+
; RV32-NEXT: sw a0, 4(sp)
231+
; RV32-NEXT: li a0, 0
232+
; RV32-NEXT: call many_args_callee
233+
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
234+
; RV32-NEXT: .cfi_restore ra
235+
; RV32-NEXT: addi sp, sp, 16
236+
; RV32-NEXT: .cfi_def_cfa_offset 0
237+
; RV32-NEXT: ret
238+
;
239+
; RV64-LABEL: fewer_args_tail:
240+
; RV64: # %bb.0:
241+
; RV64-NEXT: addi sp, sp, -32
242+
; RV64-NEXT: .cfi_def_cfa_offset 32
243+
; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
244+
; RV64-NEXT: .cfi_offset ra, -8
245+
; RV64-NEXT: li a0, 9
246+
; RV64-NEXT: li t0, 8
247+
; RV64-NEXT: li a1, 1
248+
; RV64-NEXT: li a2, 2
249+
; RV64-NEXT: li a3, 3
250+
; RV64-NEXT: li a4, 4
251+
; RV64-NEXT: li a5, 5
252+
; RV64-NEXT: li a6, 6
253+
; RV64-NEXT: li a7, 7
254+
; RV64-NEXT: sd t0, 0(sp)
255+
; RV64-NEXT: sd a0, 8(sp)
256+
; RV64-NEXT: li a0, 0
257+
; RV64-NEXT: call many_args_callee
258+
; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
259+
; RV64-NEXT: .cfi_restore ra
260+
; RV64-NEXT: addi sp, sp, 32
261+
; RV64-NEXT: .cfi_def_cfa_offset 0
262+
; RV64-NEXT: ret
263+
%ret = tail call i32 @many_args_callee(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9)
264+
ret i32 %ret
265+
}
266+
267+
; External callee with nine i32 parameters; called twice by @bar.
declare void @foo(i32, i32, i32, i32, i32, i32, i32, i32, i32)
268+
269+
; Makes an ordinary call to @foo (ninth argument 1) followed by a `musttail`
; call to @foo (ninth argument 2), forwarding the first eight parameters to both.
; Expected output: a0-a7 are copied into s0-s7 so they survive the first call;
; the constant 1 is stored at 0(sp) inside bar's own frame, while the constant 2
; is stored at 48(sp) on riscv32 / 80(sp) on riscv64, i.e. at the first slot
; above bar's 48-/80-byte frame (presumably bar's incoming stack-argument slot).
; The saved registers and sp are then restored before the final `tail foo`.
define void @bar(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8) nounwind {
270+
; RV32-LABEL: bar:
271+
; RV32: # %bb.0: # %entry
272+
; RV32-NEXT: addi sp, sp, -48
273+
; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
274+
; RV32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
275+
; RV32-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
276+
; RV32-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
277+
; RV32-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
278+
; RV32-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
279+
; RV32-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
280+
; RV32-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
281+
; RV32-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
282+
; RV32-NEXT: mv s0, a7
283+
; RV32-NEXT: mv s1, a6
284+
; RV32-NEXT: mv s2, a5
285+
; RV32-NEXT: mv s3, a4
286+
; RV32-NEXT: mv s4, a3
287+
; RV32-NEXT: mv s5, a2
288+
; RV32-NEXT: mv s6, a1
289+
; RV32-NEXT: mv s7, a0
290+
; RV32-NEXT: li a0, 1
291+
; RV32-NEXT: sw a0, 0(sp)
292+
; RV32-NEXT: mv a0, s7
293+
; RV32-NEXT: call foo
294+
; RV32-NEXT: li a0, 2
295+
; RV32-NEXT: sw a0, 48(sp)
296+
; RV32-NEXT: mv a0, s7
297+
; RV32-NEXT: mv a1, s6
298+
; RV32-NEXT: mv a2, s5
299+
; RV32-NEXT: mv a3, s4
300+
; RV32-NEXT: mv a4, s3
301+
; RV32-NEXT: mv a5, s2
302+
; RV32-NEXT: mv a6, s1
303+
; RV32-NEXT: mv a7, s0
304+
; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
305+
; RV32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
306+
; RV32-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
307+
; RV32-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
308+
; RV32-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
309+
; RV32-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
310+
; RV32-NEXT: lw s5, 20(sp) # 4-byte Folded Reload
311+
; RV32-NEXT: lw s6, 16(sp) # 4-byte Folded Reload
312+
; RV32-NEXT: lw s7, 12(sp) # 4-byte Folded Reload
313+
; RV32-NEXT: addi sp, sp, 48
314+
; RV32-NEXT: tail foo
315+
;
316+
; RV64-LABEL: bar:
317+
; RV64: # %bb.0: # %entry
318+
; RV64-NEXT: addi sp, sp, -80
319+
; RV64-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
320+
; RV64-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
321+
; RV64-NEXT: sd s1, 56(sp) # 8-byte Folded Spill
322+
; RV64-NEXT: sd s2, 48(sp) # 8-byte Folded Spill
323+
; RV64-NEXT: sd s3, 40(sp) # 8-byte Folded Spill
324+
; RV64-NEXT: sd s4, 32(sp) # 8-byte Folded Spill
325+
; RV64-NEXT: sd s5, 24(sp) # 8-byte Folded Spill
326+
; RV64-NEXT: sd s6, 16(sp) # 8-byte Folded Spill
327+
; RV64-NEXT: sd s7, 8(sp) # 8-byte Folded Spill
328+
; RV64-NEXT: mv s0, a7
329+
; RV64-NEXT: mv s1, a6
330+
; RV64-NEXT: mv s2, a5
331+
; RV64-NEXT: mv s3, a4
332+
; RV64-NEXT: mv s4, a3
333+
; RV64-NEXT: mv s5, a2
334+
; RV64-NEXT: mv s6, a1
335+
; RV64-NEXT: mv s7, a0
336+
; RV64-NEXT: li a0, 1
337+
; RV64-NEXT: sd a0, 0(sp)
338+
; RV64-NEXT: mv a0, s7
339+
; RV64-NEXT: call foo
340+
; RV64-NEXT: li a0, 2
341+
; RV64-NEXT: sd a0, 80(sp)
342+
; RV64-NEXT: mv a0, s7
343+
; RV64-NEXT: mv a1, s6
344+
; RV64-NEXT: mv a2, s5
345+
; RV64-NEXT: mv a3, s4
346+
; RV64-NEXT: mv a4, s3
347+
; RV64-NEXT: mv a5, s2
348+
; RV64-NEXT: mv a6, s1
349+
; RV64-NEXT: mv a7, s0
350+
; RV64-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
351+
; RV64-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
352+
; RV64-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
353+
; RV64-NEXT: ld s2, 48(sp) # 8-byte Folded Reload
354+
; RV64-NEXT: ld s3, 40(sp) # 8-byte Folded Reload
355+
; RV64-NEXT: ld s4, 32(sp) # 8-byte Folded Reload
356+
; RV64-NEXT: ld s5, 24(sp) # 8-byte Folded Reload
357+
; RV64-NEXT: ld s6, 16(sp) # 8-byte Folded Reload
358+
; RV64-NEXT: ld s7, 8(sp) # 8-byte Folded Reload
359+
; RV64-NEXT: addi sp, sp, 80
360+
; RV64-NEXT: tail foo
361+
entry:
362+
call void @foo(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 1)
363+
musttail call void @foo(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 2)
364+
ret void
365+
}
366+
367+
; External callee that returns a { double, double } through an sret pointer;
; call target of @sret_caller_tail and @sret_caller_musttail.
declare void @sret_callee(ptr sret({ double, double }) align 8)
368+
369+
; Functions which return by sret can be tail-called because the incoming sret
370+
; pointer gets passed through to the callee.
371+
; Forwards its own sret pointer to @sret_callee with a plain `tail call`.
; Both triples are expected to emit nothing but `tail sret_callee`.
define void @sret_caller_tail(ptr sret({ double, double }) align 8 %result) {
372+
; RV32-LABEL: sret_caller_tail:
373+
; RV32: # %bb.0: # %entry
374+
; RV32-NEXT: tail sret_callee
375+
;
376+
; RV64-LABEL: sret_caller_tail:
377+
; RV64: # %bb.0: # %entry
378+
; RV64-NEXT: tail sret_callee
379+
entry:
380+
tail call void @sret_callee(ptr sret({ double, double }) align 8 %result)
381+
ret void
382+
}
383+
384+
; Same as @sret_caller_tail but with a `musttail` call; the expected output is
; again a single `tail sret_callee` on both triples.
define void @sret_caller_musttail(ptr sret({ double, double }) align 8 %result) {
385+
; RV32-LABEL: sret_caller_musttail:
386+
; RV32: # %bb.0: # %entry
387+
; RV32-NEXT: tail sret_callee
388+
;
389+
; RV64-LABEL: sret_caller_musttail:
390+
; RV64: # %bb.0: # %entry
391+
; RV64-NEXT: tail sret_callee
392+
entry:
393+
musttail call void @sret_callee(ptr sret({ double, double }) align 8 %result)
394+
ret void
395+
}

0 commit comments

Comments
 (0)