@@ -117,3 +117,115 @@ entry:
117117 musttail call void @sret_callee (ptr sret ({ double , double }) align 8 %result )
118118 ret void
119119}
120+
; Clang only uses byval for arguments of 65 bytes or larger, but we test with a
; 20 byte struct to keep the tests more readable. This size was chosen to still
; make sure that it will be split between registers and the stack, to test all
; of the interesting code paths in the backend.
%twenty_bytes = type { [5 x i32] }

; External callee used as the musttail target by the byval tests in this file.
; The pointer is spelled with the opaque `ptr` type, as elsewhere in this file;
; the pointee type is carried by the byval attribute.
declare void @large_callee(ptr byval(%twenty_bytes) align 4)
127+
; Functions with byval parameters can be tail-called, because the value is
; actually passed in registers and the stack in the same way for the caller and
; callee. Within @large_caller the first 16 bytes of the argument are spilled
; to the local stack frame, but for the tail-call they are passed in r0-r3, so
; it's safe to de-allocate that memory before the call. Most of the code
; generated for this isn't needed, but that's a missed optimisation, not a
; correctness issue.
define void @large_caller(ptr byval(%twenty_bytes) align 4 %a) {
; CHECK-LABEL: large_caller:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .save {r4, lr}
; CHECK-NEXT:    push {r4, lr}
; CHECK-NEXT:    add r12, sp, #8
; CHECK-NEXT:    add lr, sp, #24
; CHECK-NEXT:    stm r12, {r0, r1, r2, r3}
; CHECK-NEXT:    add r12, sp, #8
; CHECK-NEXT:    add r12, r12, #16
; CHECK-NEXT:    ldr r4, [r12], #4
; CHECK-NEXT:    str r4, [lr], #4
; CHECK-NEXT:    pop {r4, lr}
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    b large_callee
entry:
  ; Forward the incoming byval argument unchanged; the expected output ends in
  ; a plain branch (`b large_callee`) rather than a call.
  musttail call void @large_callee(ptr byval(%twenty_bytes) align 4 %a)
  ret void
}
156+
; As @large_caller, but with some inline asm to test that the arguments in
; r0-r3 are re-loaded before the call.
define void @large_caller_check_regs(ptr byval(%twenty_bytes) align 4 %a) {
; CHECK-LABEL: large_caller_check_regs:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .save {r4, lr}
; CHECK-NEXT:    push {r4, lr}
; CHECK-NEXT:    add r12, sp, #8
; CHECK-NEXT:    add lr, sp, #24
; CHECK-NEXT:    stm r12, {r0, r1, r2, r3}
; CHECK-NEXT:    @APP
; CHECK-NEXT:    @NO_APP
; CHECK-NEXT:    add r3, sp, #8
; CHECK-NEXT:    add r0, sp, #8
; CHECK-NEXT:    add r12, r0, #16
; CHECK-NEXT:    ldm r3, {r0, r1, r2, r3}
; CHECK-NEXT:    ldr r4, [r12], #4
; CHECK-NEXT:    str r4, [lr], #4
; CHECK-NEXT:    pop {r4, lr}
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    b large_callee
entry:
  ; Empty asm that only declares r0-r3 as clobbered, so their incoming values
  ; cannot be assumed live at the tail call; the expected output reloads them
  ; with `ldm` after @NO_APP.
  tail call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3}"()
  musttail call void @large_callee(ptr byval(%twenty_bytes) align 4 %a)
  ret void
}
185+
; The IR for this one looks dodgy, because it has an alloca passed to a
; musttail function, but it is passed as a byval argument, so will be copied
; into the stack space allocated by @large_caller_new_value's caller, so is
; valid.
define void @large_caller_new_value(ptr byval(%twenty_bytes) align 4 %a) {
; CHECK-LABEL: large_caller_new_value:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .pad #36
; CHECK-NEXT:    sub sp, sp, #36
; CHECK-NEXT:    add r12, sp, #20
; CHECK-NEXT:    stm r12, {r0, r1, r2, r3}
; CHECK-NEXT:    mov r0, #4
; CHECK-NEXT:    add r1, sp, #36
; CHECK-NEXT:    str r0, [sp, #16]
; CHECK-NEXT:    mov r0, #3
; CHECK-NEXT:    str r0, [sp, #12]
; CHECK-NEXT:    mov r0, #2
; CHECK-NEXT:    str r0, [sp, #8]
; CHECK-NEXT:    mov r0, #1
; CHECK-NEXT:    str r0, [sp, #4]
; CHECK-NEXT:    mov r0, #0
; CHECK-NEXT:    str r0, [sp]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    add r0, r0, #16
; CHECK-NEXT:    mov r3, #3
; CHECK-NEXT:    ldr r2, [r0], #4
; CHECK-NEXT:    str r2, [r1], #4
; CHECK-NEXT:    mov r0, #0
; CHECK-NEXT:    mov r1, #1
; CHECK-NEXT:    mov r2, #2
; CHECK-NEXT:    add sp, sp, #36
; CHECK-NEXT:    b large_callee
entry:
  ; Build a fresh 20-byte value in a local alloca: the five i32 slots at byte
  ; offsets 0, 4, 8, 12 and 16 are set to 0, 1, 2, 3 and 4 respectively.
  %y = alloca %twenty_bytes, align 4
  store i32 0, ptr %y, align 4
  %0 = getelementptr inbounds i8, ptr %y, i32 4
  store i32 1, ptr %0, align 4
  %1 = getelementptr inbounds i8, ptr %y, i32 8
  store i32 2, ptr %1, align 4
  %2 = getelementptr inbounds i8, ptr %y, i32 12
  store i32 3, ptr %2, align 4
  %3 = getelementptr inbounds i8, ptr %y, i32 16
  store i32 4, ptr %3, align 4
  ; Pass the new local value (%y), not the incoming %a, as the byval argument.
  musttail call void @large_callee(ptr byval(%twenty_bytes) align 4 %y)
  ret void
}
0 commit comments