@@ -393,3 +393,179 @@ entry:
393393 musttail call void @sret_callee (ptr sret ({ double , double }) align 8 %result )
394394 ret void
395395}
396+
+ ; A 20-byte aggregate (five i32 values) used as the byval payload by the
+ ; tests that follow, and the external callee that those tests tail-call.
+ ; NOTE(review): the byval parameter is spelled with the legacy typed-pointer
+ ; form (%twenty_bytes*) while the function bodies use `ptr` -- confirm the
+ ; IR parser in use still accepts the legacy spelling.
397+ %twenty_bytes = type { [5 x i32 ] }
398+ declare void @large_callee (%twenty_bytes* byval (%twenty_bytes ) align 4 )
399+
400+ ; Functions with byval parameters can be tail-called, because the argument is
401+ ; passed in the same way for the caller and callee and can simply be forwarded.
+ ; The checks below expect a bare `tail`, with no copy and no stack adjustment.
402+ define void @large_caller (%twenty_bytes* byval (%twenty_bytes ) align 4 %a ) {
403+ ; RV32-LABEL: large_caller:
404+ ; RV32: # %bb.0: # %entry
405+ ; RV32-NEXT: tail large_callee
406+ ;
407+ ; RV64-LABEL: large_caller:
408+ ; RV64: # %bb.0: # %entry
409+ ; RV64-NEXT: tail large_callee
410+ entry:
411+ musttail call void @large_callee (%twenty_bytes* byval (%twenty_bytes ) align 4 %a )
412+ ret void
413+ }
414+
415+ ; As above, but with an empty inline asm that clobbers r4, intended to test
416+ ; that an argument held in r4 is re-loaded before the call.
+ ; NOTE(review): the expected output has no reload between #NO_APP and the
+ ; tail call, and `r4` does not match the register names seen in the checks
+ ; of this file (a0-a4, sp) -- confirm the clobber exercises what was intended.
417+ define void @large_caller_check_regs (%twenty_bytes* byval (%twenty_bytes ) align 4 %a ) nounwind {
418+ ; RV32-LABEL: large_caller_check_regs:
419+ ; RV32: # %bb.0: # %entry
420+ ; RV32-NEXT: #APP
421+ ; RV32-NEXT: #NO_APP
422+ ; RV32-NEXT: tail large_callee
423+ ;
424+ ; RV64-LABEL: large_caller_check_regs:
425+ ; RV64: # %bb.0: # %entry
426+ ; RV64-NEXT: #APP
427+ ; RV64-NEXT: #NO_APP
428+ ; RV64-NEXT: tail large_callee
429+ entry:
430+ tail call void asm sideeffect "" , "~{r4}" ()
431+ musttail call void @large_callee (%twenty_bytes* byval (%twenty_bytes ) align 4 %a )
432+ ret void
433+ }
434+
435+ ; The IR for this one looks dodgy, because it passes an alloca to a musttail
436+ ; call. However, the alloca is passed as a byval argument, so its contents are
437+ ; copied into the stack space allocated by @large_caller_new_value's caller,
438+ ; which makes it valid.
+ ; The checks expect the five words to be stored both to the local stack slot
+ ; and through a0 (presumably the pointer to the caller-provided byval storage)
+ ; before sp is restored and the tail call is made.
439+ define void @large_caller_new_value (%twenty_bytes* byval (%twenty_bytes ) align 4 %a ) nounwind {
440+ ; RV32-LABEL: large_caller_new_value:
441+ ; RV32: # %bb.0: # %entry
442+ ; RV32-NEXT: addi sp, sp, -32
443+ ; RV32-NEXT: li a1, 1
444+ ; RV32-NEXT: li a2, 2
445+ ; RV32-NEXT: li a3, 3
446+ ; RV32-NEXT: li a4, 4
447+ ; RV32-NEXT: sw zero, 12(sp)
448+ ; RV32-NEXT: sw a1, 16(sp)
449+ ; RV32-NEXT: sw a2, 20(sp)
450+ ; RV32-NEXT: sw a3, 24(sp)
451+ ; RV32-NEXT: sw a4, 28(sp)
452+ ; RV32-NEXT: sw a4, 16(a0)
453+ ; RV32-NEXT: sw zero, 0(a0)
454+ ; RV32-NEXT: sw a1, 4(a0)
455+ ; RV32-NEXT: sw a2, 8(a0)
456+ ; RV32-NEXT: sw a3, 12(a0)
457+ ; RV32-NEXT: addi sp, sp, 32
458+ ; RV32-NEXT: tail large_callee
459+ ;
460+ ; RV64-LABEL: large_caller_new_value:
461+ ; RV64: # %bb.0: # %entry
462+ ; RV64-NEXT: addi sp, sp, -32
463+ ; RV64-NEXT: li a1, 1
464+ ; RV64-NEXT: li a2, 2
465+ ; RV64-NEXT: li a3, 3
466+ ; RV64-NEXT: li a4, 4
467+ ; RV64-NEXT: sw zero, 12(sp)
468+ ; RV64-NEXT: sw a1, 16(sp)
469+ ; RV64-NEXT: sw a2, 20(sp)
470+ ; RV64-NEXT: sw a3, 24(sp)
471+ ; RV64-NEXT: sw a4, 28(sp)
472+ ; RV64-NEXT: sw a4, 16(a0)
473+ ; RV64-NEXT: sw zero, 0(a0)
474+ ; RV64-NEXT: sw a1, 4(a0)
475+ ; RV64-NEXT: sw a2, 8(a0)
476+ ; RV64-NEXT: sw a3, 12(a0)
477+ ; RV64-NEXT: addi sp, sp, 32
478+ ; RV64-NEXT: tail large_callee
479+ entry:
+ ; Fill a local %twenty_bytes with the words 0, 1, 2, 3, 4 at byte offsets
+ ; 0, 4, 8, 12 and 16.
480+ %y = alloca %twenty_bytes , align 4
481+ store i32 0 , ptr %y , align 4
482+ %0 = getelementptr inbounds i8 , ptr %y , i32 4
483+ store i32 1 , ptr %0 , align 4
484+ %1 = getelementptr inbounds i8 , ptr %y , i32 8
485+ store i32 2 , ptr %1 , align 4
486+ %2 = getelementptr inbounds i8 , ptr %y , i32 12
487+ store i32 3 , ptr %2 , align 4
488+ %3 = getelementptr inbounds i8 , ptr %y , i32 16
489+ store i32 4 , ptr %3 , align 4
+ ; Pass the freshly built local (not the incoming %a) as the byval argument.
490+ musttail call void @large_callee (%twenty_bytes* byval (%twenty_bytes ) align 4 %y )
491+ ret void
492+ }
493+
+ ; Two byval arguments forwarded to the callee in swapped order. The checks
+ ; expect only the two argument registers a0 and a1 to be exchanged (using a2
+ ; as a scratch register), with no loads or stores of the 20-byte payloads.
494+ declare void @two_byvals_callee (%twenty_bytes* byval (%twenty_bytes ) align 4 , %twenty_bytes* byval (%twenty_bytes ) align 4 )
495+ define void @swap_byvals (%twenty_bytes* byval (%twenty_bytes ) align 4 %a , %twenty_bytes* byval (%twenty_bytes ) align 4 %b ) {
496+ ; RV32-LABEL: swap_byvals:
497+ ; RV32: # %bb.0: # %entry
498+ ; RV32-NEXT: mv a2, a0
499+ ; RV32-NEXT: mv a0, a1
500+ ; RV32-NEXT: mv a1, a2
501+ ; RV32-NEXT: tail two_byvals_callee
502+ ;
503+ ; RV64-LABEL: swap_byvals:
504+ ; RV64: # %bb.0: # %entry
505+ ; RV64-NEXT: mv a2, a0
506+ ; RV64-NEXT: mv a0, a1
507+ ; RV64-NEXT: mv a1, a2
508+ ; RV64-NEXT: tail two_byvals_callee
509+ entry:
510+ musttail call void @two_byvals_callee (%twenty_bytes* byval (%twenty_bytes ) align 4 %b , %twenty_bytes* byval (%twenty_bytes ) align 4 %a )
511+ ret void
512+ }
513+
514+ ; A forwarded byval arg, but in a different argument register, so it needs to
515+ ; be moved between registers first. This can't be musttail because of the
516+ ; different signatures, but is still tail-called as an optimisation.
+ ; The checks expect a single `mv a0, a1` followed by the tail call.
517+ declare void @shift_byval_callee (%twenty_bytes* byval (%twenty_bytes ) align 4 )
518+ define void @shift_byval (i32 %a , %twenty_bytes* byval (%twenty_bytes ) align 4 %b ) {
519+ ; RV32-LABEL: shift_byval:
520+ ; RV32: # %bb.0: # %entry
521+ ; RV32-NEXT: mv a0, a1
522+ ; RV32-NEXT: tail shift_byval_callee
523+ ;
524+ ; RV64-LABEL: shift_byval:
525+ ; RV64: # %bb.0: # %entry
526+ ; RV64-NEXT: mv a0, a1
527+ ; RV64-NEXT: tail shift_byval_callee
528+ entry:
529+ tail call void @shift_byval_callee (%twenty_bytes* byval (%twenty_bytes ) align 4 %b )
530+ ret void
531+ }
532+
533+ ; A global object passed to a byval argument, so it must be copied, but doesn't
534+ ; need a stack temporary.
+ ; The checks expect the address of large_global to be formed in a1 and five
+ ; 32-bit words (offsets 16 down to 0) to be loaded from it and stored through
+ ; a0, with no sp adjustment, before the tail call.
535+ @large_global = external global %twenty_bytes
536+ define void @large_caller_from_global (%twenty_bytes* byval (%twenty_bytes ) align 4 %a ) {
537+ ; RV32-LABEL: large_caller_from_global:
538+ ; RV32: # %bb.0: # %entry
539+ ; RV32-NEXT: lui a1, %hi(large_global)
540+ ; RV32-NEXT: addi a1, a1, %lo(large_global)
541+ ; RV32-NEXT: lw a2, 16(a1)
542+ ; RV32-NEXT: sw a2, 16(a0)
543+ ; RV32-NEXT: lw a2, 12(a1)
544+ ; RV32-NEXT: sw a2, 12(a0)
545+ ; RV32-NEXT: lw a2, 8(a1)
546+ ; RV32-NEXT: sw a2, 8(a0)
547+ ; RV32-NEXT: lw a2, 4(a1)
548+ ; RV32-NEXT: sw a2, 4(a0)
549+ ; RV32-NEXT: lw a1, 0(a1)
550+ ; RV32-NEXT: sw a1, 0(a0)
551+ ; RV32-NEXT: tail large_callee
552+ ;
553+ ; RV64-LABEL: large_caller_from_global:
554+ ; RV64: # %bb.0: # %entry
555+ ; RV64-NEXT: lui a1, %hi(large_global)
556+ ; RV64-NEXT: addi a1, a1, %lo(large_global)
557+ ; RV64-NEXT: lw a2, 16(a1)
558+ ; RV64-NEXT: sw a2, 16(a0)
559+ ; RV64-NEXT: lw a2, 12(a1)
560+ ; RV64-NEXT: sw a2, 12(a0)
561+ ; RV64-NEXT: lw a2, 8(a1)
562+ ; RV64-NEXT: sw a2, 8(a0)
563+ ; RV64-NEXT: lw a2, 4(a1)
564+ ; RV64-NEXT: sw a2, 4(a0)
565+ ; RV64-NEXT: lw a1, 0(a1)
566+ ; RV64-NEXT: sw a1, 0(a0)
567+ ; RV64-NEXT: tail large_callee
568+ entry:
569+ musttail call void @large_callee (%twenty_bytes* byval (%twenty_bytes ) align 4 @large_global )
570+ ret void
571+ }
0 commit comments