|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
2 | 2 | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -x86-asm-syntax=intel | FileCheck %s |
3 | | -; ; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s |
4 | 3 |
|
5 | 4 | ; Test correct handling of a musttail call with a byval struct argument. |
6 | 5 |
|
@@ -72,17 +71,117 @@ entry: |
72 | 71 | } |
73 | 72 |
|
74 | 73 | define dso_local i32 @swapByValArguments(ptr byval(%struct.1xi32) %0, ptr byval(%struct.1xi32) %1) { |
75 | | -; CHECK-LABEL: swapArguments: |
| 74 | +; CHECK-LABEL: swapByValArguments: |
76 | 75 | ; CHECK: # %bb.0: |
77 | | - |
78 | 76 | ; CHECK-NEXT: mov eax, dword ptr [rsp + 8] |
79 | 77 | ; CHECK-NEXT: mov dword ptr [rsp - 16], eax |
80 | | -; CHECK-NEXT: mov ecx, dword ptr [rsp + 16] |
81 | | -; CHECK-NEXT: mov dword ptr [rsp - 8], ecx |
82 | | - |
83 | | -; CHECK-NEXT: mov dword ptr [rsp + 8], ecx |
84 | | -; CHECK-NEXT: mov dword ptr [rsp + 16], eax |
| 78 | +; CHECK-NEXT: mov eax, dword ptr [rsp + 16] |
| 79 | +; CHECK-NEXT: mov dword ptr [rsp - 8], eax |
85 | 80 | ; CHECK-NEXT: jmp swap # TAILCALL |
| 81 | + |
| 82 | + |
86 | 83 | %r = musttail call i32 @swap(ptr byval(%struct.1xi32) %1, ptr byval(%struct.1xi32) %0) |
87 | 84 | ret i32 %r |
88 | 85 | } |
| 86 | + |
| 87 | +; Clang only uses byval for arguments of 65 bytes or larger, but other |
| 88 | +; frontends (e.g. rustc) do use byval for smaller types. Here we use a 20-byte |
| 89 | +; struct to keep the tests more readable. |
| 90 | +%twenty_bytes = type { [5 x i32] } |
| 91 | +declare void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4) |
| 92 | + |
| 93 | +; Functions with byval parameters can be tail-called, because the value is |
| 94 | +; passed the same way for the caller and the callee. On x86, byval arguments |
| 95 | +; are never passed via registers, not even partially. |
| 96 | +define void @large_caller(%twenty_bytes* byval(%twenty_bytes) align 4 %a) { |
| 97 | +; CHECK-LABEL: large_caller: |
| 98 | +; CHECK: # %bb.0: # %entry |
| 99 | +; CHECK-NEXT: jmp large_callee@PLT # TAILCALL |
| 100 | +entry: |
| 101 | + musttail call void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %a) |
| 102 | + ret void |
| 103 | +} |
| 104 | + |
| 105 | +; The IR for this one looks dodgy because it passes an alloca to a musttail |
| 106 | +; call. It is valid, though: the alloca is passed as a byval argument, so its |
| 107 | +; contents are copied into the stack space allocated by |
| 108 | +; @large_caller_new_value's caller. |
| 109 | +define void @large_caller_new_value(%twenty_bytes* byval(%twenty_bytes) align 4 %a) { |
| 110 | +; CHECK-LABEL: large_caller_new_value: |
| 111 | +; CHECK: # %bb.0: # %entry |
| 112 | +; CHECK-NEXT: movabs rax, 4294967296 |
| 113 | +; CHECK-NEXT: mov qword ptr [rsp - 20], rax |
| 114 | +; CHECK-NEXT: movabs rcx, 12884901890 |
| 115 | +; CHECK-NEXT: mov qword ptr [rsp - 12], rcx |
| 116 | +; CHECK-NEXT: mov dword ptr [rsp - 4], 4 |
| 117 | +; CHECK-NEXT: mov qword ptr [rsp + 8], rax |
| 118 | +; CHECK-NEXT: mov qword ptr [rsp + 16], rcx |
| 119 | +; CHECK-NEXT: mov dword ptr [rsp + 24], 4 |
| 120 | +; CHECK-NEXT: jmp large_callee@PLT # TAILCALL |
| 121 | +entry: |
| 122 | + %y = alloca %twenty_bytes, align 4 |
| 123 | + store i32 0, ptr %y, align 4 |
| 124 | + %0 = getelementptr inbounds i8, ptr %y, i32 4 |
| 125 | + store i32 1, ptr %0, align 4 |
| 126 | + %1 = getelementptr inbounds i8, ptr %y, i32 8 |
| 127 | + store i32 2, ptr %1, align 4 |
| 128 | + %2 = getelementptr inbounds i8, ptr %y, i32 12 |
| 129 | + store i32 3, ptr %2, align 4 |
| 130 | + %3 = getelementptr inbounds i8, ptr %y, i32 16 |
| 131 | + store i32 4, ptr %3, align 4 |
| 132 | + musttail call void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %y) |
| 133 | + ret void |
| 134 | +} |
| 135 | + |
| 136 | +declare void @two_byvals_callee(%twenty_bytes* byval(%twenty_bytes) align 4, %twenty_bytes* byval(%twenty_bytes) align 4) |
| 137 | +define void @swap_byvals(%twenty_bytes* byval(%twenty_bytes) align 4 %a, %twenty_bytes* byval(%twenty_bytes) align 4 %b) { |
| 138 | +; CHECK-LABEL: swap_byvals: |
| 139 | +; CHECK: # %bb.0: # %entry |
| 140 | +; CHECK-NEXT: mov eax, dword ptr [rsp + 24] |
| 141 | +; CHECK-NEXT: mov dword ptr [rsp - 8], eax |
| 142 | +; CHECK-NEXT: movaps xmm0, xmmword ptr [rsp + 8] |
| 143 | +; CHECK-NEXT: movaps xmmword ptr [rsp - 24], xmm0 |
| 144 | +; CHECK-NEXT: mov eax, dword ptr [rsp + 48] |
| 145 | +; CHECK-NEXT: mov dword ptr [rsp - 32], eax |
| 146 | +; CHECK-NEXT: mov rax, qword ptr [rsp + 32] |
| 147 | +; CHECK-NEXT: mov rcx, qword ptr [rsp + 40] |
| 148 | +; CHECK-NEXT: mov qword ptr [rsp - 40], rcx |
| 149 | +; CHECK-NEXT: mov qword ptr [rsp - 48], rax |
| 150 | +; CHECK-NEXT: jmp two_byvals_callee@PLT # TAILCALL |
| 151 | +entry: |
| 152 | + musttail call void @two_byvals_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %b, %twenty_bytes* byval(%twenty_bytes) align 4 %a) |
| 153 | + ret void |
| 154 | +} |
| 155 | + |
| 156 | +; Forwards a byval arg at a different argument position (second parameter |
| 157 | +; of the caller, first of the callee). Because x86 never passes byval |
| 158 | +; arguments in registers, not even partially, the byval arg is already in |
| 159 | +; the correct position, so this is not a sibcall but can be tail-call optimized. |
| 160 | +declare void @shift_byval_callee(%twenty_bytes* byval(%twenty_bytes) align 4) |
| 161 | +define void @shift_byval(i32 %a, %twenty_bytes* byval(%twenty_bytes) align 4 %b) { |
| 162 | +; CHECK-LABEL: shift_byval: |
| 163 | +; CHECK: # %bb.0: # %entry |
| 164 | +; CHECK-NEXT: jmp shift_byval_callee@PLT # TAILCALL |
| 165 | +entry: |
| 166 | + tail call void @shift_byval_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %b) |
| 167 | + ret void |
| 168 | +} |
| 169 | + |
| 170 | +; A global object is passed as a byval argument, so it must be copied, but |
| 171 | +; the copy doesn't need a stack temporary. |
| 172 | +@large_global = external global %twenty_bytes |
| 173 | +define void @large_caller_from_global(%twenty_bytes* byval(%twenty_bytes) align 4 %a) { |
| 174 | +; CHECK-LABEL: large_caller_from_global: |
| 175 | +; CHECK: # %bb.0: # %entry |
| 176 | +; CHECK-NEXT: mov rax, qword ptr [rip + large_global@GOTPCREL] |
| 177 | +; CHECK-NEXT: mov ecx, dword ptr [rax + 16] |
| 178 | +; CHECK-NEXT: mov dword ptr [rsp + 24], ecx |
| 179 | +; CHECK-NEXT: mov rcx, qword ptr [rax] |
| 180 | +; CHECK-NEXT: mov rax, qword ptr [rax + 8] |
| 181 | +; CHECK-NEXT: mov qword ptr [rsp + 16], rax |
| 182 | +; CHECK-NEXT: mov qword ptr [rsp + 8], rcx |
| 183 | +; CHECK-NEXT: jmp large_callee@PLT # TAILCALL |
| 184 | +entry: |
| 185 | + musttail call void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4 @large_global) |
| 186 | + ret void |
| 187 | +} |
0 commit comments