|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
2 |
| -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sme,+sme2p1 < %s -o - | FileCheck %s |
| 2 | +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sme,+sme2p1 < %s | FileCheck %s |
3 | 3 |
|
4 | 4 | target triple = "aarch64-unknown-linux-gnu"
|
5 | 5 |
|
6 | 6 | declare void @bar_enabled(<vscale x 4 x i32>) #0
|
| 7 | +declare void @bar(<vscale x 4 x i32>) |
| 8 | +declare <vscale x 4 x i32> @bar_retv_enabled() #0 |
| 9 | +declare <vscale x 4 x i32> @bar_retv() |
7 | 10 |
|
8 | 11 | ; Non-streaming -> calls streaming callee
|
9 | 12 | define void @foo_non_streaming_pass_arg(ptr %arg) {
|
@@ -107,24 +110,22 @@ define void @foo_streaming_compatible_pass_arg(ptr %arg) #1 {
|
107 | 110 | ; CHECK-NEXT: .cfi_offset b15, -1136
|
108 | 111 | ; CHECK-NEXT: sub sp, sp, #1024
|
109 | 112 | ; CHECK-NEXT: addvl sp, sp, #-1
|
110 |
| -; CHECK-NEXT: mov x8, x0 |
111 |
| -; CHECK-NEXT: bl __arm_sme_state |
112 |
| -; CHECK-NEXT: rdvl x9, #1 |
113 |
| -; CHECK-NEXT: rdsvl x10, #1 |
114 |
| -; CHECK-NEXT: cmp x9, x10 |
| 113 | +; CHECK-NEXT: mrs x19, SVCR |
| 114 | +; CHECK-NEXT: rdvl x8, #1 |
| 115 | +; CHECK-NEXT: rdsvl x9, #1 |
| 116 | +; CHECK-NEXT: cmp x8, x9 |
115 | 117 | ; CHECK-NEXT: b.eq .LBB1_2
|
116 | 118 | ; CHECK-NEXT: // %bb.1: // %entry
|
117 | 119 | ; CHECK-NEXT: brk #0x1
|
118 | 120 | ; CHECK-NEXT: .LBB1_2: // %entry
|
119 |
| -; CHECK-NEXT: ldr z0, [x8] |
| 121 | +; CHECK-NEXT: ldr z0, [x0] |
120 | 122 | ; CHECK-NEXT: sub x8, x29, #1088
|
121 | 123 | ; CHECK-NEXT: str z0, [x8, #-1, mul vl] // 16-byte Folded Spill
|
122 |
| -; CHECK-NEXT: tbnz w0, #0, .LBB1_4 |
| 124 | +; CHECK-NEXT: tbnz w19, #0, .LBB1_4 |
123 | 125 | ; CHECK-NEXT: // %bb.3: // %entry
|
124 | 126 | ; CHECK-NEXT: smstart sm
|
125 | 127 | ; CHECK-NEXT: .LBB1_4: // %entry
|
126 | 128 | ; CHECK-NEXT: ldr z0, [x8, #-1, mul vl] // 16-byte Folded Reload
|
127 |
| -; CHECK-NEXT: mov x19, x0 |
128 | 129 | ; CHECK-NEXT: bl bar_enabled
|
129 | 130 | ; CHECK-NEXT: tbnz w19, #0, .LBB1_6
|
130 | 131 | ; CHECK-NEXT: // %bb.5: // %entry
|
@@ -163,7 +164,75 @@ entry:
|
163 | 164 | ret void
|
164 | 165 | }
|
165 | 166 |
|
166 |
| -declare <vscale x 4 x i32> @bar_retv_enabled() #0 |
| 167 | +; Streaming -> calls non-streaming callee |
| 168 | +define void @foo_streaming_pass_arg(ptr %arg) #0 { |
| 169 | +; CHECK-LABEL: foo_streaming_pass_arg: |
| 170 | +; CHECK: // %bb.0: // %entry |
| 171 | +; CHECK-NEXT: sub sp, sp, #1120 |
| 172 | +; CHECK-NEXT: .cfi_def_cfa_offset 1120 |
| 173 | +; CHECK-NEXT: cntd x9 |
| 174 | +; CHECK-NEXT: stp d15, d14, [sp] // 16-byte Folded Spill |
| 175 | +; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill |
| 176 | +; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill |
| 177 | +; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill |
| 178 | +; CHECK-NEXT: str x29, [sp, #1088] // 8-byte Folded Spill |
| 179 | +; CHECK-NEXT: str x30, [sp, #1096] // 8-byte Folded Spill |
| 180 | +; CHECK-NEXT: str x9, [sp, #1104] // 8-byte Folded Spill |
| 181 | +; CHECK-NEXT: str x28, [sp, #1112] // 8-byte Folded Spill |
| 182 | +; CHECK-NEXT: .cfi_offset w28, -8 |
| 183 | +; CHECK-NEXT: .cfi_offset vg, -16 |
| 184 | +; CHECK-NEXT: .cfi_offset w30, -24 |
| 185 | +; CHECK-NEXT: .cfi_offset w29, -32 |
| 186 | +; CHECK-NEXT: .cfi_offset b8, -1064 |
| 187 | +; CHECK-NEXT: .cfi_offset b9, -1072 |
| 188 | +; CHECK-NEXT: .cfi_offset b10, -1080 |
| 189 | +; CHECK-NEXT: .cfi_offset b11, -1088 |
| 190 | +; CHECK-NEXT: .cfi_offset b12, -1096 |
| 191 | +; CHECK-NEXT: .cfi_offset b13, -1104 |
| 192 | +; CHECK-NEXT: .cfi_offset b14, -1112 |
| 193 | +; CHECK-NEXT: .cfi_offset b15, -1120 |
| 194 | +; CHECK-NEXT: sub sp, sp, #1024 |
| 195 | +; CHECK-NEXT: .cfi_def_cfa_offset 2144 |
| 196 | +; CHECK-NEXT: smstop sm |
| 197 | +; CHECK-NEXT: rdvl x8, #1 |
| 198 | +; CHECK-NEXT: rdsvl x9, #1 |
| 199 | +; CHECK-NEXT: cmp x8, x9 |
| 200 | +; CHECK-NEXT: b.eq .LBB2_2 |
| 201 | +; CHECK-NEXT: // %bb.1: // %entry |
| 202 | +; CHECK-NEXT: brk #0x1 |
| 203 | +; CHECK-NEXT: .LBB2_2: // %entry |
| 204 | +; CHECK-NEXT: ldr z0, [x0] |
| 205 | +; CHECK-NEXT: bl bar |
| 206 | +; CHECK-NEXT: smstart sm |
| 207 | +; CHECK-NEXT: add sp, sp, #1024 |
| 208 | +; CHECK-NEXT: .cfi_def_cfa_offset 1120 |
| 209 | +; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload |
| 210 | +; CHECK-NEXT: ldr x28, [sp, #1112] // 8-byte Folded Reload |
| 211 | +; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload |
| 212 | +; CHECK-NEXT: ldr x30, [sp, #1096] // 8-byte Folded Reload |
| 213 | +; CHECK-NEXT: ldr x29, [sp, #1088] // 8-byte Folded Reload |
| 214 | +; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload |
| 215 | +; CHECK-NEXT: ldp d15, d14, [sp] // 16-byte Folded Reload |
| 216 | +; CHECK-NEXT: add sp, sp, #1120 |
| 217 | +; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| 218 | +; CHECK-NEXT: .cfi_restore w28 |
| 219 | +; CHECK-NEXT: .cfi_restore vg |
| 220 | +; CHECK-NEXT: .cfi_restore w30 |
| 221 | +; CHECK-NEXT: .cfi_restore w29 |
| 222 | +; CHECK-NEXT: .cfi_restore b8 |
| 223 | +; CHECK-NEXT: .cfi_restore b9 |
| 224 | +; CHECK-NEXT: .cfi_restore b10 |
| 225 | +; CHECK-NEXT: .cfi_restore b11 |
| 226 | +; CHECK-NEXT: .cfi_restore b12 |
| 227 | +; CHECK-NEXT: .cfi_restore b13 |
| 228 | +; CHECK-NEXT: .cfi_restore b14 |
| 229 | +; CHECK-NEXT: .cfi_restore b15 |
| 230 | +; CHECK-NEXT: ret |
| 231 | +entry: |
| 232 | + %v = load <vscale x 4 x i32>, ptr %arg, align 16 |
| 233 | + tail call void @bar(<vscale x 4 x i32> %v) |
| 234 | + ret void |
| 235 | +} |
167 | 236 |
|
168 | 237 | ; Non-streaming -> returns SVE value from streaming callee
|
169 | 238 | define void @foo_non_streaming_retval(ptr %ptr) {
|
@@ -197,10 +266,10 @@ define void @foo_non_streaming_retval(ptr %ptr) {
|
197 | 266 | ; CHECK-NEXT: rdvl x8, #1
|
198 | 267 | ; CHECK-NEXT: rdsvl x9, #1
|
199 | 268 | ; CHECK-NEXT: cmp x8, x9
|
200 |
| -; CHECK-NEXT: b.eq .LBB2_2 |
| 269 | +; CHECK-NEXT: b.eq .LBB3_2 |
201 | 270 | ; CHECK-NEXT: // %bb.1: // %entry
|
202 | 271 | ; CHECK-NEXT: brk #0x1
|
203 |
| -; CHECK-NEXT: .LBB2_2: // %entry |
| 272 | +; CHECK-NEXT: .LBB3_2: // %entry |
204 | 273 | ; CHECK-NEXT: mov x19, x0
|
205 | 274 | ; CHECK-NEXT: smstart sm
|
206 | 275 | ; CHECK-NEXT: bl bar_retv_enabled
|
@@ -273,27 +342,26 @@ define void @foo_streaming_compatible_retval(ptr %ptr) #1 {
|
273 | 342 | ; CHECK-NEXT: .cfi_offset b15, -1136
|
274 | 343 | ; CHECK-NEXT: sub sp, sp, #1024
|
275 | 344 | ; CHECK-NEXT: addvl sp, sp, #-1
|
276 |
| -; CHECK-NEXT: mov x19, x0 |
277 |
| -; CHECK-NEXT: bl __arm_sme_state |
| 345 | +; CHECK-NEXT: mrs x20, SVCR |
278 | 346 | ; CHECK-NEXT: rdvl x8, #1
|
279 | 347 | ; CHECK-NEXT: rdsvl x9, #1
|
280 | 348 | ; CHECK-NEXT: cmp x8, x9
|
281 |
| -; CHECK-NEXT: b.eq .LBB3_2 |
| 349 | +; CHECK-NEXT: b.eq .LBB4_2 |
282 | 350 | ; CHECK-NEXT: // %bb.1: // %entry
|
283 | 351 | ; CHECK-NEXT: brk #0x1
|
284 |
| -; CHECK-NEXT: .LBB3_2: // %entry |
285 |
| -; CHECK-NEXT: tbnz w0, #0, .LBB3_4 |
| 352 | +; CHECK-NEXT: .LBB4_2: // %entry |
| 353 | +; CHECK-NEXT: mov x19, x0 |
| 354 | +; CHECK-NEXT: tbnz w20, #0, .LBB4_4 |
286 | 355 | ; CHECK-NEXT: // %bb.3: // %entry
|
287 | 356 | ; CHECK-NEXT: smstart sm
|
288 |
| -; CHECK-NEXT: .LBB3_4: // %entry |
289 |
| -; CHECK-NEXT: mov x20, x0 |
| 357 | +; CHECK-NEXT: .LBB4_4: // %entry |
290 | 358 | ; CHECK-NEXT: bl bar_retv_enabled
|
291 | 359 | ; CHECK-NEXT: sub x8, x29, #1088
|
292 | 360 | ; CHECK-NEXT: str z0, [x8, #-1, mul vl] // 16-byte Folded Spill
|
293 |
| -; CHECK-NEXT: tbnz w20, #0, .LBB3_6 |
| 361 | +; CHECK-NEXT: tbnz w20, #0, .LBB4_6 |
294 | 362 | ; CHECK-NEXT: // %bb.5: // %entry
|
295 | 363 | ; CHECK-NEXT: smstop sm
|
296 |
| -; CHECK-NEXT: .LBB3_6: // %entry |
| 364 | +; CHECK-NEXT: .LBB4_6: // %entry |
297 | 365 | ; CHECK-NEXT: ldr z0, [x8, #-1, mul vl] // 16-byte Folded Reload
|
298 | 366 | ; CHECK-NEXT: str z0, [x19]
|
299 | 367 | ; CHECK-NEXT: addvl sp, sp, #1
|
@@ -331,5 +399,86 @@ entry:
|
331 | 399 | ret void
|
332 | 400 | }
|
333 | 401 |
|
| 402 | +; Streaming -> returns SVE value from non-streaming callee |
| 403 | +define void @foo_streaming_retval(ptr %ptr) #0 { |
| 404 | +; CHECK-LABEL: foo_streaming_retval: |
| 405 | +; CHECK: // %bb.0: // %entry |
| 406 | +; CHECK-NEXT: sub sp, sp, #1136 |
| 407 | +; CHECK-NEXT: .cfi_def_cfa_offset 1136 |
| 408 | +; CHECK-NEXT: cntd x9 |
| 409 | +; CHECK-NEXT: stp d15, d14, [sp] // 16-byte Folded Spill |
| 410 | +; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill |
| 411 | +; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill |
| 412 | +; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill |
| 413 | +; CHECK-NEXT: str x29, [sp, #1088] // 8-byte Folded Spill |
| 414 | +; CHECK-NEXT: str x30, [sp, #1096] // 8-byte Folded Spill |
| 415 | +; CHECK-NEXT: str x9, [sp, #1104] // 8-byte Folded Spill |
| 416 | +; CHECK-NEXT: str x28, [sp, #1112] // 8-byte Folded Spill |
| 417 | +; CHECK-NEXT: str x19, [sp, #1120] // 8-byte Folded Spill |
| 418 | +; CHECK-NEXT: add x29, sp, #1088 |
| 419 | +; CHECK-NEXT: .cfi_def_cfa w29, 48 |
| 420 | +; CHECK-NEXT: .cfi_offset w19, -16 |
| 421 | +; CHECK-NEXT: .cfi_offset w28, -24 |
| 422 | +; CHECK-NEXT: .cfi_offset vg, -32 |
| 423 | +; CHECK-NEXT: .cfi_offset w30, -40 |
| 424 | +; CHECK-NEXT: .cfi_offset w29, -48 |
| 425 | +; CHECK-NEXT: .cfi_offset b8, -1080 |
| 426 | +; CHECK-NEXT: .cfi_offset b9, -1088 |
| 427 | +; CHECK-NEXT: .cfi_offset b10, -1096 |
| 428 | +; CHECK-NEXT: .cfi_offset b11, -1104 |
| 429 | +; CHECK-NEXT: .cfi_offset b12, -1112 |
| 430 | +; CHECK-NEXT: .cfi_offset b13, -1120 |
| 431 | +; CHECK-NEXT: .cfi_offset b14, -1128 |
| 432 | +; CHECK-NEXT: .cfi_offset b15, -1136 |
| 433 | +; CHECK-NEXT: sub sp, sp, #1024 |
| 434 | +; CHECK-NEXT: addvl sp, sp, #-1 |
| 435 | +; CHECK-NEXT: smstop sm |
| 436 | +; CHECK-NEXT: rdvl x8, #1 |
| 437 | +; CHECK-NEXT: rdsvl x9, #1 |
| 438 | +; CHECK-NEXT: cmp x8, x9 |
| 439 | +; CHECK-NEXT: b.eq .LBB5_2 |
| 440 | +; CHECK-NEXT: // %bb.1: // %entry |
| 441 | +; CHECK-NEXT: brk #0x1 |
| 442 | +; CHECK-NEXT: .LBB5_2: // %entry |
| 443 | +; CHECK-NEXT: mov x19, x0 |
| 444 | +; CHECK-NEXT: bl bar_retv |
| 445 | +; CHECK-NEXT: sub x8, x29, #1088 |
| 446 | +; CHECK-NEXT: str z0, [x8, #-1, mul vl] // 16-byte Folded Spill |
| 447 | +; CHECK-NEXT: smstart sm |
| 448 | +; CHECK-NEXT: ldr z0, [x8, #-1, mul vl] // 16-byte Folded Reload |
| 449 | +; CHECK-NEXT: str z0, [x19] |
| 450 | +; CHECK-NEXT: addvl sp, sp, #1 |
| 451 | +; CHECK-NEXT: add sp, sp, #1024 |
| 452 | +; CHECK-NEXT: .cfi_def_cfa wsp, 1136 |
| 453 | +; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload |
| 454 | +; CHECK-NEXT: ldr x19, [sp, #1120] // 8-byte Folded Reload |
| 455 | +; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload |
| 456 | +; CHECK-NEXT: ldr x28, [sp, #1112] // 8-byte Folded Reload |
| 457 | +; CHECK-NEXT: ldr x30, [sp, #1096] // 8-byte Folded Reload |
| 458 | +; CHECK-NEXT: ldr x29, [sp, #1088] // 8-byte Folded Reload |
| 459 | +; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload |
| 460 | +; CHECK-NEXT: ldp d15, d14, [sp] // 16-byte Folded Reload |
| 461 | +; CHECK-NEXT: add sp, sp, #1136 |
| 462 | +; CHECK-NEXT: .cfi_def_cfa_offset 0 |
| 463 | +; CHECK-NEXT: .cfi_restore w19 |
| 464 | +; CHECK-NEXT: .cfi_restore w28 |
| 465 | +; CHECK-NEXT: .cfi_restore vg |
| 466 | +; CHECK-NEXT: .cfi_restore w30 |
| 467 | +; CHECK-NEXT: .cfi_restore w29 |
| 468 | +; CHECK-NEXT: .cfi_restore b8 |
| 469 | +; CHECK-NEXT: .cfi_restore b9 |
| 470 | +; CHECK-NEXT: .cfi_restore b10 |
| 471 | +; CHECK-NEXT: .cfi_restore b11 |
| 472 | +; CHECK-NEXT: .cfi_restore b12 |
| 473 | +; CHECK-NEXT: .cfi_restore b13 |
| 474 | +; CHECK-NEXT: .cfi_restore b14 |
| 475 | +; CHECK-NEXT: .cfi_restore b15 |
| 476 | +; CHECK-NEXT: ret |
| 477 | +entry: |
| 478 | + %v = tail call <vscale x 4 x i32> @bar_retv() |
| 479 | + store <vscale x 4 x i32> %v, ptr %ptr, align 16 |
| 480 | + ret void |
| 481 | +} |
| 482 | + |
334 | 483 | attributes #0 = { "aarch64_pstate_sm_enabled" }
|
335 | 484 | attributes #1 = { "aarch64_pstate_sm_compatible" }
|
0 commit comments