|
5 | 5 | target triple = "aarch64" |
6 | 6 |
|
7 | 7 | declare i64 @private_za_decl(i64) |
| 8 | +declare void @private_za() |
8 | 9 | declare i64 @agnostic_decl(i64) "aarch64_za_state_agnostic" |
9 | 10 |
|
10 | 11 | ; No calls. Test that no buffer is allocated. |
@@ -361,3 +362,68 @@ define i64 @test_many_callee_arguments( |
361 | 362 | i64 %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, i64 %7, i64 %8, i64 %9) |
362 | 363 | ret i64 %ret |
363 | 364 | } |
| 365 | + |
| 366 | +; FIXME: The new lowering (CHECK-NEWLOWERING) places __arm_sme_save/__arm_sme_restore calls inside the stack-probing loop; it should avoid saves/restores there, as the CHECK output does (save only after the loop).
| 367 | +define void @agnostic_za_buffer_alloc_with_stack_probes() nounwind "aarch64_za_state_agnostic" "probe-stack"="inline-asm" "stack-probe-size"="65536"{ |
| 368 | +; CHECK-LABEL: agnostic_za_buffer_alloc_with_stack_probes: |
| 369 | +; CHECK: // %bb.0: |
| 370 | +; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill |
| 371 | +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill |
| 372 | +; CHECK-NEXT: mov x29, sp |
| 373 | +; CHECK-NEXT: bl __arm_sme_state_size |
| 374 | +; CHECK-NEXT: mov x8, sp |
| 375 | +; CHECK-NEXT: sub x19, x8, x0 |
| 376 | +; CHECK-NEXT: .LBB7_1: // =>This Inner Loop Header: Depth=1 |
| 377 | +; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536 |
| 378 | +; CHECK-NEXT: cmp sp, x19 |
| 379 | +; CHECK-NEXT: b.le .LBB7_3 |
| 380 | +; CHECK-NEXT: // %bb.2: // in Loop: Header=BB7_1 Depth=1 |
| 381 | +; CHECK-NEXT: str xzr, [sp] |
| 382 | +; CHECK-NEXT: b .LBB7_1 |
| 383 | +; CHECK-NEXT: .LBB7_3: |
| 384 | +; CHECK-NEXT: mov sp, x19 |
| 385 | +; CHECK-NEXT: ldr xzr, [sp] |
| 386 | +; CHECK-NEXT: mov x0, x19 |
| 387 | +; CHECK-NEXT: bl __arm_sme_save |
| 388 | +; CHECK-NEXT: bl private_za |
| 389 | +; CHECK-NEXT: mov x0, x19 |
| 390 | +; CHECK-NEXT: bl __arm_sme_restore |
| 391 | +; CHECK-NEXT: mov sp, x29 |
| 392 | +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload |
| 393 | +; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload |
| 394 | +; CHECK-NEXT: ret |
| 395 | +; |
| 396 | +; CHECK-NEWLOWERING-LABEL: agnostic_za_buffer_alloc_with_stack_probes: |
| 397 | +; CHECK-NEWLOWERING: // %bb.0: |
| 398 | +; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill |
| 399 | +; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill |
| 400 | +; CHECK-NEWLOWERING-NEXT: mov x29, sp |
| 401 | +; CHECK-NEWLOWERING-NEXT: bl __arm_sme_state_size |
| 402 | +; CHECK-NEWLOWERING-NEXT: mov x8, sp |
| 403 | +; CHECK-NEWLOWERING-NEXT: sub x19, x8, x0 |
| 404 | +; CHECK-NEWLOWERING-NEXT: .LBB7_1: // =>This Inner Loop Header: Depth=1 |
| 405 | +; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16, lsl #12 // =65536 |
| 406 | +; CHECK-NEWLOWERING-NEXT: cmp sp, x19 |
| 407 | +; CHECK-NEWLOWERING-NEXT: mov x0, x19 |
| 408 | +; CHECK-NEWLOWERING-NEXT: mrs x8, NZCV |
| 409 | +; CHECK-NEWLOWERING-NEXT: bl __arm_sme_save |
| 410 | +; CHECK-NEWLOWERING-NEXT: msr NZCV, x8 |
| 411 | +; CHECK-NEWLOWERING-NEXT: b.le .LBB7_3 |
| 412 | +; CHECK-NEWLOWERING-NEXT: // %bb.2: // in Loop: Header=BB7_1 Depth=1 |
| 413 | +; CHECK-NEWLOWERING-NEXT: mov x0, x19 |
| 414 | +; CHECK-NEWLOWERING-NEXT: str xzr, [sp] |
| 415 | +; CHECK-NEWLOWERING-NEXT: bl __arm_sme_restore |
| 416 | +; CHECK-NEWLOWERING-NEXT: b .LBB7_1 |
| 417 | +; CHECK-NEWLOWERING-NEXT: .LBB7_3: |
| 418 | +; CHECK-NEWLOWERING-NEXT: mov sp, x19 |
| 419 | +; CHECK-NEWLOWERING-NEXT: ldr xzr, [sp] |
| 420 | +; CHECK-NEWLOWERING-NEXT: bl private_za |
| 421 | +; CHECK-NEWLOWERING-NEXT: mov x0, x19 |
| 422 | +; CHECK-NEWLOWERING-NEXT: bl __arm_sme_restore |
| 423 | +; CHECK-NEWLOWERING-NEXT: mov sp, x29 |
| 424 | +; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload |
| 425 | +; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload |
| 426 | +; CHECK-NEWLOWERING-NEXT: ret |
| 427 | + call void @private_za() |
| 428 | + ret void |
| 429 | +} |
0 commit comments