|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
2 |
| -; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve,+sme2 < %s | FileCheck %s |
| 2 | +; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-new-sme-abi -aarch64-streaming-hazard-size=0 -mattr=+sve,+sme2 < %s | FileCheck %s |
3 | 3 |
|
4 | 4 | declare void @callee()
|
5 | 5 | declare void @callee_sm() "aarch64_pstate_sm_enabled"
|
@@ -563,3 +563,128 @@ define void @test13(ptr %ptr) nounwind "aarch64_pstate_sm_enabled" {
|
563 | 563 | store <vscale x 4 x float> %res1, ptr %ptr
|
564 | 564 | ret void
|
565 | 565 | }
|
| 566 | + |
| 567 | +; Non-streaming caller with "aarch64_inout_za" -> two calls to the streaming callee @callee_sm; the checks expect one smstart sm/smstop sm pair around both calls and a single TPIDR2_EL0 setup/restore sequence. |
| 568 | +define void @test14(ptr %callee) nounwind "aarch64_inout_za" { |
| 569 | +; CHECK-LABEL: test14: |
| 570 | +; CHECK: // %bb.0: |
| 571 | +; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill |
| 572 | +; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill |
| 573 | +; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill |
| 574 | +; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill |
| 575 | +; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill |
| 576 | +; CHECK-NEXT: add x29, sp, #64 |
| 577 | +; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill |
| 578 | +; CHECK-NEXT: sub sp, sp, #16 |
| 579 | +; CHECK-NEXT: rdsvl x8, #1 |
| 580 | +; CHECK-NEXT: mov x9, sp |
| 581 | +; CHECK-NEXT: msub x9, x8, x8, x9 |
| 582 | +; CHECK-NEXT: mov sp, x9 |
| 583 | +; CHECK-NEXT: sub x10, x29, #80 |
| 584 | +; CHECK-NEXT: stp x9, x8, [x29, #-80] |
| 585 | +; CHECK-NEXT: msr TPIDR2_EL0, x10 |
| 586 | +; CHECK-NEXT: smstart sm |
| 587 | +; CHECK-NEXT: bl callee_sm |
| 588 | +; CHECK-NEXT: bl callee_sm |
| 589 | +; CHECK-NEXT: smstop sm |
| 590 | +; CHECK-NEXT: smstart za |
| 591 | +; CHECK-NEXT: mrs x8, TPIDR2_EL0 |
| 592 | +; CHECK-NEXT: sub x0, x29, #80 |
| 593 | +; CHECK-NEXT: cbnz x8, .LBB15_2 |
| 594 | +; CHECK-NEXT: // %bb.1: |
| 595 | +; CHECK-NEXT: bl __arm_tpidr2_restore |
| 596 | +; CHECK-NEXT: .LBB15_2: |
| 597 | +; CHECK-NEXT: msr TPIDR2_EL0, xzr |
| 598 | +; CHECK-NEXT: sub sp, x29, #64 |
| 599 | +; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload |
| 600 | +; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload |
| 601 | +; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload |
| 602 | +; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload |
| 603 | +; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload |
| 604 | +; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload |
| 605 | +; CHECK-NEXT: ret |
| 606 | + call void @callee_sm() |
| 607 | + call void @callee_sm() |
| 608 | + ret void |
| 609 | +} |
| 610 | + |
| 611 | +; Non-streaming caller with "aarch64_za_state_agnostic" -> two calls to the streaming callee @callee_sm; the checks expect one __arm_sme_save/__arm_sme_restore pair enclosing a single smstart sm/smstop sm pair around both calls. |
| 612 | +define void @test15(ptr %callee) nounwind "aarch64_za_state_agnostic" { |
| 613 | +; CHECK-LABEL: test15: |
| 614 | +; CHECK: // %bb.0: |
| 615 | +; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill |
| 616 | +; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill |
| 617 | +; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill |
| 618 | +; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill |
| 619 | +; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill |
| 620 | +; CHECK-NEXT: add x29, sp, #64 |
| 621 | +; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill |
| 622 | +; CHECK-NEXT: bl __arm_sme_state_size |
| 623 | +; CHECK-NEXT: sub sp, sp, x0 |
| 624 | +; CHECK-NEXT: mov x20, sp |
| 625 | +; CHECK-NEXT: mov x0, x20 |
| 626 | +; CHECK-NEXT: bl __arm_sme_save |
| 627 | +; CHECK-NEXT: smstart sm |
| 628 | +; CHECK-NEXT: bl callee_sm |
| 629 | +; CHECK-NEXT: bl callee_sm |
| 630 | +; CHECK-NEXT: smstop sm |
| 631 | +; CHECK-NEXT: mov x0, x20 |
| 632 | +; CHECK-NEXT: bl __arm_sme_restore |
| 633 | +; CHECK-NEXT: sub sp, x29, #64 |
| 634 | +; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload |
| 635 | +; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload |
| 636 | +; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload |
| 637 | +; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload |
| 638 | +; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload |
| 639 | +; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload |
| 640 | +; CHECK-NEXT: ret |
| 641 | + call void @callee_sm() |
| 642 | + call void @callee_sm() |
| 643 | + ret void |
| 644 | +} |
| 645 | + |
| 646 | +; Locally streaming caller ("aarch64_pstate_sm_body") with "aarch64_new_za" -> two calls to the attribute-less callee @callee; the checks expect smstop sm once before the calls, with TPIDR2_EL0 set once before and cleared once after both calls. |
| 647 | +define void @test16(ptr %callee) nounwind "aarch64_pstate_sm_body" "aarch64_new_za" { |
| 648 | +; CHECK-LABEL: test16: |
| 649 | +; CHECK: // %bb.0: |
| 650 | +; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill |
| 651 | +; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill |
| 652 | +; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill |
| 653 | +; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill |
| 654 | +; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill |
| 655 | +; CHECK-NEXT: add x29, sp, #64 |
| 656 | +; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill |
| 657 | +; CHECK-NEXT: sub sp, sp, #16 |
| 658 | +; CHECK-NEXT: rdsvl x8, #1 |
| 659 | +; CHECK-NEXT: mov x9, sp |
| 660 | +; CHECK-NEXT: msub x9, x8, x8, x9 |
| 661 | +; CHECK-NEXT: mov sp, x9 |
| 662 | +; CHECK-NEXT: stp x9, x8, [x29, #-80] |
| 663 | +; CHECK-NEXT: mrs x8, TPIDR2_EL0 |
| 664 | +; CHECK-NEXT: cbz x8, .LBB17_2 |
| 665 | +; CHECK-NEXT: // %bb.1: |
| 666 | +; CHECK-NEXT: bl __arm_tpidr2_save |
| 667 | +; CHECK-NEXT: msr TPIDR2_EL0, xzr |
| 668 | +; CHECK-NEXT: zero {za} |
| 669 | +; CHECK-NEXT: .LBB17_2: |
| 670 | +; CHECK-NEXT: smstart za |
| 671 | +; CHECK-NEXT: smstart sm |
| 672 | +; CHECK-NEXT: sub x8, x29, #80 |
| 673 | +; CHECK-NEXT: msr TPIDR2_EL0, x8 |
| 674 | +; CHECK-NEXT: smstop sm |
| 675 | +; CHECK-NEXT: bl callee |
| 676 | +; CHECK-NEXT: bl callee |
| 677 | +; CHECK-NEXT: msr TPIDR2_EL0, xzr |
| 678 | +; CHECK-NEXT: smstop za |
| 679 | +; CHECK-NEXT: sub sp, x29, #64 |
| 680 | +; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload |
| 681 | +; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload |
| 682 | +; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload |
| 683 | +; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload |
| 684 | +; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload |
| 685 | +; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload |
| 686 | +; CHECK-NEXT: ret |
| 687 | + call void @callee() |
| 688 | + call void @callee() |
| 689 | + ret void |
| 690 | +} |
0 commit comments