Skip to content

Commit 400faf9

Browse files
authored
[AArch64][SME] Treat agnostic ZA invokes like private ZA callees (llvm#162684)
An invoke of an agnostic ZA function behaves like a private ZA callee. If the invoke does not return normally (and we end up in an exception block in the caller), ZA must be committed to the caller's save buffer (and ZA turned off). We can ensure this by setting up a ZA save before an agnostic ZA invoke. This will result in the agnostic ZA invoke committing ZA to its caller's save buffer, rather than to its local buffer, which allows us to reload the correct contents of ZA within exception blocks. Note: This also means we must restore ZA on the non-exceptional path from the `invoke` (since ZA could have been committed to the save buffer in either case).
1 parent d49aa40 commit 400faf9

File tree

2 files changed

+248
-0
lines changed

2 files changed

+248
-0
lines changed

llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,12 @@ SMECallAttrs::SMECallAttrs(const CallBase &CB, const AArch64TargetLowering *TLI)
130130
if (auto *CalledFunction = CB.getCalledFunction())
131131
CalledFn = SMEAttrs(*CalledFunction, TLI);
132132

133+
// An `invoke` of an agnostic ZA function may not return normally (it may
134+
// resume in an exception block). In this case, it acts like a private ZA
135+
// callee and may require a ZA save to be set up before it is called.
136+
if (isa<InvokeInst>(CB))
137+
CalledFn.set(SMEAttrs::ZA_State_Agnostic, /*Enable=*/false);
138+
133139
// FIXME: We probably should not allow SME attributes on direct calls but
134140
// clang duplicates streaming mode attributes at each callsite.
135141
assert((IsIndirect ||

llvm/test/CodeGen/AArch64/sme-za-exceptions.ll

Lines changed: 242 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -732,6 +732,247 @@ exit:
732732
ret void
733733
}
734734

735+
; This example corresponds to:
736+
;
737+
; __arm_agnostic("sme_za_state") void try_catch_agnostic_za_invoke()
738+
; {
739+
; try {
740+
; agnostic_za_call();
741+
; } catch(...) {
742+
; }
743+
; }
744+
;
745+
; In this example we preserve all SME state enabled by PSTATE.ZA using
746+
; `__arm_sme_save` before agnostic_za_call(). This is because on all normal
747+
; returns from an agnostic ZA function, ZA state should be preserved. That means
748+
; we need to make sure ZA state is saved in case agnostic_za_call() throws, and
749+
; we need to restore ZA state after unwinding to the catch block.
750+
751+
define void @try_catch_agnostic_za_invoke() "aarch64_za_state_agnostic" personality ptr @__gxx_personality_v0 {
752+
; CHECK-LABEL: try_catch_agnostic_za_invoke:
753+
; CHECK: .Lfunc_begin5:
754+
; CHECK-NEXT: .cfi_startproc
755+
; CHECK-NEXT: .cfi_personality 156, DW.ref.__gxx_personality_v0
756+
; CHECK-NEXT: .cfi_lsda 28, .Lexception5
757+
; CHECK-NEXT: // %bb.0: // %entry
758+
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
759+
; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
760+
; CHECK-NEXT: mov x29, sp
761+
; CHECK-NEXT: .cfi_def_cfa w29, 32
762+
; CHECK-NEXT: .cfi_offset w19, -16
763+
; CHECK-NEXT: .cfi_offset w30, -24
764+
; CHECK-NEXT: .cfi_offset w29, -32
765+
; CHECK-NEXT: bl __arm_sme_state_size
766+
; CHECK-NEXT: sub sp, sp, x0
767+
; CHECK-NEXT: mov x19, sp
768+
; CHECK-NEXT: .Ltmp15: // EH_LABEL
769+
; CHECK-NEXT: mov x0, x19
770+
; CHECK-NEXT: bl __arm_sme_save
771+
; CHECK-NEXT: bl agnostic_za_call
772+
; CHECK-NEXT: .Ltmp16: // EH_LABEL
773+
; CHECK-NEXT: .LBB5_1: // %exit
774+
; CHECK-NEXT: mov x0, x19
775+
; CHECK-NEXT: bl __arm_sme_restore
776+
; CHECK-NEXT: mov sp, x29
777+
; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
778+
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
779+
; CHECK-NEXT: ret
780+
; CHECK-NEXT: .LBB5_2: // %catch
781+
; CHECK-NEXT: .Ltmp17: // EH_LABEL
782+
; CHECK-NEXT: bl __cxa_begin_catch
783+
; CHECK-NEXT: bl __cxa_end_catch
784+
; CHECK-NEXT: b .LBB5_1
785+
;
786+
; CHECK-SDAG-LABEL: try_catch_agnostic_za_invoke:
787+
; CHECK-SDAG: .Lfunc_begin5:
788+
; CHECK-SDAG-NEXT: .cfi_startproc
789+
; CHECK-SDAG-NEXT: .cfi_personality 156, DW.ref.__gxx_personality_v0
790+
; CHECK-SDAG-NEXT: .cfi_lsda 28, .Lexception5
791+
; CHECK-SDAG-NEXT: // %bb.0: // %entry
792+
; CHECK-SDAG-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
793+
; CHECK-SDAG-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
794+
; CHECK-SDAG-NEXT: mov x29, sp
795+
; CHECK-SDAG-NEXT: .cfi_def_cfa w29, 32
796+
; CHECK-SDAG-NEXT: .cfi_offset w19, -16
797+
; CHECK-SDAG-NEXT: .cfi_offset w30, -24
798+
; CHECK-SDAG-NEXT: .cfi_offset w29, -32
799+
; CHECK-SDAG-NEXT: bl __arm_sme_state_size
800+
; CHECK-SDAG-NEXT: sub sp, sp, x0
801+
; CHECK-SDAG-NEXT: mov x19, sp
802+
; CHECK-SDAG-NEXT: .Ltmp15: // EH_LABEL
803+
; CHECK-SDAG-NEXT: mov x0, x19
804+
; CHECK-SDAG-NEXT: bl __arm_sme_save
805+
; CHECK-SDAG-NEXT: bl agnostic_za_call
806+
; CHECK-SDAG-NEXT: mov x0, x19
807+
; CHECK-SDAG-NEXT: bl __arm_sme_restore
808+
; CHECK-SDAG-NEXT: .Ltmp16: // EH_LABEL
809+
; CHECK-SDAG-NEXT: .LBB5_1: // %exit
810+
; CHECK-SDAG-NEXT: mov sp, x29
811+
; CHECK-SDAG-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
812+
; CHECK-SDAG-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
813+
; CHECK-SDAG-NEXT: ret
814+
; CHECK-SDAG-NEXT: .LBB5_2: // %catch
815+
; CHECK-SDAG-NEXT: .Ltmp17: // EH_LABEL
816+
; CHECK-SDAG-NEXT: mov x1, x0
817+
; CHECK-SDAG-NEXT: mov x0, x19
818+
; CHECK-SDAG-NEXT: bl __arm_sme_restore
819+
; CHECK-SDAG-NEXT: mov x0, x19
820+
; CHECK-SDAG-NEXT: bl __arm_sme_save
821+
; CHECK-SDAG-NEXT: mov x0, x1
822+
; CHECK-SDAG-NEXT: bl __cxa_begin_catch
823+
; CHECK-SDAG-NEXT: mov x0, x19
824+
; CHECK-SDAG-NEXT: bl __arm_sme_restore
825+
; CHECK-SDAG-NEXT: mov x0, x19
826+
; CHECK-SDAG-NEXT: bl __arm_sme_save
827+
; CHECK-SDAG-NEXT: bl __cxa_end_catch
828+
; CHECK-SDAG-NEXT: mov x0, x19
829+
; CHECK-SDAG-NEXT: bl __arm_sme_restore
830+
; CHECK-SDAG-NEXT: b .LBB5_1
831+
entry:
832+
invoke void @agnostic_za_call()
833+
to label %exit unwind label %catch
834+
835+
catch:
836+
%eh_info = landingpad { ptr, i32 }
837+
catch ptr null
838+
%exception_ptr = extractvalue { ptr, i32 } %eh_info, 0
839+
tail call ptr @__cxa_begin_catch(ptr %exception_ptr)
840+
tail call void @__cxa_end_catch()
841+
br label %exit
842+
843+
exit:
844+
ret void
845+
}
846+
847+
; This is the same as `try_catch_agnostic_za_invoke`, but shows that a lazy save would
848+
; also need to be committed in a shared-ZA function calling an agnostic-ZA function.
849+
define void @try_catch_inout_za_agnostic_za_callee() "aarch64_inout_za" personality ptr @__gxx_personality_v0 {
850+
; CHECK-LABEL: try_catch_inout_za_agnostic_za_callee:
851+
; CHECK: .Lfunc_begin6:
852+
; CHECK-NEXT: .cfi_startproc
853+
; CHECK-NEXT: .cfi_personality 156, DW.ref.__gxx_personality_v0
854+
; CHECK-NEXT: .cfi_lsda 28, .Lexception6
855+
; CHECK-NEXT: // %bb.0: // %entry
856+
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
857+
; CHECK-NEXT: mov x29, sp
858+
; CHECK-NEXT: sub sp, sp, #16
859+
; CHECK-NEXT: .cfi_def_cfa w29, 16
860+
; CHECK-NEXT: .cfi_offset w30, -8
861+
; CHECK-NEXT: .cfi_offset w29, -16
862+
; CHECK-NEXT: rdsvl x8, #1
863+
; CHECK-NEXT: mov x9, sp
864+
; CHECK-NEXT: msub x9, x8, x8, x9
865+
; CHECK-NEXT: mov sp, x9
866+
; CHECK-NEXT: stp x9, x8, [x29, #-16]
867+
; CHECK-NEXT: .Ltmp18: // EH_LABEL
868+
; CHECK-NEXT: sub x8, x29, #16
869+
; CHECK-NEXT: msr TPIDR2_EL0, x8
870+
; CHECK-NEXT: bl agnostic_za_call
871+
; CHECK-NEXT: .Ltmp19: // EH_LABEL
872+
; CHECK-NEXT: .LBB6_1: // %exit
873+
; CHECK-NEXT: smstart za
874+
; CHECK-NEXT: mrs x8, TPIDR2_EL0
875+
; CHECK-NEXT: sub x0, x29, #16
876+
; CHECK-NEXT: cbnz x8, .LBB6_3
877+
; CHECK-NEXT: // %bb.2: // %exit
878+
; CHECK-NEXT: bl __arm_tpidr2_restore
879+
; CHECK-NEXT: .LBB6_3: // %exit
880+
; CHECK-NEXT: msr TPIDR2_EL0, xzr
881+
; CHECK-NEXT: mov sp, x29
882+
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
883+
; CHECK-NEXT: ret
884+
; CHECK-NEXT: .LBB6_4: // %catch
885+
; CHECK-NEXT: .Ltmp20: // EH_LABEL
886+
; CHECK-NEXT: bl __cxa_begin_catch
887+
; CHECK-NEXT: bl __cxa_end_catch
888+
; CHECK-NEXT: b .LBB6_1
889+
;
890+
; CHECK-SDAG-LABEL: try_catch_inout_za_agnostic_za_callee:
891+
; CHECK-SDAG: .Lfunc_begin6:
892+
; CHECK-SDAG-NEXT: .cfi_startproc
893+
; CHECK-SDAG-NEXT: .cfi_personality 156, DW.ref.__gxx_personality_v0
894+
; CHECK-SDAG-NEXT: .cfi_lsda 28, .Lexception6
895+
; CHECK-SDAG-NEXT: // %bb.0: // %entry
896+
; CHECK-SDAG-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
897+
; CHECK-SDAG-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
898+
; CHECK-SDAG-NEXT: mov x29, sp
899+
; CHECK-SDAG-NEXT: sub sp, sp, #16
900+
; CHECK-SDAG-NEXT: .cfi_def_cfa w29, 32
901+
; CHECK-SDAG-NEXT: .cfi_offset w19, -16
902+
; CHECK-SDAG-NEXT: .cfi_offset w30, -24
903+
; CHECK-SDAG-NEXT: .cfi_offset w29, -32
904+
; CHECK-SDAG-NEXT: rdsvl x8, #1
905+
; CHECK-SDAG-NEXT: mov x9, sp
906+
; CHECK-SDAG-NEXT: msub x9, x8, x8, x9
907+
; CHECK-SDAG-NEXT: mov sp, x9
908+
; CHECK-SDAG-NEXT: stp x9, x8, [x29, #-16]
909+
; CHECK-SDAG-NEXT: .Ltmp18: // EH_LABEL
910+
; CHECK-SDAG-NEXT: sub x19, x29, #16
911+
; CHECK-SDAG-NEXT: msr TPIDR2_EL0, x19
912+
; CHECK-SDAG-NEXT: bl agnostic_za_call
913+
; CHECK-SDAG-NEXT: smstart za
914+
; CHECK-SDAG-NEXT: mrs x8, TPIDR2_EL0
915+
; CHECK-SDAG-NEXT: sub x0, x29, #16
916+
; CHECK-SDAG-NEXT: cbnz x8, .LBB6_2
917+
; CHECK-SDAG-NEXT: // %bb.1: // %entry
918+
; CHECK-SDAG-NEXT: bl __arm_tpidr2_restore
919+
; CHECK-SDAG-NEXT: .LBB6_2: // %entry
920+
; CHECK-SDAG-NEXT: msr TPIDR2_EL0, xzr
921+
; CHECK-SDAG-NEXT: .Ltmp19: // EH_LABEL
922+
; CHECK-SDAG-NEXT: .LBB6_3: // %exit
923+
; CHECK-SDAG-NEXT: mov sp, x29
924+
; CHECK-SDAG-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
925+
; CHECK-SDAG-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
926+
; CHECK-SDAG-NEXT: ret
927+
; CHECK-SDAG-NEXT: .LBB6_4: // %catch
928+
; CHECK-SDAG-NEXT: .Ltmp20: // EH_LABEL
929+
; CHECK-SDAG-NEXT: mov x1, x0
930+
; CHECK-SDAG-NEXT: smstart za
931+
; CHECK-SDAG-NEXT: mrs x8, TPIDR2_EL0
932+
; CHECK-SDAG-NEXT: sub x0, x29, #16
933+
; CHECK-SDAG-NEXT: cbnz x8, .LBB6_6
934+
; CHECK-SDAG-NEXT: // %bb.5: // %catch
935+
; CHECK-SDAG-NEXT: bl __arm_tpidr2_restore
936+
; CHECK-SDAG-NEXT: .LBB6_6: // %catch
937+
; CHECK-SDAG-NEXT: mov x0, x1
938+
; CHECK-SDAG-NEXT: msr TPIDR2_EL0, xzr
939+
; CHECK-SDAG-NEXT: msr TPIDR2_EL0, x19
940+
; CHECK-SDAG-NEXT: bl __cxa_begin_catch
941+
; CHECK-SDAG-NEXT: smstart za
942+
; CHECK-SDAG-NEXT: mrs x8, TPIDR2_EL0
943+
; CHECK-SDAG-NEXT: sub x0, x29, #16
944+
; CHECK-SDAG-NEXT: cbnz x8, .LBB6_8
945+
; CHECK-SDAG-NEXT: // %bb.7: // %catch
946+
; CHECK-SDAG-NEXT: bl __arm_tpidr2_restore
947+
; CHECK-SDAG-NEXT: .LBB6_8: // %catch
948+
; CHECK-SDAG-NEXT: msr TPIDR2_EL0, xzr
949+
; CHECK-SDAG-NEXT: msr TPIDR2_EL0, x19
950+
; CHECK-SDAG-NEXT: bl __cxa_end_catch
951+
; CHECK-SDAG-NEXT: smstart za
952+
; CHECK-SDAG-NEXT: mrs x8, TPIDR2_EL0
953+
; CHECK-SDAG-NEXT: sub x0, x29, #16
954+
; CHECK-SDAG-NEXT: cbnz x8, .LBB6_10
955+
; CHECK-SDAG-NEXT: // %bb.9: // %catch
956+
; CHECK-SDAG-NEXT: bl __arm_tpidr2_restore
957+
; CHECK-SDAG-NEXT: .LBB6_10: // %catch
958+
; CHECK-SDAG-NEXT: msr TPIDR2_EL0, xzr
959+
; CHECK-SDAG-NEXT: b .LBB6_3
960+
entry:
961+
invoke void @agnostic_za_call()
962+
to label %exit unwind label %catch
963+
964+
catch:
965+
%eh_info = landingpad { ptr, i32 }
966+
catch ptr null
967+
%exception_ptr = extractvalue { ptr, i32 } %eh_info, 0
968+
tail call ptr @__cxa_begin_catch(ptr %exception_ptr)
969+
tail call void @__cxa_end_catch()
970+
br label %exit
971+
972+
exit:
973+
ret void
974+
}
975+
735976
declare ptr @__cxa_allocate_exception(i64)
736977
declare void @__cxa_throw(ptr, ptr, ptr)
737978
declare ptr @__cxa_begin_catch(ptr)
@@ -742,3 +983,4 @@ declare void @may_throw()
742983
declare void @shared_za_call() "aarch64_inout_za"
743984
declare void @noexcept_shared_za_call() "aarch64_inout_za"
744985
declare void @shared_zt0_call() "aarch64_inout_zt0"
986+
declare void @agnostic_za_call() "aarch64_za_state_agnostic"

0 commit comments

Comments
 (0)