@@ -72,16 +72,30 @@ using namespace llvm;
7272
7373namespace {
7474
75- enum ZAState {
75+ // Note: For agnostic ZA, we assume the function is always entered/exited in the
76+ // "ACTIVE" state -- this _may_ not be the case (since OFF is also a
77+ // possibility), but for the purpose of placing ZA saves/restores, that does
78+ // not matter.
79+ enum ZAState : uint8_t {
7680 // Any/unknown state (not valid)
7781 ANY = 0 ,
7882
7983 // ZA is in use and active (i.e. within the accumulator)
8084 ACTIVE,
8185
86+ // ZA is active, but ZT0 has been saved.
87+ // This handles the edge case of sharesZA && !sharesZT0.
88+ ACTIVE_ZT0_SAVED,
89+
8290 // A ZA save has been set up or committed (i.e. ZA is dormant or off)
91+ // If the function uses ZT0 it must also be saved.
8392 LOCAL_SAVED,
8493
94+ // ZA has been committed to the lazy save buffer of the current function.
95+ // If the function uses ZT0 it must also be saved.
96+ // ZA is off when a save has been committed.
97+ LOCAL_COMMITTED,
98+
8599 // The ZA/ZT0 state on entry to the function.
86100 ENTRY,
87101
@@ -164,6 +178,14 @@ class EmitContext {
164178 return AgnosticZABufferPtr;
165179 }
166180
181+ int getZT0SaveSlot (MachineFunction &MF) {
182+ if (ZT0SaveFI)
183+ return *ZT0SaveFI;
184+ MachineFrameInfo &MFI = MF.getFrameInfo ();
185+ ZT0SaveFI = MFI.CreateSpillStackObject (64 , Align (16 ));
186+ return *ZT0SaveFI;
187+ }
188+
167189 // / Returns true if the function must allocate a ZA save buffer on entry. This
168190 // / will be the case if, at any point in the function, a ZA save was emitted.
169191 bool needsSaveBuffer () const {
@@ -173,6 +195,7 @@ class EmitContext {
173195 }
174196
175197private:
198+ std::optional<int > ZT0SaveFI;
176199 std::optional<int > TPIDR2BlockFI;
177200 Register AgnosticZABufferPtr = AArch64::NoRegister;
178201};
@@ -184,8 +207,10 @@ class EmitContext {
184207// / state would not be legal, as transitioning to it drops the content of ZA.
185208static bool isLegalEdgeBundleZAState (ZAState State) {
186209 switch (State) {
187- case ZAState::ACTIVE: // ZA state within the accumulator/ZT0.
188- case ZAState::LOCAL_SAVED: // ZA state is saved on the stack.
210+ case ZAState::ACTIVE: // ZA state within the accumulator/ZT0.
211+ case ZAState::ACTIVE_ZT0_SAVED: // ZT0 is saved (ZA is active).
212+ case ZAState::LOCAL_SAVED: // ZA state may be saved on the stack.
213+ case ZAState::LOCAL_COMMITTED: // ZA state is saved on the stack.
189214 return true ;
190215 default :
191216 return false ;
@@ -199,7 +224,9 @@ StringRef getZAStateString(ZAState State) {
199224 switch (State) {
200225 MAKE_CASE (ZAState::ANY)
201226 MAKE_CASE (ZAState::ACTIVE)
227+ MAKE_CASE (ZAState::ACTIVE_ZT0_SAVED)
202228 MAKE_CASE (ZAState::LOCAL_SAVED)
229+ MAKE_CASE (ZAState::LOCAL_COMMITTED)
203230 MAKE_CASE (ZAState::ENTRY)
204231 MAKE_CASE (ZAState::OFF)
205232 default :
@@ -221,18 +248,34 @@ static bool isZAorZTRegOp(const TargetRegisterInfo &TRI,
221248// / Returns the required ZA state needed before \p MI and an iterator pointing
222249// / to where any code required to change the ZA state should be inserted.
223250static std::pair<ZAState, MachineBasicBlock::iterator>
224- getZAStateBeforeInst (const TargetRegisterInfo &TRI, MachineInstr &MI,
225- bool ZAOffAtReturn ) {
251+ getInstNeededZAState (const TargetRegisterInfo &TRI, MachineInstr &MI,
252+ SMEAttrs SMEFnAttrs ) {
226253 MachineBasicBlock::iterator InsertPt (MI);
227254
228255 if (MI.getOpcode () == AArch64::InOutZAUsePseudo)
229256 return {ZAState::ACTIVE, std::prev (InsertPt)};
230257
258+ // Note: If we need to save both ZA and ZT0 we use RequiresZASavePseudo.
231259 if (MI.getOpcode () == AArch64::RequiresZASavePseudo)
232260 return {ZAState::LOCAL_SAVED, std::prev (InsertPt)};
233261
234- if (MI.isReturn ())
262+ // If we only need to save ZT0 there are two cases to consider:
263+ // 1. The function has ZA state (that we don't need to save).
264+ // - In this case we switch to the "ACTIVE_ZT0_SAVED" state.
265+ // This only saves ZT0.
266+ // 2. The function does not have ZA state
267+ // - In this case we switch to "LOCAL_COMMITTED" state.
268+ // This saves ZT0 and turns ZA off.
269+ if (MI.getOpcode () == AArch64::RequiresZT0SavePseudo) {
270+ return {SMEFnAttrs.hasZAState () ? ZAState::ACTIVE_ZT0_SAVED
271+ : ZAState::LOCAL_COMMITTED,
272+ std::prev (InsertPt)};
273+ }
274+
275+ if (MI.isReturn ()) {
276+ bool ZAOffAtReturn = SMEFnAttrs.hasPrivateZAInterface ();
235277 return {ZAOffAtReturn ? ZAState::OFF : ZAState::ACTIVE, InsertPt};
278+ }
236279
237280 for (auto &MO : MI.operands ()) {
238281 if (isZAorZTRegOp (TRI, MO))
@@ -280,6 +323,9 @@ struct MachineSMEABI : public MachineFunctionPass {
280323 // / predecessors).
281324 void propagateDesiredStates (FunctionInfo &FnInfo, bool Forwards = true );
282325
326+ void emitZT0SaveRestore (EmitContext &, MachineBasicBlock &MBB,
327+ MachineBasicBlock::iterator MBBI, bool IsSave);
328+
283329 // Emission routines for private and shared ZA functions (using lazy saves).
284330 void emitSMEPrologue (MachineBasicBlock &MBB,
285331 MachineBasicBlock::iterator MBBI);
@@ -290,8 +336,8 @@ struct MachineSMEABI : public MachineFunctionPass {
290336 MachineBasicBlock::iterator MBBI);
291337 void emitAllocateLazySaveBuffer (EmitContext &, MachineBasicBlock &MBB,
292338 MachineBasicBlock::iterator MBBI);
293- void emitZAOff (MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
294- bool ClearTPIDR2);
339+ void emitZAMode (MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
340+ bool ClearTPIDR2, bool On );
295341
296342 // Emission routines for agnostic ZA functions.
297343 void emitSetupFullZASave (MachineBasicBlock &MBB,
@@ -409,7 +455,7 @@ FunctionInfo MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) {
409455 Block.FixedEntryState = ZAState::ENTRY;
410456 } else if (MBB.isEHPad ()) {
411457 // EH entry block:
412- Block.FixedEntryState = ZAState::LOCAL_SAVED ;
458+ Block.FixedEntryState = ZAState::LOCAL_COMMITTED ;
413459 }
414460
415461 LiveRegUnits LiveUnits (*TRI);
@@ -431,8 +477,7 @@ FunctionInfo MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) {
431477 PhysLiveRegsAfterSMEPrologue = PhysLiveRegs;
432478 }
433479 // Note: We treat Agnostic ZA as inout_za with an alternate save/restore.
434- auto [NeededState, InsertPt] = getZAStateBeforeInst (
435- *TRI, MI, /* ZAOffAtReturn=*/ SMEFnAttrs.hasPrivateZAInterface ());
480+ auto [NeededState, InsertPt] = getInstNeededZAState (*TRI, MI, SMEFnAttrs);
436481 assert ((InsertPt == MBBI || isCallStartOpcode (InsertPt->getOpcode ())) &&
437482 " Unexpected state change insertion point!" );
438483 // TODO: Do something to avoid state changes where NZCV is live.
@@ -752,9 +797,9 @@ void MachineSMEABI::emitRestoreLazySave(EmitContext &Context,
752797 restorePhyRegSave (RegSave, MBB, MBBI, DL);
753798}
754799
755- void MachineSMEABI::emitZAOff (MachineBasicBlock &MBB,
756- MachineBasicBlock::iterator MBBI,
757- bool ClearTPIDR2) {
800+ void MachineSMEABI::emitZAMode (MachineBasicBlock &MBB,
801+ MachineBasicBlock::iterator MBBI,
802+ bool ClearTPIDR2, bool On ) {
758803 DebugLoc DL = getDebugLoc (MBB, MBBI);
759804
760805 if (ClearTPIDR2)
@@ -765,7 +810,7 @@ void MachineSMEABI::emitZAOff(MachineBasicBlock &MBB,
765810 // Disable ZA.
766811 BuildMI (MBB, MBBI, DL, TII->get (AArch64::MSRpstatesvcrImm1))
767812 .addImm (AArch64SVCR::SVCRZA)
768- .addImm (0 );
813+ .addImm (On ? 1 : 0 );
769814}
770815
771816void MachineSMEABI::emitAllocateLazySaveBuffer (
@@ -891,6 +936,28 @@ void MachineSMEABI::emitFullZASaveRestore(EmitContext &Context,
891936 restorePhyRegSave (RegSave, MBB, MBBI, DL);
892937}
893938
939+ void MachineSMEABI::emitZT0SaveRestore (EmitContext &Context,
940+ MachineBasicBlock &MBB,
941+ MachineBasicBlock::iterator MBBI,
942+ bool IsSave) {
943+ DebugLoc DL = getDebugLoc (MBB, MBBI);
944+ Register ZT0Save = MRI->createVirtualRegister (&AArch64::GPR64spRegClass);
945+
946+ BuildMI (MBB, MBBI, DL, TII->get (AArch64::ADDXri), ZT0Save)
947+ .addFrameIndex (Context.getZT0SaveSlot (*MF))
948+ .addImm (0 )
949+ .addImm (0 );
950+
951+ if (IsSave) {
952+ BuildMI (MBB, MBBI, DL, TII->get (AArch64::STR_TX))
953+ .addReg (AArch64::ZT0)
954+ .addReg (ZT0Save);
955+ } else {
956+ BuildMI (MBB, MBBI, DL, TII->get (AArch64::LDR_TX), AArch64::ZT0)
957+ .addReg (ZT0Save);
958+ }
959+ }
960+
894961void MachineSMEABI::emitAllocateFullZASaveBuffer (
895962 EmitContext &Context, MachineBasicBlock &MBB,
896963 MachineBasicBlock::iterator MBBI, LiveRegs PhysLiveRegs) {
@@ -935,6 +1002,17 @@ void MachineSMEABI::emitAllocateFullZASaveBuffer(
9351002 restorePhyRegSave (RegSave, MBB, MBBI, DL);
9361003}
9371004
1005+ struct FromState {
1006+ ZAState From;
1007+
1008+ constexpr uint8_t to (ZAState To) const {
1009+ static_assert (NUM_ZA_STATE < 16 , " expected ZAState to fit in 4-bits" );
1010+ return uint8_t (From) << 4 | uint8_t (To);
1011+ }
1012+ };
1013+
1014+ constexpr FromState transitionFrom (ZAState From) { return FromState{From}; }
1015+
9381016void MachineSMEABI::emitStateChange (EmitContext &Context,
9391017 MachineBasicBlock &MBB,
9401018 MachineBasicBlock::iterator InsertPt,
@@ -966,17 +1044,63 @@ void MachineSMEABI::emitStateChange(EmitContext &Context,
9661044 From = ZAState::ACTIVE;
9671045 }
9681046
969- if (From == ZAState::ACTIVE && To == ZAState::LOCAL_SAVED)
970- emitZASave (Context, MBB, InsertPt, PhysLiveRegs);
971- else if (From == ZAState::LOCAL_SAVED && To == ZAState::ACTIVE)
972- emitZARestore (Context, MBB, InsertPt, PhysLiveRegs);
973- else if (To == ZAState::OFF) {
974- assert (From != ZAState::ENTRY &&
975- " ENTRY to OFF should have already been handled" );
976- assert (!SMEFnAttrs.hasAgnosticZAInterface () &&
977- " Should not turn ZA off in agnostic ZA function" );
978- emitZAOff (MBB, InsertPt, /* ClearTPIDR2=*/ From == ZAState::LOCAL_SAVED);
979- } else {
1047+ bool IsAgnosticZA = SMEFnAttrs.hasAgnosticZAInterface ();
1048+ bool HasZT0State = SMEFnAttrs.hasZT0State ();
1049+ bool HasZAState = IsAgnosticZA || SMEFnAttrs.hasZAState ();
1050+
1051+ switch (transitionFrom (From).to (To)) {
1052+ // This section handles: ACTIVE <-> ACTIVE_ZT0_SAVED
1053+ case transitionFrom (ZAState::ACTIVE).to (ZAState::ACTIVE_ZT0_SAVED):
1054+ emitZT0SaveRestore (Context, MBB, InsertPt, /* IsSave=*/ true );
1055+ break ;
1056+ case transitionFrom (ZAState::ACTIVE_ZT0_SAVED).to (ZAState::ACTIVE):
1057+ emitZT0SaveRestore (Context, MBB, InsertPt, /* IsSave=*/ false );
1058+ break ;
1059+
1060+ // This section handles: ACTIVE -> LOCAL_SAVED
1061+ case transitionFrom (ZAState::ACTIVE).to (ZAState::LOCAL_SAVED):
1062+ if (HasZT0State)
1063+ emitZT0SaveRestore (Context, MBB, InsertPt, /* IsSave=*/ true );
1064+ if (HasZAState)
1065+ emitZASave (Context, MBB, InsertPt, PhysLiveRegs);
1066+ break ;
1067+
1068+ // This section handles: ACTIVE -> LOCAL_COMMITTED
1069+ case transitionFrom (ZAState::ACTIVE).to (ZAState::LOCAL_COMMITTED):
1070+ // Note: We could support ZA state here, but this transition is currently
1071+ // only possible when we _don't_ have ZA state.
1072+ assert (HasZT0State && !HasZAState && " Expect to only have ZT0 state." );
1073+ emitZT0SaveRestore (Context, MBB, InsertPt, /* IsSave=*/ true );
1074+ emitZAMode (MBB, InsertPt, /* ClearTPIDR2=*/ false , /* On=*/ false );
1075+ break ;
1076+
1077+ // This section handles: LOCAL_COMMITTED -> (OFF|LOCAL_SAVED)
1078+ case transitionFrom (ZAState::LOCAL_COMMITTED).to (ZAState::OFF):
1079+ case transitionFrom (ZAState::LOCAL_COMMITTED).to (ZAState::LOCAL_SAVED):
1080+ // These transitions are a no-op.
1081+ break ;
1082+
1083+ // This section handles: LOCAL_(SAVED|COMMITTED) -> ACTIVE[_ZT0_SAVED]
1084+ case transitionFrom (ZAState::LOCAL_COMMITTED).to (ZAState::ACTIVE):
1085+ case transitionFrom (ZAState::LOCAL_COMMITTED).to (ZAState::ACTIVE_ZT0_SAVED):
1086+ case transitionFrom (ZAState::LOCAL_SAVED).to (ZAState::ACTIVE):
1087+ if (HasZAState)
1088+ emitZARestore (Context, MBB, InsertPt, PhysLiveRegs);
1089+ else
1090+ emitZAMode (MBB, InsertPt, /* ClearTPIDR2=*/ false , /* On=*/ true );
1091+ if (HasZT0State && To == ZAState::ACTIVE)
1092+ emitZT0SaveRestore (Context, MBB, InsertPt, /* IsSave=*/ false );
1093+ break ;
1094+ default :
1095+ if (To == ZAState::OFF) {
1096+ assert (From != ZAState::ENTRY &&
1097+ " ENTRY to OFF should have already been handled" );
1098+ assert (SMEFnAttrs.hasPrivateZAInterface () &&
1099+ " Did not expect to turn ZA off in shared/agnostic ZA function" );
1100+ emitZAMode (MBB, InsertPt, /* ClearTPIDR2=*/ From == ZAState::LOCAL_SAVED,
1101+ /* On=*/ false );
1102+ break ;
1103+ }
9801104 dbgs () << " Error: Transition from " << getZAStateString (From) << " to "
9811105 << getZAStateString (To) << ' \n ' ;
9821106 llvm_unreachable (" Unimplemented state transition" );
0 commit comments