1010// implementing the lazy ZA state save schemes around calls.
1111//
1212// ===----------------------------------------------------------------------===//
13+ //
14+ // This pass works by collecting instructions that require ZA to be in a
15+ // specific state (e.g., "ACTIVE" or "SAVED") and inserting the necessary state
16+ // transitions to ensure ZA is in the required state before instructions. State
17+ // transitions represent actions such as setting up or restoring a lazy save.
18+ // Certain points within a function may also have predefined states independent
19+ // of any instructions, for example, a "shared_za" function is always entered
20+ // and exited in the "ACTIVE" state.
21+ //
22+ // To handle ZA state across control flow, we make use of edge bundling. This
23+ // assigns each block an "incoming" and "outgoing" edge bundle (representing
24+ // incoming and outgoing edges). Initially, these are unique to each block;
25+ // then, in the process of forming bundles, the outgoing bundle of a block is
26+ // joined with the incoming bundle of all successors. The result is that each
27+ // bundle can be assigned a single ZA state, which ensures the state required by
28+ // all of a block's successors is the same, and that each basic block will
29+ // always be entered with the same ZA state. This eliminates the need for
30+ // splitting edges to insert state transitions or "phi" nodes for ZA states.
31+ //
32+ // See below for a simple example of edge bundling.
33+ //
34+ // The following shows a conditionally executed basic block (BB1):
35+ //
36+ //  if (cond)
37+ //    BB1
38+ //  BB2
39+ //
40+ //           Initial Bundles              Joined Bundles
41+ //
42+ //           ┌──0──┐                      ┌──0──┐
43+ //           │ BB0 │                      │ BB0 │
44+ //           └──1──┘                      └──1──┘
45+ //              ├───────┐                    ├───────┐
46+ //              ▼       │                    ▼       │
47+ //           ┌──2──┐    │     ─────►      ┌──1──┐    │
48+ //           │ BB1 │    ▼                 │ BB1 │    ▼
49+ //           └──3──┘ ┌──4──┐              └──1──┘ ┌──1──┐
50+ //              └───►4 BB2 │                 └───►1 BB2 │
51+ //                   └──5──┘                      └──2──┘
52+ //
53+ // On the left are the initial per-block bundles, and on the right are the
54+ // joined bundles (which are the result of the EdgeBundles analysis).
1355
1456#include " AArch64InstrInfo.h"
1557#include " AArch64MachineFunctionInfo.h"
@@ -210,7 +252,7 @@ struct MachineSMEABI : public MachineFunctionPass {
210252 } State;
211253
212254 MachineFunction *MF = nullptr ;
213- EdgeBundles *Bundles = nullptr ;
255+ EdgeBundles *EdgeBundles = nullptr ;
214256 const AArch64Subtarget *Subtarget = nullptr ;
215257 const AArch64RegisterInfo *TRI = nullptr ;
216258 const TargetInstrInfo *TII = nullptr ;
@@ -274,8 +316,8 @@ void MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) {
274316}
275317
276318void MachineSMEABI::assignBundleZAStates () {
277- State.BundleStates .resize (Bundles ->getNumBundles ());
278- for (unsigned I = 0 , E = Bundles ->getNumBundles (); I != E; ++I) {
319+ State.BundleStates .resize (EdgeBundles ->getNumBundles ());
320+ for (unsigned I = 0 , E = EdgeBundles ->getNumBundles (); I != E; ++I) {
279321 LLVM_DEBUG (dbgs () << " Assigning ZA state for edge bundle: " << I << ' \n ' );
280322
281323 // Attempt to assign a ZA state for this bundle that minimizes state
@@ -284,16 +326,16 @@ void MachineSMEABI::assignBundleZAStates() {
284326 // TODO: We should propagate desired incoming/outgoing states through blocks
285327 // that have the "ANY" state first to make better global decisions.
286328 int EdgeStateCounts[ZAState::NUM_ZA_STATE] = {0 };
287- for (unsigned BlockID : Bundles ->getBlocks (I)) {
329+ for (unsigned BlockID : EdgeBundles ->getBlocks (I)) {
288330 LLVM_DEBUG (dbgs () << " - bb." << BlockID);
289331
290332 const BlockInfo &Block = State.Blocks [BlockID];
291333 if (Block.Insts .empty ()) {
292334 LLVM_DEBUG (dbgs () << " (no state preference)\n " );
293335 continue ;
294336 }
295- bool InEdge = Bundles ->getBundle (BlockID, /* Out=*/ false ) == I;
296- bool OutEdge = Bundles ->getBundle (BlockID, /* Out=*/ true ) == I;
337+ bool InEdge = EdgeBundles ->getBundle (BlockID, /* Out=*/ false ) == I;
338+ bool OutEdge = EdgeBundles ->getBundle (BlockID, /* Out=*/ true ) == I;
297339
298340 ZAState DesiredIncomingState = Block.Insts .front ().NeededState ;
299341 if (InEdge && isLegalEdgeBundleZAState (DesiredIncomingState)) {
@@ -333,8 +375,8 @@ void MachineSMEABI::assignBundleZAStates() {
333375void MachineSMEABI::insertStateChanges () {
334376 for (MachineBasicBlock &MBB : *MF) {
335377 const BlockInfo &Block = State.Blocks [MBB.getNumber ()];
336- ZAState InState =
337- State. BundleStates [Bundles-> getBundle (MBB. getNumber (), /* Out=*/ false )];
378+ ZAState InState = State. BundleStates [EdgeBundles-> getBundle (MBB. getNumber (),
379+ /* Out=*/ false )];
338380
339381 ZAState CurrentState = Block.FixedEntryState ;
340382 if (CurrentState == ZAState::ANY)
@@ -350,8 +392,8 @@ void MachineSMEABI::insertStateChanges() {
350392 if (MBB.succ_empty ())
351393 continue ;
352394
353- ZAState OutState =
354- State. BundleStates [Bundles-> getBundle ( MBB.getNumber (), /* Out=*/ true )];
395+ ZAState OutState = State. BundleStates [EdgeBundles-> getBundle (
396+ MBB.getNumber (), /* Out=*/ true )];
355397 if (CurrentState != OutState)
356398 emitStateChange (MBB, MBB.getFirstTerminator (), CurrentState, OutState,
357399 Block.PhysLiveRegsAtExit );
@@ -632,7 +674,7 @@ bool MachineSMEABI::runOnMachineFunction(MachineFunction &MF) {
632674 // Reset pass state.
633675 State = PassState{};
634676 this ->MF = &MF;
635- Bundles = &getAnalysis<EdgeBundlesWrapperLegacy>().getEdgeBundles ();
677+ EdgeBundles = &getAnalysis<EdgeBundlesWrapperLegacy>().getEdgeBundles ();
636678 Subtarget = &MF.getSubtarget <AArch64Subtarget>();
637679 TII = Subtarget->getInstrInfo ();
638680 TRI = Subtarget->getRegisterInfo ();
0 commit comments