@@ -333,8 +333,6 @@ void MachineSMEABI::insertStateChanges() {
333333 BlockInfo &Block = State.Blocks [MBB.getNumber ()];
334334 ZAState InState =
335335 State.BundleStates [Bundles->getBundle (MBB.getNumber (), /* Out=*/ false )];
336- ZAState OutState =
337- State.BundleStates [Bundles->getBundle (MBB.getNumber (), /* Out=*/ true )];
338336
339337 ZAState CurrentState = Block.FixedEntryState ;
340338 if (CurrentState == ZAState::ANY)
@@ -350,6 +348,8 @@ void MachineSMEABI::insertStateChanges() {
350348 if (MBB.succ_empty ())
351349 continue ;
352350
351+ ZAState OutState =
352+ State.BundleStates [Bundles->getBundle (MBB.getNumber (), /* Out=*/ true )];
353353 if (CurrentState != OutState)
354354 emitStateChange (MBB, MBB.getFirstTerminator (), CurrentState, OutState,
355355 Block.PhysLiveRegsAtExit );
@@ -397,8 +397,7 @@ PhysRegSave MachineSMEABI::createPhysRegSave(LiveRegs PhysLiveRegs,
397397 PhysRegSave RegSave{PhysLiveRegs};
398398 if (PhysLiveRegs & LiveRegs::NZCV) {
399399 RegSave.StatusFlags = MRI->createVirtualRegister (&AArch64::GPR64RegClass);
400- BuildMI (MBB, MBBI, DL, TII->get (AArch64::MRS))
401- .addReg (RegSave.StatusFlags , RegState::Define)
400+ BuildMI (MBB, MBBI, DL, TII->get (AArch64::MRS), RegSave.StatusFlags )
402401 .addImm (AArch64SysReg::NZCV)
403402 .addReg (AArch64::NZCV, RegState::Implicit);
404403 }
@@ -445,8 +444,7 @@ void MachineSMEABI::emitRestoreLazySave(MachineBasicBlock &MBB,
445444 .addImm (AArch64SVCR::SVCRZA)
446445 .addImm (1 );
447446 // Get current TPIDR2_EL0.
448- BuildMI (MBB, MBBI, DL, TII->get (AArch64::MRS))
449- .addReg (TPIDR2EL0, RegState::Define)
447+ BuildMI (MBB, MBBI, DL, TII->get (AArch64::MRS), TPIDR2EL0)
450448 .addImm (AArch64SysReg::TPIDR2_EL0);
451449 // Get pointer to TPIDR2 block.
452450 BuildMI (MBB, MBBI, DL, TII->get (AArch64::ADDXri), TPIDR2)
@@ -472,7 +470,6 @@ void MachineSMEABI::emitZAOff(MachineBasicBlock &MBB,
472470 bool ClearTPIDR2) {
473471 DebugLoc DL = getDebugLoc (MBB, MBBI);
474472
475- // Clear TPIDR2.
476473 if (ClearTPIDR2)
477474 BuildMI (MBB, MBBI, DL, TII->get (AArch64::MSR))
478475 .addImm (AArch64SysReg::TPIDR2_EL0)
@@ -536,9 +533,10 @@ void MachineSMEABI::emitAllocateLazySaveBuffer(
536533 }
537534}
538535
536+ static constexpr unsigned ZERO_ALL_ZA_MASK = 0b11111111 ;
539537static void emitZeroZA (const TargetInstrInfo &TII, DebugLoc DL,
540538 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
541- unsigned Mask) {
539+ unsigned Mask = ZERO_ALL_ZA_MASK ) {
542540 MachineInstrBuilder MIB =
543541 BuildMI (MBB, MBBI, DL, TII.get (AArch64::ZERO_M)).addImm (Mask);
544542 for (unsigned I = 0 ; I < 8 ; I++) {
@@ -569,9 +567,9 @@ void MachineSMEABI::emitNewZAPrologue(MachineBasicBlock &MBB,
569567 BuildMI (MBB, MBBI, DL, TII->get (AArch64::MSRpstatesvcrImm1))
570568 .addImm (AArch64SVCR::SVCRZA)
571569 .addImm (1 );
572- // Zero ZA. Note: ZA state may new be needed for new ZT0 functions .
570+ // NOTE: Functions that only use ZT0 don't need to zero ZA .
573571 if (MF->getInfo <AArch64FunctionInfo>()->getSMEFnAttrs ().hasZAState ())
574- emitZeroZA (*TII, DL, MBB, MBBI, /* Mask= */ 0b11111111 );
572+ emitZeroZA (*TII, DL, MBB, MBBI);
575573}
576574
577575void MachineSMEABI::emitStateChange (MachineBasicBlock &MBB,
@@ -583,9 +581,14 @@ void MachineSMEABI::emitStateChange(MachineBasicBlock &MBB,
583581 if (From == ZAState::ANY || To == ZAState::ANY)
584582 return ;
585583
584+ // If we're exiting from the CALLER_DORMANT state that means this new ZA
585+ // function did not touch ZA (so ZA was never turned on).
586+ if (From == ZAState::CALLER_DORMANT && To == ZAState::OFF)
587+ return ;
588+
586589 // TODO: Avoid setting up the save buffer if there's no transition to
587590 // LOCAL_SAVED.
588- if (From == ZAState::CALLER_DORMANT && To != ZAState::OFF ) {
591+ if (From == ZAState::CALLER_DORMANT) {
589592 assert (MBB.getParent ()
590593 ->getInfo <AArch64FunctionInfo>()
591594 ->getSMEFnAttrs ()
@@ -598,7 +601,7 @@ void MachineSMEABI::emitStateChange(MachineBasicBlock &MBB,
598601 return ; // Nothing more to do (ZA is active after the prologue).
599602
600603 // Note: "emitNewZAPrologue" zeros ZA, so we may need to setup a lazy save
601- // if "To" is "ZAState::LOCAL_SAVED". If may be possible to improve this
604+ // if "To" is "ZAState::LOCAL_SAVED". It may be possible to improve this
602605 // case by changing the placement of the zero instruction.
603606 From = ZAState::ACTIVE;
604607 }
@@ -608,10 +611,9 @@ void MachineSMEABI::emitStateChange(MachineBasicBlock &MBB,
608611 else if (From == ZAState::LOCAL_SAVED && To == ZAState::ACTIVE)
609612 emitRestoreLazySave (MBB, InsertPt, PhysLiveRegs);
610613 else if (To == ZAState::OFF) {
611- // If we're exiting from the CALLER_DORMANT state that means this new ZA
612- // function did not touch ZA (so ZA was never turned on).
613- if (From != ZAState::CALLER_DORMANT)
614- emitZAOff (MBB, InsertPt, /* ClearTPIDR2=*/ From == ZAState::LOCAL_SAVED);
614+ assert (From != ZAState::CALLER_DORMANT &&
615+ " CALLER_DORMANT to OFF should have already been handled" );
616+ emitZAOff (MBB, InsertPt, /* ClearTPIDR2=*/ From == ZAState::LOCAL_SAVED);
615617 } else {
616618 dbgs () << " Error: Transition from " << getZAStateString (From) << " to "
617619 << getZAStateString (To) << ' \n ' ;
0 commit comments