@@ -2286,21 +2286,6 @@ static bool isFuncletReturnInstr(const MachineInstr &MI) {
22862286 }
22872287}
22882288
2289- /// Find a GPR restored in the epilogue that is not reserved. Returns the first entry of the frame's callee-saved list that qualifies, or AArch64::NoRegister if none does.
2290- static Register findRestoredCalleeSaveGPR (const MachineFunction &MF) {
2291- const MachineFrameInfo &MFI = MF.getFrameInfo ();
2292- const MachineRegisterInfo &MRI = MF.getRegInfo ();
2293- const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo ();
2294- for (auto &CS : CSI) {
2295- Register Reg = CS.getReg ();
2296- if (!CS.isRestored () || MRI.isReserved (Reg) || // Skip entries that are not restored or are reserved,
2297- !AArch64::GPR64RegClass.contains (Reg)) // and anything outside the GPR64 register class.
2298- continue ;
2299- return Reg; // First qualifying register wins.
2300- }
2301- return AArch64::NoRegister; // No callee-save met all three conditions.
2302- }
2303-
23042289void AArch64FrameLowering::emitEpilogue (MachineFunction &MF,
23052290 MachineBasicBlock &MBB) const {
23062291 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr ();
@@ -2550,49 +2535,69 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
25502535 DeallocateAfter, TII, MachineInstr::FrameDestroy, false ,
25512536 NeedsWinCFI, &HasWinCFI);
25522537 } else if (SVEStackSize) {
2553- // If we have stack realignment or variable sized objects on the stack,
2554- // restore the stack pointer from the frame pointer prior to SVE CSR
2555- // restoration.
2556- if (AFI->isStackRealigned () || MFI.hasVarSizedObjects ()) {
2557- if (int64_t SVECalleeSavedSize = AFI->getSVECalleeSavedStackSize ()) {
2558- // Set SP to start of SVE callee-save area from which they can
2559- // be reloaded.
2560- const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo ();
2561- if (!AFI->isStackRealigned () && RegInfo->hasBasePointer (MF)) {
2562- // If the stack is not realigned we can use the base pointer to find
2563- // the start of the SVE callee-saves (and deallocate locals).
2564- emitFrameOffset (
2565- MBB, RestoreBegin, DL, AArch64::SP, RegInfo->getBaseRegister (),
2566- StackOffset::getFixed (NumBytes), TII, MachineInstr::FrameDestroy);
2567- } else {
2568- Register CalleeSaveBase = AArch64::FP;
2569- if (int64_t CalleeSaveBaseOffset =
2570- AFI->getCalleeSaveBaseToFrameRecordOffset ()) {
2571- // This will find a GPR that is about to be restored -- so safe
2572- // to clobber. SVE functions have a "big stack" so always spill at
2573- // least one GPR (as a scratch register).
2574- CalleeSaveBase = findRestoredCalleeSaveGPR (MF);
2575- assert (CalleeSaveBase != AArch64::NoRegister);
2576- emitFrameOffset (MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP,
2577- StackOffset::getFixed (-CalleeSaveBaseOffset), TII,
2578- MachineInstr::FrameDestroy);
2579- }
2580- // The code below will deallocate the stack space by moving the
2581- // SP to the start of the SVE callee-save area.
2582- emitFrameOffset (MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,
2583- StackOffset::getScalable (-SVECalleeSavedSize), TII,
2584- MachineInstr::FrameDestroy);
2538+ const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo ();
2539+ int64_t SVECalleeSavedSize = AFI->getSVECalleeSavedStackSize ();
2540+ Register BaseForSVERestore = [&]() -> Register {
2541+ // With stack realignment we must use the FP to restore SVE CSRs (as both
2542+ // the SP and BP can't be used due to the unknown alignment padding).
2543+ if (AFI->isStackRealigned ())
2544+ return AArch64::FP;
2545+ // With variable sized objects on the stack, we can use the BP or FP to
2546+ // restore the SVE callee saves. If there are no SVE locals the BP will
2547+ // be more efficient (a single ADD).
2548+ if (MFI.hasVarSizedObjects ()) {
2549+ if (DeallocateBefore && !AFI->hasStackHazardSlotIndex ()) {
2550+ // If there's SVE locals and no hazard padding we can do:
2551+ // ADDVL SP, X29, #(-SVECalleeSavedSize)
2552+ return AArch64::FP;
25852553 }
2554+ // If there's SVE locals and hazard padding we can choose between:
2555+ // SUB TMP, X29, #(-CalleeSaveBaseOffset)
2556+ // ADDVL SP, TMP, #(-SVECalleeSavedSize)
2557+ // OR:
2558+ // ADD SP, BP, #NumBytes
2559+ // ADDVL SP, SP, #DeallocateBefore
2560+ // This chooses the latter as the "ADDVL" can be omitted if there's no
2561+ // SVE locals.
2562+ assert (RegInfo->hasBasePointer (MF) && " Expected base pointer!" );
2563+ return RegInfo->getBaseRegister ();
25862564 }
2587- } else {
2588- if (AFI->getSVECalleeSavedStackSize ()) {
2565+ // In the standard case we use the SP.
2566+ return AArch64::SP;
2567+ }();
2568+
2569+ if (SVECalleeSavedSize && BaseForSVERestore == AArch64::FP) {
2570+ Register CalleeSaveBase = AArch64::FP;
2571+ if (int64_t CalleeSaveBaseOffset =
2572+ AFI->getCalleeSaveBaseToFrameRecordOffset ()) {
2573+ // If we have a non-zero offset to the non-SVE CS base we need to
2574+ // compute the base address by subtracting the offset in a temporary
2575+ // register. SVE functions have a "big stack" so there should be at
2576+ // least one scratch register available.
2577+ RegScavenger RS;
2578+ RS.enterBasicBlockEnd (MBB);
2579+ RS.backward (MBBI);
2580+ CalleeSaveBase = RS.FindUnusedReg (&AArch64::GPR64commonRegClass);
2581+ assert (CalleeSaveBase != AArch64::NoRegister);
2582+ emitFrameOffset (MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP,
2583+ StackOffset::getFixed (-CalleeSaveBaseOffset), TII,
2584+ MachineInstr::FrameDestroy);
2585+ }
2586+ // The code below will deallocate the stack space by moving the
2587+ // SP to the start of the SVE callee-save area.
2588+ emitFrameOffset (MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,
2589+ StackOffset::getScalable (-SVECalleeSavedSize), TII,
2590+ MachineInstr::FrameDestroy);
2591+ } else if (BaseForSVERestore == AArch64::SP || SVECalleeSavedSize) {
2592+ if (SVECalleeSavedSize) {
25892593 // Deallocate the non-SVE locals first before we can deallocate (and
25902594 // restore callee saves) from the SVE area.
25912595 emitFrameOffset (
2592- MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP ,
2596+ MBB, RestoreBegin, DL, AArch64::SP, BaseForSVERestore ,
25932597 StackOffset::getFixed (NumBytes), TII, MachineInstr::FrameDestroy,
25942598 false , NeedsWinCFI, &HasWinCFI, EmitCFI && !hasFP (MF),
25952599 SVEStackSize + StackOffset::getFixed (NumBytes + PrologueSaveSize));
2600+
25962601 NumBytes = 0 ;
25972602 }
25982603
@@ -2602,11 +2607,16 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
26022607 SVEStackSize +
26032608 StackOffset::getFixed (NumBytes + PrologueSaveSize));
26042609
2605- emitFrameOffset (MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
2606- DeallocateAfter, TII, MachineInstr::FrameDestroy, false ,
2607- NeedsWinCFI, &HasWinCFI, EmitCFI && !hasFP (MF),
2608- DeallocateAfter +
2609- StackOffset::getFixed (NumBytes + PrologueSaveSize));
2610+ if (BaseForSVERestore == AArch64::SP) {
2611+ // Note: If the base is not SP it is the base pointer, in which case the
2612+ // SVE CSs will be implicitly deallocated by setting the SP to the FP to
2613+ // restore the non-SVE CSs.
2614+ emitFrameOffset (MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
2615+ DeallocateAfter, TII, MachineInstr::FrameDestroy, false ,
2616+ NeedsWinCFI, &HasWinCFI, EmitCFI && !hasFP (MF),
2617+ DeallocateAfter +
2618+ StackOffset::getFixed (NumBytes + PrologueSaveSize));
2619+ }
26102620 }
26112621 if (EmitCFI)
26122622 emitCalleeSavedSVERestores (MBB, RestoreEnd);
0 commit comments