@@ -504,6 +504,11 @@ bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
       MFI.hasStackMap() || MFI.hasPatchPoint() ||
       RegInfo->hasStackRealignment(MF))
     return true;
+
+  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+  if (AFI->hasPoplessEpilogue())
+    return true;
+
   // With large callframes around we may need to use FP to access the scavenging
   // emergency spillslot.
   //
@@ -1119,6 +1124,12 @@ bool AArch64FrameLowering::canUseAsPrologue(
     return false;
   }
 
+  // If we have some return path that's popless, it needs its own very-special
+  // epilogue, so we can't shrink-wrap it away.
+  // FIXME: this and some of the below checks belong in enableShrinkWrapping.
+  if (AFI->hasPoplessEpilogue())
+    return false;
+
   // Certain stack probing sequences might clobber flags, then we can't use
   // the block as a prologue if the flags register is a live-in.
   if (MF->getInfo<AArch64FunctionInfo>()->hasStackProbing() &&
@@ -1204,6 +1215,12 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
 
 bool AArch64FrameLowering::shouldCombineCSRLocalStackBumpInEpilogue(
     MachineBasicBlock &MBB, unsigned StackBumpBytes) const {
+
+  MachineFunction &MF = *MBB.getParent();
+  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+  if (AFI->hasPoplessEpilogue())
+    return false;
+
   if (!shouldCombineCSRLocalStackBump(*MBB.getParent(), StackBumpBytes))
     return false;
 
@@ -1560,6 +1577,47 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
   return std::prev(MBB.erase(MBBI));
 }
 
+static void fixupCalleeSaveRestoreToFPBased(MachineInstr &MI,
+                                            uint64_t FPSPOffset) {
+  assert(!AArch64InstrInfo::isSEHInstruction(MI));
+
+  unsigned Opc = MI.getOpcode();
+  unsigned Scale;
+  switch (Opc) {
+  case AArch64::STPXi:
+  case AArch64::STRXui:
+  case AArch64::STPDi:
+  case AArch64::STRDui:
+  case AArch64::LDPXi:
+  case AArch64::LDRXui:
+  case AArch64::LDPDi:
+  case AArch64::LDRDui:
+    Scale = 8;
+    break;
+  case AArch64::STPQi:
+  case AArch64::STRQui:
+  case AArch64::LDPQi:
+  case AArch64::LDRQui:
+    Scale = 16;
+    break;
+  default:
+    llvm_unreachable("Unexpected callee-save save/restore opcode!");
+  }
+
+  unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
+
+  MachineOperand &BaseRegOpnd = MI.getOperand(OffsetIdx - 1);
+  assert(BaseRegOpnd.getReg() == AArch64::SP &&
+         "Unexpected base register in callee-save save/restore instruction!");
+  BaseRegOpnd.setReg(AArch64::FP); // XXX TRI
+
+  // Last operand is immediate offset that needs fixing.
+  MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
+  // All generated opcodes have scaled offsets.
+  assert(FPSPOffset % Scale == 0);
+  OffsetOpnd.setImm(OffsetOpnd.getImm() - FPSPOffset / Scale);
+}
+
 // Fixup callee-save register save/restore instructions to take into account
 // combined SP bump by adding the local stack size to the stack offsets.
 static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
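For illustration only (not part of the patch): assuming the frame record sits 16 bytes above the callee-save base, so the FPSPOffset passed in is 16, the rewrite above turns an SP-based restore such as

    ldp x21, x22, [sp, #32]    // LDPXi, Scale = 8, scaled imm = 4

into the equivalent FP-based restore

    ldp x21, x22, [x29, #16]   // new imm = 4 - 16/8 = 2 (x29 is FP)

i.e. the base register is switched from SP to FP and the scaled immediate is reduced by FPSPOffset / Scale.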
@@ -2298,10 +2356,22 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
   bool EmitCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
   bool HasWinCFI = false;
   bool IsFunclet = false;
+  bool IsSwiftCoroPartialReturn = false;
 
   if (MBB.end() != MBBI) {
     DL = MBBI->getDebugLoc();
     IsFunclet = isFuncletReturnInstr(*MBBI);
+    IsSwiftCoroPartialReturn = MBBI->getOpcode() == AArch64::RET_POPLESS;
+  }
+
+  if (IsSwiftCoroPartialReturn) {
+    // The partial-return intrin/instr requires the swiftcoro cc
+    if (MF.getFunction().getCallingConv() != CallingConv::SwiftCoro)
+      report_fatal_error("llvm.ret.popless requires swiftcorocc");
+    assert(MBBI->getOpcode() == AArch64::RET_POPLESS);
+    BuildMI(MBB, MBBI, DL, TII->get(AArch64::RET_ReallyLR))
+        .setMIFlag(MachineInstr::FrameDestroy);
+    MBB.erase(MBBI);
   }
 
   MachineBasicBlock::iterator EpilogStartI = MBB.end();
@@ -2350,6 +2420,39 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
       if (Info.getReg() != AArch64::LR)
         continue;
       MachineBasicBlock::iterator TI = MBB.getFirstTerminator();
+
+      // When we're doing a popless ret (i.e., that doesn't restore SP), we
+      // can't rely on the exit SP being the same as the entry, but they need
+      // to match for the LR auth to succeed. Instead, derive the entry SP
+      // from our FP (using a -16 static offset for the size of the frame
+      // record itself), save that into X16, and use that as the discriminator
+      // in an AUTIB.
+      if (IsSwiftCoroPartialReturn) {
+        const auto *TRI = Subtarget.getRegisterInfo();
+
+        MachineBasicBlock::iterator EpilogStartI = MBB.getFirstTerminator();
+        MachineBasicBlock::iterator Begin = MBB.begin();
+        while (EpilogStartI != Begin) {
+          --EpilogStartI;
+          if (!EpilogStartI->getFlag(MachineInstr::FrameDestroy)) {
+            ++EpilogStartI;
+            break;
+          }
+          if (EpilogStartI->readsRegister(AArch64::X16, TRI) ||
+              EpilogStartI->modifiesRegister(AArch64::X16, TRI))
+            report_fatal_error("unable to use x16 for popless ret LR auth");
+        }
+
+        emitFrameOffset(MBB, EpilogStartI, DL, AArch64::X16, AArch64::FP,
+                        StackOffset::getFixed(16), TII,
+                        MachineInstr::FrameDestroy);
+        BuildMI(MBB, TI, DL, TII->get(AArch64::AUTIB), AArch64::LR)
+            .addUse(AArch64::LR)
+            .addUse(AArch64::X16)
+            .setMIFlag(MachineInstr::FrameDestroy);
+        return;
+      }
+
       if (TI != MBB.end() && TI->getOpcode() == AArch64::RET_ReallyLR) {
         // If there is a terminator and it's a RET, we can fold AUTH into it.
         // Be careful to keep the implicitly returned registers.
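For illustration only (not part of the patch): with a standard 16-byte frame record, the sequence emitted above for a popless return amounts to roughly

    add   x16, x29, #16    // reconstruct the entry SP from FP
    autib x30, x16         // authenticate LR against the reconstructed SP

so LR authentication still uses the caller's SP value as the discriminator even though SP itself is never restored.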
@@ -2383,6 +2486,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
   AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
   if (homogeneousPrologEpilog(MF, &MBB)) {
     assert(!NeedsWinCFI);
+    assert(!IsSwiftCoroPartialReturn);
     auto LastPopI = MBB.getFirstTerminator();
     if (LastPopI != MBB.begin()) {
       auto HomogeneousEpilog = std::prev(LastPopI);
@@ -2404,7 +2508,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
   // Assume we can't combine the last pop with the sp restore.
 
   bool CombineAfterCSRBump = false;
-  if (!CombineSPBump && PrologueSaveSize != 0) {
+  if (!CombineSPBump && PrologueSaveSize != 0 && !IsSwiftCoroPartialReturn) {
     MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
     while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION ||
            AArch64InstrInfo::isSEHInstruction(*Pop))
@@ -2440,6 +2544,14 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
         IsSVECalleeSave(LastPopI)) {
       ++LastPopI;
       break;
+    } else if (IsSwiftCoroPartialReturn) {
+      assert(!EmitCFI);
+      assert(hasFP(MF));
+      fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(),
+                                        NeedsWinCFI, &HasWinCFI);
+      // if FP-based addressing, rewrite CSR restores from SP to FP
+      fixupCalleeSaveRestoreToFPBased(
+          *LastPopI, AFI->getCalleeSaveBaseToFrameRecordOffset());
     } else if (CombineSPBump)
       fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(),
                                         NeedsWinCFI, &HasWinCFI);
@@ -2459,6 +2571,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
   }
 
   if (hasFP(MF) && AFI->hasSwiftAsyncContext()) {
+    assert(!IsSwiftCoroPartialReturn);
     switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
     case SwiftAsyncFramePointerMode::DeploymentBased:
       // Avoid the reload as it is GOT relative, and instead fall back to the
@@ -2492,6 +2605,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
   // If there is a single SP update, insert it before the ret and we're done.
   if (CombineSPBump) {
     assert(!SVEStackSize && "Cannot combine SP bump with SVE");
+    assert(!IsSwiftCoroPartialReturn);
 
     // When we are about to restore the CSRs, the CFA register is SP again.
     if (EmitCFI && hasFP(MF)) {
@@ -2577,6 +2691,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
   }
 
   if (!hasFP(MF)) {
+    assert(!IsSwiftCoroPartialReturn);
     bool RedZone = canUseRedZone(MF);
     // If this was a redzone leaf function, we don't need to restore the
     // stack pointer (but we may need to pop stack args for fastcc).
@@ -2607,6 +2722,9 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
     NumBytes = 0;
   }
 
+  if (IsSwiftCoroPartialReturn)
+    return;
+
   // Restore the original stack pointer.
   // FIXME: Rather than doing the math here, we should instead just use
   // non-post-indexed loads for the restores if we aren't actually going to
@@ -3449,9 +3567,17 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
   DebugLoc DL;
   SmallVector<RegPairInfo, 8> RegPairs;
   bool NeedsWinCFI = needsWinCFI(MF);
+  bool IsSwiftCoroPartialReturn = false;
 
-  if (MBBI != MBB.end())
+  if (MBBI != MBB.end()) {
     DL = MBBI->getDebugLoc();
+    IsSwiftCoroPartialReturn = MBBI->getOpcode() == AArch64::RET_POPLESS;
+  }
+
+  // The partial-return intrin/instr requires the swiftcoro cc
+  if (IsSwiftCoroPartialReturn &&
+      MF.getFunction().getCallingConv() != CallingConv::SwiftCoro)
+    report_fatal_error("llvm.ret.popless requires swiftcorocc");
 
   computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, hasFP(MF));
   if (homogeneousPrologEpilog(MF, &MBB)) {
@@ -3464,6 +3590,17 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
     return true;
   }
 
+  // If doing a partial/popless return, CSR restores are from FP, so do it last.
+  if (IsSwiftCoroPartialReturn) {
+    auto IsFPLR = [](const RegPairInfo &c) {
+      return c.Reg1 == AArch64::LR && c.Reg2 == AArch64::FP;
+    };
+    auto FPLRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsFPLR);
+    const RegPairInfo FPLRRPI = *FPLRBegin;
+    FPLRBegin = std::remove_if(RegPairs.begin(), RegPairs.end(), IsFPLR);
+    *FPLRBegin = FPLRRPI;
+  }
+
   // For performance reasons restore SVE register in increasing order
   auto IsPPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::PPR; };
   auto PPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsPPR);
@@ -4796,6 +4933,10 @@ void AArch64FrameLowering::orderFrameObjects(
 
   const AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
   const MachineFrameInfo &MFI = MF.getFrameInfo();
+
+  if (AFI.hasPoplessEpilogue())
+    return;
+
   std::vector<FrameObject> FrameObjects(MFI.getObjectIndexEnd());
   for (auto &Obj : ObjectsToAllocate) {
     FrameObjects[Obj].IsValid = true;