@@ -327,7 +327,8 @@ static int64_t getArgumentStackToRestore(MachineFunction &MF,
327327static bool produceCompactUnwindFrame (MachineFunction &MF);
328328static bool needsWinCFI (const MachineFunction &MF);
329329static StackOffset getSVEStackSize (const MachineFunction &MF);
330- static Register findScratchNonCalleeSaveRegister (MachineBasicBlock *MBB);
330+ static Register findScratchNonCalleeSaveRegister (MachineBasicBlock *MBB, bool HasCall=false );
331+ static bool requiresSaveVG (const MachineFunction &MF);
331332
332333/// Returns true if homogeneous prolog or epilog code can be emitted
333334/// for the size optimization. If possible, a frame helper call is injected.
@@ -1002,6 +1003,16 @@ void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
10021003 }
10031004}
10041005
1006+ static bool windowsRequiresStackProbe (const MachineFunction &MF,
1007+ uint64_t StackSizeInBytes) {
1008+ const AArch64Subtarget &Subtarget = MF.getSubtarget <AArch64Subtarget>();
1009+ const AArch64FunctionInfo &MFI = *MF.getInfo <AArch64FunctionInfo>();
1010+ // TODO: When implementing stack protectors, take that into account
1011+ // for the probe threshold.
1012+ return Subtarget.isTargetWindows () && MFI.hasStackProbing () &&
1013+ StackSizeInBytes >= uint64_t (MFI.getStackProbeSize ());
1014+ }
1015+
10051016static void getLiveRegsForEntryMBB (LivePhysRegs &LiveRegs,
10061017 const MachineBasicBlock &MBB) {
10071018 const MachineFunction *MF = MBB.getParent ();
@@ -1023,7 +1034,7 @@ static void getLiveRegsForEntryMBB(LivePhysRegs &LiveRegs,
10231034// but we would then have to make sure that we were in fact saving at least one
10241035// callee-save register in the prologue, which is additional complexity that
10251036// doesn't seem worth the benefit.
1026- static Register findScratchNonCalleeSaveRegister (MachineBasicBlock *MBB) {
1037+ static Register findScratchNonCalleeSaveRegister (MachineBasicBlock *MBB, bool HasCall ) {
10271038 MachineFunction *MF = MBB->getParent ();
10281039
10291040 // If MBB is an entry block, use X9 as the scratch register
@@ -1037,6 +1048,11 @@ static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
10371048 const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo ();
10381049 LivePhysRegs LiveRegs (TRI);
10391050 getLiveRegsForEntryMBB (LiveRegs, *MBB);
1051+ if (HasCall) {
1052+ LiveRegs.addReg (AArch64::X16);
1053+ LiveRegs.addReg (AArch64::X17);
1054+ LiveRegs.addReg (AArch64::X18);
1055+ }
10401056
10411057 // Prefer X9 since it was historically used for the prologue scratch reg.
10421058 const MachineRegisterInfo &MRI = MF->getRegInfo ();
@@ -1077,23 +1093,16 @@ bool AArch64FrameLowering::canUseAsPrologue(
10771093 MBB.isLiveIn (AArch64::NZCV))
10781094 return false ;
10791095
1080- // Don't need a scratch register if we're not going to re-align the stack or
1081- // emit stack probes.
1082- if (!RegInfo->hasStackRealignment (*MF) && !TLI->hasInlineStackProbe (*MF))
1083- return true ;
1084- // Otherwise, we can use any block as long as it has a scratch register
1085- // available.
1086- return findScratchNonCalleeSaveRegister (TmpMBB) != AArch64::NoRegister;
1087- }
1096+ if (RegInfo->hasStackRealignment (*MF) || TLI->hasInlineStackProbe (*MF))
1097+ if (findScratchNonCalleeSaveRegister (TmpMBB) == AArch64::NoRegister)
1098+ return false ;
10881099
1089- static bool windowsRequiresStackProbe (MachineFunction &MF,
1090- uint64_t StackSizeInBytes) {
1091- const AArch64Subtarget &Subtarget = MF.getSubtarget <AArch64Subtarget>();
1092- const AArch64FunctionInfo &MFI = *MF.getInfo <AArch64FunctionInfo>();
1093- // TODO: When implementing stack protectors, take that into account
1094- // for the probe threshold.
1095- return Subtarget.isTargetWindows () && MFI.hasStackProbing () &&
1096- StackSizeInBytes >= uint64_t (MFI.getStackProbeSize ());
1100+ // May need a scratch register (for the return value) if a special call is required.
1101+ if (requiresSaveVG (*MF) || windowsRequiresStackProbe (*MF, std::numeric_limits<uint64_t >::max ()))
1102+ if (findScratchNonCalleeSaveRegister (TmpMBB, true ) == AArch64::NoRegister)
1103+ return false ;
1104+
1105+ return true ;
10971106}
10981107
10991108static bool needsWinCFI (const MachineFunction &MF) {
@@ -1356,8 +1365,8 @@ bool requiresGetVGCall(MachineFunction &MF) {
13561365 !MF.getSubtarget <AArch64Subtarget>().hasSVE ();
13571366}
13581367
1359- static bool requiresSaveVG (MachineFunction &MF) {
1360- AArch64FunctionInfo *AFI = MF.getInfo <AArch64FunctionInfo>();
1368+ static bool requiresSaveVG (const MachineFunction &MF) {
1369+ const AArch64FunctionInfo *AFI = MF.getInfo <AArch64FunctionInfo>();
13611370 // For Darwin platforms we don't save VG for non-SVE functions, even if SME
13621371 // is enabled with streaming mode changes.
13631372 if (!AFI->hasStreamingModeChanges ())
@@ -1991,8 +2000,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
19912000 return STI.getRegisterInfo ()->isSuperOrSubRegisterEq (
19922001 AArch64::X15, LiveIn.PhysReg );
19932002 })) {
1994- X15Scratch = findScratchNonCalleeSaveRegister (&MBB);
1995- assert (X15Scratch != AArch64::NoRegister);
2003+ X15Scratch = findScratchNonCalleeSaveRegister (&MBB, true );
2004+ assert (X15Scratch != AArch64::NoRegister && (X15Scratch < AArch64::X15 || X15Scratch > AArch64::X17) );
19962005#ifndef NDEBUG
19972006 LiveRegs.removeReg (AArch64::X15); // ignore X15 since we restore it
19982007#endif
@@ -3236,7 +3245,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
32363245 unsigned X0Scratch = AArch64::NoRegister;
32373246 if (Reg1 == AArch64::VG) {
32383247 // Find an available register to store value of VG to.
3239- Reg1 = findScratchNonCalleeSaveRegister (&MBB);
3248+ Reg1 = findScratchNonCalleeSaveRegister (&MBB, true );
32403249 assert (Reg1 != AArch64::NoRegister);
32413250 SMEAttrs Attrs (MF.getFunction ());
32423251
0 commit comments