@@ -338,9 +338,11 @@ static bool requiresSaveVG(const MachineFunction &MF);
338338// Conservatively, returns true if the function is likely to have SVE vectors
339339// on the stack. This function is safe to be called before callee-saves or
340340// object offsets have been determined.
341- static bool isLikelyToHaveSVEStack (MachineFunction &MF) {
341+ static bool isLikelyToHaveSVEStack (const MachineFunction &MF) {
342342 auto *AFI = MF.getInfo <AArch64FunctionInfo>();
343- if (AFI->isSVECC ())
343+ if (MF.getFunction ().getCallingConv () ==
344+ CallingConv::AArch64_SVE_VectorCall ||
345+ AFI->isSVECC ())
344346 return true ;
345347
346348 if (AFI->hasCalculatedStackSizeSVE ())
@@ -532,6 +534,7 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
532534bool AArch64FrameLowering::hasFPImpl (const MachineFunction &MF) const {
533535 const MachineFrameInfo &MFI = MF.getFrameInfo ();
534536 const TargetRegisterInfo *RegInfo = MF.getSubtarget ().getRegisterInfo ();
537+ const AArch64FunctionInfo &AFI = *MF.getInfo <AArch64FunctionInfo>();
535538
536539 // Win64 EH requires a frame pointer if funclets are present, as the locals
537540 // are accessed off the frame pointer in both the parent function and the
@@ -545,6 +548,16 @@ bool AArch64FrameLowering::hasFPImpl(const MachineFunction &MF) const {
545548 MFI.hasStackMap () || MFI.hasPatchPoint () ||
546549 RegInfo->hasStackRealignment (MF))
547550 return true ;
551+ // If we have streaming mode changes and SVE registers on the stack we need a
552+ // FP. This is because the stack size may depend on the VG at entry to the
553+ // function, which is saved before the SVE area (so unrecoverable without a
554+ // FP). Similarly for locally streaming functions, but there it is because we
555+ // use ADDSVL to set up the SVE stack (which might not match VG, even without
556+ // streaming-mode changes).
557+ if (AFI.needsDwarfUnwindInfo (MF) &&
558+ ((requiresSaveVG (MF) || AFI.getSMEFnAttrs ().hasStreamingBody ()) &&
559+ (!AFI.hasCalculatedStackSizeSVE () || AFI.getStackSizeSVE () > 0 )))
560+ return true ;
548561 // With large callframes around we may need to use FP to access the scavenging
549562 // emergency spillslot.
550563 //
@@ -663,10 +676,6 @@ void AArch64FrameLowering::emitCalleeSavedGPRLocations(
663676 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
664677 MachineFunction &MF = *MBB.getParent ();
665678 MachineFrameInfo &MFI = MF.getFrameInfo ();
666- AArch64FunctionInfo *AFI = MF.getInfo <AArch64FunctionInfo>();
667- SMEAttrs Attrs = AFI->getSMEFnAttrs ();
668- bool LocallyStreaming =
669- Attrs.hasStreamingBody () && !Attrs.hasStreamingInterface ();
670679
671680 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo ();
672681 if (CSI.empty ())
@@ -680,14 +689,6 @@ void AArch64FrameLowering::emitCalleeSavedGPRLocations(
680689
681690 assert (!Info.isSpilledToReg () && " Spilling to registers not implemented" );
682691 int64_t Offset = MFI.getObjectOffset (FrameIdx) - getOffsetOfLocalArea ();
683-
684- // The location of VG will be emitted before each streaming-mode change in
685- // the function. Only locally-streaming functions require emitting the
686- // non-streaming VG location here.
687- if ((LocallyStreaming && FrameIdx == AFI->getStreamingVGIdx ()) ||
688- (!LocallyStreaming && Info.getReg () == AArch64::VG))
689- continue ;
690-
691692 CFIBuilder.buildOffset (Info.getReg (), Offset);
692693 }
693694}
@@ -707,8 +708,16 @@ void AArch64FrameLowering::emitCalleeSavedSVELocations(
707708 AArch64FunctionInfo &AFI = *MF.getInfo <AArch64FunctionInfo>();
708709 CFIInstBuilder CFIBuilder (MBB, MBBI, MachineInstr::FrameSetup);
709710
711+ std::optional<int64_t > IncomingVGOffsetFromDefCFA;
712+ if (requiresSaveVG (MF)) {
713+ auto IncomingVG = *find_if (
714+ reverse (CSI), [](auto &Info) { return Info.getReg () == AArch64::VG; });
715+ IncomingVGOffsetFromDefCFA =
716+ MFI.getObjectOffset (IncomingVG.getFrameIdx ()) - getOffsetOfLocalArea ();
717+ }
718+
710719 for (const auto &Info : CSI) {
711- if (!( MFI.getStackID (Info.getFrameIdx ()) == TargetStackID::ScalableVector) )
720+ if (MFI.getStackID (Info.getFrameIdx ()) != TargetStackID::ScalableVector)
712721 continue ;
713722
714723 // Not all unwinders may know about SVE registers, so assume the lowest
@@ -722,7 +731,8 @@ void AArch64FrameLowering::emitCalleeSavedSVELocations(
722731 StackOffset::getScalable (MFI.getObjectOffset (Info.getFrameIdx ())) -
723732 StackOffset::getFixed (AFI.getCalleeSavedStackSize (MFI));
724733
725- CFIBuilder.insertCFIInst (createCFAOffset (TRI, Reg, Offset));
734+ CFIBuilder.insertCFIInst (
735+ createCFAOffset (TRI, Reg, Offset, IncomingVGOffsetFromDefCFA));
726736 }
727737}
728738
@@ -1465,10 +1475,10 @@ bool requiresGetVGCall(MachineFunction &MF) {
14651475
14661476static bool requiresSaveVG (const MachineFunction &MF) {
14671477 const AArch64FunctionInfo *AFI = MF.getInfo <AArch64FunctionInfo>();
1478+ if (!AFI->needsDwarfUnwindInfo (MF) || !AFI->hasStreamingModeChanges ())
1479+ return false ;
14681480 // For Darwin platforms we don't save VG for non-SVE functions, even if SME
14691481 // is enabled with streaming mode changes.
1470- if (!AFI->hasStreamingModeChanges ())
1471- return false ;
14721482 auto &ST = MF.getSubtarget <AArch64Subtarget>();
14731483 if (ST.isTargetDarwin ())
14741484 return ST.hasSVE ();
@@ -1484,8 +1494,7 @@ static bool matchLibcall(const TargetLowering &TLI, const MachineOperand &MO,
14841494bool isVGInstruction (MachineBasicBlock::iterator MBBI,
14851495 const TargetLowering &TLI) {
14861496 unsigned Opc = MBBI->getOpcode ();
1487- if (Opc == AArch64::CNTD_XPiI || Opc == AArch64::RDSVLI_XI ||
1488- Opc == AArch64::UBFMXri)
1497+ if (Opc == AArch64::CNTD_XPiI)
14891498 return true ;
14901499
14911500 if (!requiresGetVGCall (*MBBI->getMF ()))
@@ -1509,9 +1518,8 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
15091518 unsigned NewOpc;
15101519
15111520 // If the function contains streaming mode changes, we expect instructions
1512- // to calculate the value of VG before spilling. For locally-streaming
1513- // functions, we need to do this for both the streaming and non-streaming
1514- // vector length. Move past these instructions if necessary.
1521+ // to calculate the value of VG before spilling. Move past these instructions
1522+ // if necessary.
15151523 MachineFunction &MF = *MBB.getParent ();
15161524 if (requiresSaveVG (MF)) {
15171525 auto &TLI = *MF.getSubtarget ().getTargetLowering ();
@@ -3475,7 +3483,6 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
34753483 MachineFunction &MF = *MBB.getParent ();
34763484 auto &TLI = *MF.getSubtarget <AArch64Subtarget>().getTargetLowering ();
34773485 const TargetInstrInfo &TII = *MF.getSubtarget ().getInstrInfo ();
3478- AArch64FunctionInfo *AFI = MF.getInfo <AArch64FunctionInfo>();
34793486 bool NeedsWinCFI = needsWinCFI (MF);
34803487 DebugLoc DL;
34813488 SmallVector<RegPairInfo, 8 > RegPairs;
@@ -3544,40 +3551,31 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
35443551 }
35453552
35463553 unsigned X0Scratch = AArch64::NoRegister;
3554+ auto RestoreX0 = make_scope_exit ([&] {
3555+ if (X0Scratch != AArch64::NoRegister)
3556+ BuildMI (MBB, MI, DL, TII.get (AArch64::ORRXrr), AArch64::X0)
3557+ .addReg (AArch64::XZR)
3558+ .addReg (X0Scratch, RegState::Undef)
3559+ .addReg (X0Scratch, RegState::Implicit)
3560+ .setMIFlag (MachineInstr::FrameSetup);
3561+ });
3562+
35473563 if (Reg1 == AArch64::VG) {
35483564 // Find an available register to store value of VG to.
35493565 Reg1 = findScratchNonCalleeSaveRegister (&MBB, true );
35503566 assert (Reg1 != AArch64::NoRegister);
3551- SMEAttrs Attrs = AFI->getSMEFnAttrs ();
3552-
3553- if (Attrs.hasStreamingBody () && !Attrs.hasStreamingInterface () &&
3554- AFI->getStreamingVGIdx () == std::numeric_limits<int >::max ()) {
3555- // For locally-streaming functions, we need to store both the streaming
3556- // & non-streaming VG. Spill the streaming value first.
3557- BuildMI (MBB, MI, DL, TII.get (AArch64::RDSVLI_XI), Reg1)
3558- .addImm (1 )
3559- .setMIFlag (MachineInstr::FrameSetup);
3560- BuildMI (MBB, MI, DL, TII.get (AArch64::UBFMXri), Reg1)
3561- .addReg (Reg1)
3562- .addImm (3 )
3563- .addImm (63 )
3564- .setMIFlag (MachineInstr::FrameSetup);
3565-
3566- AFI->setStreamingVGIdx (RPI.FrameIdx );
3567- } else if (MF.getSubtarget <AArch64Subtarget>().hasSVE ()) {
3567+ if (MF.getSubtarget <AArch64Subtarget>().hasSVE ()) {
35683568 BuildMI (MBB, MI, DL, TII.get (AArch64::CNTD_XPiI), Reg1)
35693569 .addImm (31 )
35703570 .addImm (1 )
35713571 .setMIFlag (MachineInstr::FrameSetup);
3572- AFI->setVGIdx (RPI.FrameIdx );
35733572 } else {
35743573 const AArch64Subtarget &STI = MF.getSubtarget <AArch64Subtarget>();
3575- if (llvm::any_of (
3576- MBB.liveins (),
3577- [&STI](const MachineBasicBlock::RegisterMaskPair &LiveIn) {
3578- return STI.getRegisterInfo ()->isSuperOrSubRegisterEq (
3579- AArch64::X0, LiveIn.PhysReg );
3580- }))
3574+ if (any_of (MBB.liveins (),
3575+ [&STI](const MachineBasicBlock::RegisterMaskPair &LiveIn) {
3576+ return STI.getRegisterInfo ()->isSuperOrSubRegisterEq (
3577+ AArch64::X0, LiveIn.PhysReg );
3578+ }))
35813579 X0Scratch = Reg1;
35823580
35833581 if (X0Scratch != AArch64::NoRegister)
@@ -3596,7 +3594,6 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
35963594 .addReg (AArch64::X0, RegState::ImplicitDefine)
35973595 .setMIFlag (MachineInstr::FrameSetup);
35983596 Reg1 = AArch64::X0;
3599- AFI->setVGIdx (RPI.FrameIdx );
36003597 }
36013598 }
36023599
@@ -3691,13 +3688,6 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
36913688 if (RPI.isPaired ())
36923689 MFI.setStackID (FrameIdxReg2, TargetStackID::ScalableVector);
36933690 }
3694-
3695- if (X0Scratch != AArch64::NoRegister)
3696- BuildMI (MBB, MI, DL, TII.get (AArch64::ORRXrr), AArch64::X0)
3697- .addReg (AArch64::XZR)
3698- .addReg (X0Scratch, RegState::Undef)
3699- .addReg (X0Scratch, RegState::Implicit)
3700- .setMIFlag (MachineInstr::FrameSetup);
37013691 }
37023692 return true ;
37033693}
@@ -4076,15 +4066,8 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
40764066
40774067 // Increase the callee-saved stack size if the function has streaming mode
40784068 // changes, as we will need to spill the value of the VG register.
4079- // For locally streaming functions, we spill both the streaming and
4080- // non-streaming VG value.
4081- SMEAttrs Attrs = AFI->getSMEFnAttrs ();
4082- if (requiresSaveVG (MF)) {
4083- if (Attrs.hasStreamingBody () && !Attrs.hasStreamingInterface ())
4084- CSStackSize += 16 ;
4085- else
4086- CSStackSize += 8 ;
4087- }
4069+ if (requiresSaveVG (MF))
4070+ CSStackSize += 8 ;
40884071
40894072 // Determine if a Hazard slot should be used, and increase the CSStackSize by
40904073 // StackHazardSize if so.
@@ -4235,29 +4218,19 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
42354218
42364219 // Insert VG into the list of CSRs, immediately before LR if saved.
42374220 if (requiresSaveVG (MF)) {
4238- std::vector<CalleeSavedInfo> VGSaves;
4239- SMEAttrs Attrs = AFI->getSMEFnAttrs ();
4240-
4241- auto VGInfo = CalleeSavedInfo (AArch64::VG);
4221+ CalleeSavedInfo VGInfo (AArch64::VG);
42424222 VGInfo.setRestored (false );
4243- VGSaves.push_back (VGInfo);
4244-
4245- // Add VG again if the function is locally-streaming, as we will spill two
4246- // values.
4247- if (Attrs.hasStreamingBody () && !Attrs.hasStreamingInterface ())
4248- VGSaves.push_back (VGInfo);
4249-
4250- bool InsertBeforeLR = false ;
42514223
4224+ bool InsertedBeforeLR = false ;
42524225 for (unsigned I = 0 ; I < CSI.size (); I++)
42534226 if (CSI[I].getReg () == AArch64::LR) {
4254- InsertBeforeLR = true ;
4255- CSI.insert (CSI.begin () + I, VGSaves. begin (), VGSaves. end () );
4227+ InsertedBeforeLR = true ;
4228+ CSI.insert (CSI.begin () + I, VGInfo );
42564229 break ;
42574230 }
42584231
4259- if (!InsertBeforeLR )
4260- llvm::append_range ( CSI, VGSaves );
4232+ if (!InsertedBeforeLR )
4233+ CSI. push_back (VGInfo );
42614234 }
42624235
42634236 Register LastReg = 0 ;
@@ -5260,46 +5233,11 @@ MachineBasicBlock::iterator tryMergeAdjacentSTG(MachineBasicBlock::iterator II,
52605233}
52615234} // namespace
52625235
5263- static void emitVGSaveRestore (MachineBasicBlock::iterator II,
5264- const AArch64FrameLowering *TFI) {
5265- MachineInstr &MI = *II;
5266- MachineBasicBlock *MBB = MI.getParent ();
5267- MachineFunction *MF = MBB->getParent ();
5268-
5269- if (MI.getOpcode () != AArch64::VGSavePseudo &&
5270- MI.getOpcode () != AArch64::VGRestorePseudo)
5271- return ;
5272-
5273- auto *AFI = MF->getInfo <AArch64FunctionInfo>();
5274- SMEAttrs FuncAttrs = AFI->getSMEFnAttrs ();
5275- bool LocallyStreaming =
5276- FuncAttrs.hasStreamingBody () && !FuncAttrs.hasStreamingInterface ();
5277-
5278- int64_t VGFrameIdx =
5279- LocallyStreaming ? AFI->getStreamingVGIdx () : AFI->getVGIdx ();
5280- assert (VGFrameIdx != std::numeric_limits<int >::max () &&
5281- " Expected FrameIdx for VG" );
5282-
5283- CFIInstBuilder CFIBuilder (*MBB, II, MachineInstr::NoFlags);
5284- if (MI.getOpcode () == AArch64::VGSavePseudo) {
5285- const MachineFrameInfo &MFI = MF->getFrameInfo ();
5286- int64_t Offset =
5287- MFI.getObjectOffset (VGFrameIdx) - TFI->getOffsetOfLocalArea ();
5288- CFIBuilder.buildOffset (AArch64::VG, Offset);
5289- } else {
5290- CFIBuilder.buildRestore (AArch64::VG);
5291- }
5292-
5293- MI.eraseFromParent ();
5294- }
5295-
52965236void AArch64FrameLowering::processFunctionBeforeFrameIndicesReplaced (
52975237 MachineFunction &MF, RegScavenger *RS = nullptr ) const {
52985238 for (auto &BB : MF)
52995239 for (MachineBasicBlock::iterator II = BB.begin (); II != BB.end ();) {
5300- if (requiresSaveVG (MF))
5301- emitVGSaveRestore (II++, this );
5302- else if (StackTaggingMergeSetTag)
5240+ if (StackTaggingMergeSetTag)
53035241 II = tryMergeAdjacentSTG (II, this , RS);
53045242 }
53055243
0 commit comments