@@ -333,7 +333,10 @@ static bool needsWinCFI(const MachineFunction &MF);
333333static StackOffset getSVEStackSize (const MachineFunction &MF);
334334static Register findScratchNonCalleeSaveRegister (MachineBasicBlock *MBB,
335335 bool HasCall = false );
336- static bool requiresSaveVG (const MachineFunction &MF);
336+ static bool requiresSaveVG (const MachineFunction &MF) {
337+ return MF.getSubtarget <AArch64Subtarget>().getRegisterInfo ()->requiresSaveVG (
338+ MF);
339+ }
337340
/// Returns true if a homogeneous prolog or epilog code can be emitted
/// for the size optimization. If possible, a frame helper call is injected.
@@ -1105,8 +1108,7 @@ bool AArch64FrameLowering::canUseAsPrologue(
11051108
11061109 // May need a scratch register (for return value) if require making a special
11071110 // call
1108- if (requiresSaveVG (*MF) ||
1109- windowsRequiresStackProbe (*MF, std::numeric_limits<uint64_t >::max ()))
1111+ if (windowsRequiresStackProbe (*MF, std::numeric_limits<uint64_t >::max ()))
11101112 if (findScratchNonCalleeSaveRegister (TmpMBB, true ) == AArch64::NoRegister)
11111113 return false ;
11121114
@@ -1391,38 +1393,6 @@ bool requiresGetVGCall(MachineFunction &MF) {
13911393 !MF.getSubtarget <AArch64Subtarget>().hasSVE ();
13921394}
13931395
1394- static bool requiresSaveVG (const MachineFunction &MF) {
1395- const AArch64FunctionInfo *AFI = MF.getInfo <AArch64FunctionInfo>();
1396- // For Darwin platforms we don't save VG for non-SVE functions, even if SME
1397- // is enabled with streaming mode changes.
1398- if (!AFI->hasStreamingModeChanges ())
1399- return false ;
1400- auto &ST = MF.getSubtarget <AArch64Subtarget>();
1401- if (ST.isTargetDarwin ())
1402- return ST.hasSVE ();
1403- return true ;
1404- }
1405-
1406- bool isVGInstruction (MachineBasicBlock::iterator MBBI) {
1407- unsigned Opc = MBBI->getOpcode ();
1408- if (Opc == AArch64::CNTD_XPiI || Opc == AArch64::RDSVLI_XI ||
1409- Opc == AArch64::UBFMXri)
1410- return true ;
1411-
1412- if (requiresGetVGCall (*MBBI->getMF ())) {
1413- if (Opc == AArch64::ORRXrr)
1414- return true ;
1415-
1416- if (Opc == AArch64::BL) {
1417- auto Op1 = MBBI->getOperand (0 );
1418- return Op1.isSymbol () &&
1419- (StringRef (Op1.getSymbolName ()) == " __arm_get_current_vg" );
1420- }
1421- }
1422-
1423- return false ;
1424- }
1425-
14261396// Convert callee-save register save/restore instruction to do stack pointer
14271397// decrement/increment to allocate/deallocate the callee-save stack area by
14281398// converting store/load to use pre/post increment version.
@@ -1434,15 +1404,6 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
14341404 int CFAOffset = 0 ) {
14351405 unsigned NewOpc;
14361406
1437- // If the function contains streaming mode changes, we expect instructions
1438- // to calculate the value of VG before spilling. For locally-streaming
1439- // functions, we need to do this for both the streaming and non-streaming
1440- // vector length. Move past these instructions if necessary.
1441- MachineFunction &MF = *MBB.getParent ();
1442- if (requiresSaveVG (MF))
1443- while (isVGInstruction (MBBI))
1444- ++MBBI;
1445-
14461407 switch (MBBI->getOpcode ()) {
14471408 default :
14481409 llvm_unreachable (" Unexpected callee-save save/restore opcode!" );
@@ -1979,9 +1940,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
19791940 // pointer bump above.
19801941 while (MBBI != End && MBBI->getFlag (MachineInstr::FrameSetup) &&
19811942 !IsSVECalleeSave (MBBI)) {
1982- if (CombineSPBump &&
1983- // Only fix-up frame-setup load/store instructions.
1984- (!requiresSaveVG (MF) || !isVGInstruction (MBBI)))
1943+ if (CombineSPBump)
19851944 fixupCalleeSaveRestoreStackOffset (*MBBI, AFI->getLocalStackSize (),
19861945 NeedsWinCFI, &HasWinCFI);
19871946 ++MBBI;
@@ -3403,66 +3362,19 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
34033362 StrOpc =
34043363 Size == 16 ? AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO : AArch64::STR_PXI;
34053364 break ;
3406- case RegPairInfo::VG:
3407- StrOpc = AArch64::STRXui;
3408- break ;
3409- }
3410-
3411- unsigned X0Scratch = AArch64::NoRegister;
3412- if (Reg1 == AArch64::VG) {
3413- // Find an available register to store value of VG to.
3414- Reg1 = findScratchNonCalleeSaveRegister (&MBB, true );
3415- assert (Reg1 != AArch64::NoRegister);
3365+ case RegPairInfo::VG: {
34163366 SMEAttrs Attrs = AFI->getSMEFnAttrs ();
3417-
34183367 if (Attrs.hasStreamingBody () && !Attrs.hasStreamingInterface () &&
34193368 AFI->getStreamingVGIdx () == std::numeric_limits<int >::max ()) {
34203369 // For locally-streaming functions, we need to store both the streaming
3421- // & non-streaming VG. Spill the streaming value first.
3422- BuildMI (MBB, MI, DL, TII.get (AArch64::RDSVLI_XI), Reg1)
3423- .addImm (1 )
3424- .setMIFlag (MachineInstr::FrameSetup);
3425- BuildMI (MBB, MI, DL, TII.get (AArch64::UBFMXri), Reg1)
3426- .addReg (Reg1)
3427- .addImm (3 )
3428- .addImm (63 )
3429- .setMIFlag (MachineInstr::FrameSetup);
3430-
3370+ // & non-streaming VG.
34313371 AFI->setStreamingVGIdx (RPI.FrameIdx );
3432- } else if (MF.getSubtarget <AArch64Subtarget>().hasSVE ()) {
3433- BuildMI (MBB, MI, DL, TII.get (AArch64::CNTD_XPiI), Reg1)
3434- .addImm (31 )
3435- .addImm (1 )
3436- .setMIFlag (MachineInstr::FrameSetup);
3437- AFI->setVGIdx (RPI.FrameIdx );
34383372 } else {
3439- const AArch64Subtarget &STI = MF.getSubtarget <AArch64Subtarget>();
3440- if (llvm::any_of (
3441- MBB.liveins (),
3442- [&STI](const MachineBasicBlock::RegisterMaskPair &LiveIn) {
3443- return STI.getRegisterInfo ()->isSuperOrSubRegisterEq (
3444- AArch64::X0, LiveIn.PhysReg );
3445- }))
3446- X0Scratch = Reg1;
3447-
3448- if (X0Scratch != AArch64::NoRegister)
3449- BuildMI (MBB, MI, DL, TII.get (AArch64::ORRXrr), Reg1)
3450- .addReg (AArch64::XZR)
3451- .addReg (AArch64::X0, RegState::Undef)
3452- .addReg (AArch64::X0, RegState::Implicit)
3453- .setMIFlag (MachineInstr::FrameSetup);
3454-
3455- const uint32_t *RegMask = TRI->getCallPreservedMask (
3456- MF,
3457- CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1);
3458- BuildMI (MBB, MI, DL, TII.get (AArch64::BL))
3459- .addExternalSymbol (" __arm_get_current_vg" )
3460- .addRegMask (RegMask)
3461- .addReg (AArch64::X0, RegState::ImplicitDefine)
3462- .setMIFlag (MachineInstr::FrameSetup);
3463- Reg1 = AArch64::X0;
34643373 AFI->setVGIdx (RPI.FrameIdx );
34653374 }
3375+ // VG will be written to the frame indices immediately after the prologue.
3376+ continue ;
3377+ }
34663378 }
34673379
34683380 LLVM_DEBUG (dbgs () << " CSR spill: (" << printReg (Reg1, TRI);
@@ -3556,13 +3468,6 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
35563468 if (RPI.isPaired ())
35573469 MFI.setStackID (FrameIdxReg2, TargetStackID::ScalableVector);
35583470 }
3559-
3560- if (X0Scratch != AArch64::NoRegister)
3561- BuildMI (MBB, MI, DL, TII.get (AArch64::ORRXrr), AArch64::X0)
3562- .addReg (AArch64::XZR)
3563- .addReg (X0Scratch, RegState::Undef)
3564- .addReg (X0Scratch, RegState::Implicit)
3565- .setMIFlag (MachineInstr::FrameSetup);
35663471 }
35673472 return true ;
35683473}
@@ -4092,31 +3997,19 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
40923997 MaxCSFrameIndex = FrameIdx;
40933998 }
40943999
4095- // Insert VG into the list of CSRs, immediately before LR if saved.
40964000 if (requiresSaveVG (MF)) {
4097- std::vector<CalleeSavedInfo> VGSaves;
4098- SMEAttrs Attrs = AFI->getSMEFnAttrs ();
4099-
4100- auto VGInfo = CalleeSavedInfo (AArch64::VG);
4001+ CalleeSavedInfo VGInfo (AArch64::VG);
41014002 VGInfo.setRestored (false );
4102- VGSaves. push_back ( VGInfo) ;
4003+ SmallVector<CalleeSavedInfo, 2 > VGSaves{ VGInfo} ;
41034004
41044005 // Add VG again if the function is locally-streaming, as we will spill two
41054006 // values.
4007+ SMEAttrs Attrs = AFI->getSMEFnAttrs ();
41064008 if (Attrs.hasStreamingBody () && !Attrs.hasStreamingInterface ())
41074009 VGSaves.push_back (VGInfo);
41084010
4109- bool InsertBeforeLR = false ;
4110-
4111- for (unsigned I = 0 ; I < CSI.size (); I++)
4112- if (CSI[I].getReg () == AArch64::LR) {
4113- InsertBeforeLR = true ;
4114- CSI.insert (CSI.begin () + I, VGSaves.begin (), VGSaves.end ());
4115- break ;
4116- }
4117-
4118- if (!InsertBeforeLR)
4119- llvm::append_range (CSI, VGSaves);
4011+ // Insert the VG saves at the start of the CSI (alongside GPRs).
4012+ CSI.insert (CSI.begin (), VGSaves.begin (), VGSaves.end ());
41204013 }
41214014
41224015 Register LastReg = 0 ;
@@ -5135,13 +5028,28 @@ static void emitVGSaveRestore(MachineBasicBlock::iterator II,
51355028 MI.eraseFromParent ();
51365029}
51375030
5031+ static void replaceVGTargetIndices (MachineBasicBlock::iterator II,
5032+ AArch64FunctionInfo *AFI) {
5033+ for (auto &MO : II->explicit_operands ()) {
5034+ if (MO.isTargetIndex ()) {
5035+ if (MO.getIndex () == AArch64::SAVED_STREAMING_VG_SLOT)
5036+ MO.ChangeToFrameIndex (AFI->getStreamingVGIdx ());
5037+ if (MO.getIndex () == AArch64::SAVED_VG_SLOT)
5038+ MO.ChangeToFrameIndex (AFI->getVGIdx ());
5039+ }
5040+ }
5041+ }
5042+
51385043void AArch64FrameLowering::processFunctionBeforeFrameIndicesReplaced (
51395044 MachineFunction &MF, RegScavenger *RS = nullptr ) const {
5045+ bool VGSaved = requiresSaveVG (MF);
5046+ AArch64FunctionInfo *AFI = MF.getInfo <AArch64FunctionInfo>();
51405047 for (auto &BB : MF)
51415048 for (MachineBasicBlock::iterator II = BB.begin (); II != BB.end ();) {
5142- if (requiresSaveVG (MF))
5049+ if (VGSaved) {
5050+ replaceVGTargetIndices (II, AFI);
51435051 emitVGSaveRestore (II++, this );
5144- else if (StackTaggingMergeSetTag)
5052+ } else if (StackTaggingMergeSetTag)
51455053 II = tryMergeAdjacentSTG (II, this , RS);
51465054 }
51475055}
0 commit comments