@@ -4212,20 +4212,22 @@ struct ScopedScavengeOrSpill {
42124212 Register SpillCandidate, const TargetRegisterClass &RC,
42134213 LiveRegUnits const &UsedRegs,
42144214 BitVector const &AllocatableRegs,
4215- std::optional<int > & MaybeSpillFI)
4215+ std::optional<int > * MaybeSpillFI)
42164216 : MBB(MBB), MBBI(MBBI), RC(RC), TII(static_cast <const AArch64InstrInfo &>(
42174217 *MF.getSubtarget().getInstrInfo())),
42184218 TRI (*MF.getSubtarget().getRegisterInfo()) {
42194219 FreeReg = tryScavengeRegister (UsedRegs, AllocatableRegs);
42204220 if (FreeReg != AArch64::NoRegister)
42214221 return ;
4222- if (!MaybeSpillFI) {
4222+ assert (MaybeSpillFI && " Expected emergency spill slot FI information "
4223+ " (attempted to spill in prologue/epilogue?)" );
4224+ if (!MaybeSpillFI->has_value ()) {
42234225 MachineFrameInfo &MFI = MF.getFrameInfo ();
4224- MaybeSpillFI = MFI.CreateSpillStackObject (TRI.getSpillSize (RC),
4225- TRI.getSpillAlign (RC));
4226+ * MaybeSpillFI = MFI.CreateSpillStackObject (TRI.getSpillSize (RC),
4227+ TRI.getSpillAlign (RC));
42264228 }
42274229 FreeReg = SpilledReg = SpillCandidate;
4228- SpillFI = * MaybeSpillFI;
4230+ SpillFI = MaybeSpillFI-> value () ;
42294231 TII.storeRegToStackSlot (MBB, MBBI, SpilledReg, false , SpillFI, &RC, &TRI,
42304232 Register ());
42314233 }
@@ -4256,6 +4258,18 @@ struct EmergencyStackSlots {
42564258 std::optional<int > GPRSpillFI;
42574259};
42584260
4261+ /// Registers available for scavenging (ZPR, PPR3b, GPR).
4262+ struct ScavengeableRegs {
4263+ BitVector ZPRRegs;
4264+ BitVector PPR3bRegs;
4265+ BitVector GPRRegs;
4266+ };
4267+
4268+ static bool isInPrologueOrEpilogue (const MachineInstr &MI) {
4269+ return MI.getFlag (MachineInstr::FrameSetup) ||
4270+ MI.getFlag (MachineInstr::FrameDestroy);
4271+ }
4272+
42594273/// Expands:
42604274/// ```
42614275/// SPILL_PPR_TO_ZPR_SLOT_PSEUDO $p0, %stack.0, 0
@@ -4271,24 +4285,17 @@ static void expandSpillPPRToZPRSlotPseudo(MachineBasicBlock &MBB,
42714285 MachineInstr &MI,
42724286 const TargetRegisterInfo &TRI,
42734287 LiveRegUnits const &UsedRegs,
4274- BitVector const &ZPRRegs ,
4288+ ScavengeableRegs const &Regs ,
42754289 EmergencyStackSlots &SpillSlots) {
42764290 MachineFunction &MF = *MBB.getParent ();
42774291 auto *TII =
42784292 static_cast <const AArch64InstrInfo *>(MF.getSubtarget ().getInstrInfo ());
42794293
42804294 Register ZPredReg = AArch64::NoRegister;
4281- ScopedScavengeOrSpill FindZPRReg (MF, MBB, MachineBasicBlock::iterator (MI),
4282- ZPredReg, AArch64::Z0, AArch64::ZPRRegClass,
4283- UsedRegs, ZPRRegs, SpillSlots.ZPRSpillFI );
4284-
4285- #ifndef NDEBUG
4286- bool InPrologueOrEpilogue = MI.getFlag (MachineInstr::FrameSetup) ||
4287- MI.getFlag (MachineInstr::FrameDestroy);
4288- assert ((!FindZPRReg.hasSpilled () || !InPrologueOrEpilogue) &&
4289- " SPILL_PPR_TO_ZPR_SLOT_PSEUDO expansion should not spill in prologue "
4290- " or epilogue" );
4291- #endif
4295+ ScopedScavengeOrSpill FindZPRReg (
4296+ MF, MBB, MachineBasicBlock::iterator (MI), ZPredReg, AArch64::Z0,
4297+ AArch64::ZPRRegClass, UsedRegs, Regs.ZPRRegs ,
4298+ isInPrologueOrEpilogue (MI) ? nullptr : &SpillSlots.ZPRSpillFI );
42924299
42934300 SmallVector<MachineInstr *, 2 > MachineInstrs;
42944301 const DebugLoc &DL = MI.getDebugLoc ();
@@ -4321,44 +4328,37 @@ static void expandSpillPPRToZPRSlotPseudo(MachineBasicBlock &MBB,
43214328/// spilling if necessary). If the status flags are in use at the point of
43224329/// expansion they are preserved (by moving them to/from a GPR). This may cause
43234330/// an additional spill if no GPR is free at the expansion point.
4324- static bool expandFillPPRFromZPRSlotPseudo (
4325- MachineBasicBlock &MBB, MachineInstr &MI, const TargetRegisterInfo &TRI,
4326- LiveRegUnits const &UsedRegs, BitVector const &ZPRRegs,
4327- BitVector const &PPR3bRegs, BitVector const &GPRRegs,
4328- EmergencyStackSlots &SpillSlots) {
4331+ static bool expandFillPPRFromZPRSlotPseudo (MachineBasicBlock &MBB,
4332+ MachineInstr &MI,
4333+ const TargetRegisterInfo &TRI,
4334+ LiveRegUnits const &UsedRegs,
4335+ ScavengeableRegs const &Regs,
4336+ EmergencyStackSlots &SpillSlots) {
43294337 MachineFunction &MF = *MBB.getParent ();
43304338 auto *TII =
43314339 static_cast <const AArch64InstrInfo *>(MF.getSubtarget ().getInstrInfo ());
43324340
43334341 Register ZPredReg = AArch64::NoRegister;
4334- ScopedScavengeOrSpill FindZPRReg (MF, MBB, MachineBasicBlock::iterator (MI),
4335- ZPredReg, AArch64::Z0, AArch64::ZPRRegClass,
4336- UsedRegs, ZPRRegs, SpillSlots.ZPRSpillFI );
4342+ ScopedScavengeOrSpill FindZPRReg (
4343+ MF, MBB, MachineBasicBlock::iterator (MI), ZPredReg, AArch64::Z0,
4344+ AArch64::ZPRRegClass, UsedRegs, Regs.ZPRRegs ,
4345+ isInPrologueOrEpilogue (MI) ? nullptr : &SpillSlots.ZPRSpillFI );
43374346
43384347 Register PredReg = AArch64::NoRegister;
43394348 ScopedScavengeOrSpill FindPPR3bReg (
43404349 MF, MBB, MachineBasicBlock::iterator (MI), PredReg, AArch64::P0,
4341- AArch64::PPR_3bRegClass, UsedRegs, PPR3bRegs, SpillSlots.PPRSpillFI );
4350+ AArch64::PPR_3bRegClass, UsedRegs, Regs.PPR3bRegs ,
4351+ isInPrologueOrEpilogue (MI) ? nullptr : &SpillSlots.PPRSpillFI );
43424352
43434353 // Elide NZCV spills if we know it is not used.
43444354 Register NZCVSaveReg = AArch64::NoRegister;
43454355 bool IsNZCVUsed = !UsedRegs.available (AArch64::NZCV);
43464356 std::optional<ScopedScavengeOrSpill> FindGPRReg;
43474357 if (IsNZCVUsed)
4348- FindGPRReg.emplace (MF, MBB, MachineBasicBlock::iterator (MI), NZCVSaveReg,
4349- AArch64::X0, AArch64::GPR64RegClass, UsedRegs, GPRRegs,
4350- SpillSlots.GPRSpillFI );
4351-
4352- #ifndef NDEBUG
4353- bool Spilled = FindZPRReg.hasSpilled () || FindPPR3bReg.hasSpilled () ||
4354- (FindGPRReg && FindGPRReg->hasSpilled ());
4355- bool InPrologueOrEpilogue = MI.getFlag (MachineInstr::FrameSetup) ||
4356- MI.getFlag (MachineInstr::FrameDestroy);
4357- assert ((!Spilled || !InPrologueOrEpilogue) &&
4358- " FILL_PPR_FROM_ZPR_SLOT_PSEUDO expansion should not spill in prologue "
4359- " or epilogue" );
4360- #endif
4361-
4358+ FindGPRReg.emplace (
4359+ MF, MBB, MachineBasicBlock::iterator (MI), NZCVSaveReg, AArch64::X0,
4360+ AArch64::GPR64RegClass, UsedRegs, Regs.GPRRegs ,
4361+ isInPrologueOrEpilogue (MI) ? nullptr : &SpillSlots.GPRSpillFI );
43624362 SmallVector<MachineInstr *, 4 > MachineInstrs;
43634363 const DebugLoc &DL = MI.getDebugLoc ();
43644364 MachineInstrs.push_back (BuildMI (MBB, MI, DL, TII->get (AArch64::LDR_ZXI))
@@ -4397,26 +4397,27 @@ static bool expandFillPPRFromZPRSlotPseudo(
43974397
43984398/// Expands all FILL_PPR_FROM_ZPR_SLOT_PSEUDO and SPILL_PPR_TO_ZPR_SLOT_PSEUDO
43994399/// operations within the MachineBasicBlock \p MBB.
4400- static bool expandSMEPPRToZPRSpillPseudos (MachineBasicBlock &MBB,
4401- const TargetRegisterInfo &TRI,
4402- BitVector const &ZPRRegs,
4403- BitVector const &PPR3bRegs,
4404- BitVector const &GPRRegs,
4405- EmergencyStackSlots &SpillSlots) {
4400+ static bool expandSMEPPRToZPRSpillPseudos (
4401+ MachineBasicBlock &MBB, const TargetRegisterInfo &TRI,
4402+ ScavengeableRegs const &ScavengeableRegsBody,
4403+ ScavengeableRegs const &ScavengeableRegsFrameSetup,
4404+ EmergencyStackSlots &SpillSlots) {
44064405 LiveRegUnits UsedRegs (TRI);
44074406 UsedRegs.addLiveOuts (MBB);
44084407 bool HasPPRSpills = false ;
44094408 for (MachineInstr &MI : make_early_inc_range (reverse (MBB))) {
44104409 UsedRegs.stepBackward (MI);
4410+ ScavengeableRegs const &Regs = isInPrologueOrEpilogue (MI)
4411+ ? ScavengeableRegsFrameSetup
4412+ : ScavengeableRegsBody;
44114413 switch (MI.getOpcode ()) {
44124414 case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO:
4413- HasPPRSpills |= expandFillPPRFromZPRSlotPseudo (
4414- MBB, MI, TRI, UsedRegs, ZPRRegs, PPR3bRegs, GPRRegs , SpillSlots);
4415+ HasPPRSpills |= expandFillPPRFromZPRSlotPseudo (MBB, MI, TRI, UsedRegs,
4416+ Regs , SpillSlots);
44154417 MI.eraseFromParent ();
44164418 break ;
44174419 case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO:
4418- expandSpillPPRToZPRSlotPseudo (MBB, MI, TRI, UsedRegs, ZPRRegs,
4419- SpillSlots);
4420+ expandSpillPPRToZPRSlotPseudo (MBB, MI, TRI, UsedRegs, Regs, SpillSlots);
44204421 MI.eraseFromParent ();
44214422 break ;
44224423 default :
@@ -4434,40 +4435,47 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
44344435 const TargetSubtargetInfo &TSI = MF.getSubtarget ();
44354436 const TargetRegisterInfo &TRI = *TSI.getRegisterInfo ();
44364437 if (AFI->hasStackFrame () && TRI.getSpillSize (AArch64::PPRRegClass) == 16 ) {
4437- const uint32_t *CSRMask =
4438- TRI.getCallPreservedMask (MF, MF.getFunction ().getCallingConv ());
4438+ // If predicate spills are 16 bytes, we may need to expand
4439+ // SPILL_PPR_TO_ZPR_SLOT_PSEUDO/FILL_PPR_FROM_ZPR_SLOT_PSEUDO.
4440+
44394441 const MachineFrameInfo &MFI = MF.getFrameInfo ();
44404442 assert (MFI.isCalleeSavedInfoValid ());
4443+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo ();
44414444
44424445 auto ComputeScavengeableRegisters = [&](unsigned RegClassID) {
4443- BitVector ScavengeableRegs =
4444- TRI.getAllocatableSet (MF, TRI.getRegClass (RegClassID));
4445- if (CSRMask)
4446- ScavengeableRegs.clearBitsInMask (CSRMask);
4447- // TODO: Allow reusing callee-saved registers that have been saved.
4448- assert (ScavengeableRegs.count () > 0 && " Expected scavengeable registers" );
4449- return ScavengeableRegs;
4446+ BitVector Regs = TRI.getAllocatableSet (MF, TRI.getRegClass (RegClassID));
4447+
4448+ for (const CalleeSavedInfo &I : CSI)
4449+ if (TRI.getRegClass (RegClassID)->contains (I.getReg ()))
4450+ Regs.set (I.getReg ());
4451+
4452+ assert (Regs.count () > 0 && " Expected scavengeable registers" );
4453+ return Regs;
44504454 };
44514455
4452- // If predicates spills are 16-bytes we may need to expand
4453- // SPILL_PPR_TO_ZPR_SLOT_PSEUDO/FILL_PPR_FROM_ZPR_SLOT_PSEUDO.
4454- // These are handled separately as we need to compute register liveness to
4455- // scavenge a ZPR and PPR during the expansion.
4456- BitVector ZPRRegs = ComputeScavengeableRegisters (AArch64::ZPRRegClassID);
4456+ const uint32_t *CSRMask =
4457+ TRI.getCallPreservedMask (MF, MF.getFunction ().getCallingConv ());
4458+
4459+ // Registers free to scavenge in the function body.
4460+ ScavengeableRegs ScavengeableRegsBody;
4461+ ScavengeableRegsBody.ZPRRegs =
4462+ ComputeScavengeableRegisters (AArch64::ZPRRegClassID);
44574463 // Only p0-7 are possible as the second operand of cmpne (needed for fills).
4458- BitVector PPR3bRegs =
4464+ ScavengeableRegsBody. PPR3bRegs =
44594465 ComputeScavengeableRegisters (AArch64::PPR_3bRegClassID);
4460- BitVector GPRRegs = ComputeScavengeableRegisters (AArch64::GPR64RegClassID);
4461-
4462- bool SpillsAboveP7 =
4463- any_of (MFI.getCalleeSavedInfo (), [](const CalleeSavedInfo &CSI) {
4464- return AArch64::PPR_p8to15RegClass.contains (CSI.getReg ());
4465- });
4466- // We spill p4 in determineCalleeSaves() if a predicate above p8 is spilled,
4467- // as it may be needed to reload callee saves (if p0-p3 are used as
4468- // returns).
4469- if (SpillsAboveP7)
4470- PPR3bRegs.set (AArch64::P4);
4466+ ScavengeableRegsBody.GPRRegs =
4467+ ComputeScavengeableRegisters (AArch64::GPR64RegClassID);
4468+
4469+ // Registers free to scavenge in the prologue/epilogue.
4470+ ScavengeableRegs ScavengeableRegsFrameSetup = ScavengeableRegsBody;
4471+ ScavengeableRegsFrameSetup.ZPRRegs .clearBitsInMask (CSRMask);
4472+ ScavengeableRegsFrameSetup.GPRRegs .clearBitsInMask (CSRMask);
4473+ // Note: If p4 is scavengeable in the body, keep it scavengeable here too (even
4474+ // though it is a CSR). P4 is reloaded last in the epilogue and is needed to
4475+ // reload predicates >= p8 if p0-p3 are used as return values.
4476+ ScavengeableRegsFrameSetup.PPR3bRegs .clearBitsInMask (CSRMask);
4477+ if (ScavengeableRegsBody.PPR3bRegs [AArch64::P4])
4478+ ScavengeableRegsFrameSetup.PPR3bRegs .set (AArch64::P4);
44714479
44724480 EmergencyStackSlots SpillSlots;
44734481 for (MachineBasicBlock &MBB : MF) {
@@ -4478,7 +4486,8 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
44784486 // p0-p7 never requires spilling another predicate.
44794487 for (int Pass = 0 ; Pass < 2 ; Pass++) {
44804488 bool HasPPRSpills = expandSMEPPRToZPRSpillPseudos (
4481- MBB, TRI, ZPRRegs, PPR3bRegs, GPRRegs, SpillSlots);
4489+ MBB, TRI, ScavengeableRegsBody, ScavengeableRegsFrameSetup,
4490+ SpillSlots);
44824491 assert ((Pass == 0 || !HasPPRSpills) && " Did not expect PPR spills" );
44834492 if (!HasPPRSpills)
44844493 break ;
@@ -5532,9 +5541,10 @@ void AArch64FrameLowering::emitRemarks(
55325541 // spill/fill the predicate as a data vector (so are an FPR access).
55335542 if (MI.getOpcode () != AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO &&
55345543 MI.getOpcode () != AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO &&
5535- AArch64::PPRRegClass.contains (MI.getOperand (0 ).getReg ()))
5544+ AArch64::PPRRegClass.contains (MI.getOperand (0 ).getReg ())) {
5545+ MI.dump ();
55365546 RegTy = StackAccess::PPR;
5537- else
5547+ } else
55385548 RegTy = StackAccess::FPR;
55395549 } else if (AArch64InstrInfo::isFpOrNEON (MI)) {
55405550 RegTy = StackAccess::FPR;
0 commit comments