@@ -4438,42 +4438,49 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
44384438 // If predicates spills are 16-bytes we may need to expand
44394439 // SPILL_PPR_TO_ZPR_SLOT_PSEUDO/FILL_PPR_FROM_ZPR_SLOT_PSEUDO.
44404440
4441- const MachineFrameInfo &MFI = MF.getFrameInfo ();
4442- assert (MFI.isCalleeSavedInfoValid ());
4443- const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo ();
4441+ const uint32_t *CSRMask =
4442+ TRI.getCallPreservedMask (MF, MF.getFunction ().getCallingConv ());
44444443
44454444 auto ComputeScavengeableRegisters = [&](unsigned RegClassID) {
44464445 BitVector Regs = TRI.getAllocatableSet (MF, TRI.getRegClass (RegClassID));
4447-
4448- for (const CalleeSavedInfo &I : CSI)
4449- if (TRI.getRegClass (RegClassID)->contains (I.getReg ()))
4450- Regs.set (I.getReg ());
4451-
4446+ Regs.clearBitsInMask (CSRMask);
44524447 assert (Regs.count () > 0 && " Expected scavengeable registers" );
44534448 return Regs;
44544449 };
44554450
4456- const uint32_t *CSRMask =
4457- TRI.getCallPreservedMask (MF, MF.getFunction ().getCallingConv ());
4458-
4459- // Registers free to scavenge in the function body.
4460- ScavengeableRegs ScavengeableRegsBody;
4461- ScavengeableRegsBody.ZPRRegs =
4451+ // Registers free to scavenge in the prologue/epilogue.
4452+ ScavengeableRegs ScavengeableRegsFrameSetup;
4453+ ScavengeableRegsFrameSetup.ZPRRegs =
44624454 ComputeScavengeableRegisters (AArch64::ZPRRegClassID);
44634455 // Only p0-7 are possible as the second operand of cmpne (needed for fills).
4464- ScavengeableRegsBody .PPR3bRegs =
4456+ ScavengeableRegsFrameSetup .PPR3bRegs =
44654457 ComputeScavengeableRegisters (AArch64::PPR_3bRegClassID);
4466- ScavengeableRegsBody .GPRRegs =
4458+ ScavengeableRegsFrameSetup .GPRRegs =
44674459 ComputeScavengeableRegisters (AArch64::GPR64RegClassID);
44684460
4469- // Registers free to scavenge in the prologue/epilogue.
4470- ScavengeableRegs ScavengeableRegsFrameSetup = ScavengeableRegsBody;
4471- ScavengeableRegsFrameSetup.ZPRRegs .clearBitsInMask (CSRMask);
4472- ScavengeableRegsFrameSetup.GPRRegs .clearBitsInMask (CSRMask);
4473- // Note: If p4 was available allow it to be scavenged (even though it is a
4474- // CSR). P4 is reloaded last in the epilogue and is needed to reload
4475- // predicates >= p8 if p0-p3 are used as return values.
4476- ScavengeableRegsFrameSetup.PPR3bRegs .clearBitsInMask (CSRMask);
4461+ const MachineFrameInfo &MFI = MF.getFrameInfo ();
4462+ assert (MFI.isCalleeSavedInfoValid ());
4463+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo ();
4464+ auto MarkSavedRegistersAsAvailable =
4465+ [&, &Reserved = MF.getRegInfo ().getReservedRegs ()](
4466+ BitVector &Regs, unsigned RegClassID) {
4467+ for (const CalleeSavedInfo &I : CSI)
4468+ if (!Reserved[I.getReg ()] &&
4469+ TRI.getRegClass (RegClassID)->contains (I.getReg ()))
4470+ Regs.set (I.getReg ());
4471+ };
4472+
4473+ // Registers free to scavenge in the function body.
4474+ ScavengeableRegs ScavengeableRegsBody = ScavengeableRegsFrameSetup;
4475+ MarkSavedRegistersAsAvailable (ScavengeableRegsBody.ZPRRegs ,
4476+ AArch64::ZPRRegClassID);
4477+ MarkSavedRegistersAsAvailable (ScavengeableRegsBody.PPR3bRegs ,
4478+ AArch64::PPR_3bRegClassID);
4479+ MarkSavedRegistersAsAvailable (ScavengeableRegsBody.GPRRegs ,
4480+ AArch64::GPR64RegClassID);
4481+
4482+ // p4 (CSR) is reloaded last in the epilogue, so if it is saved, it can be
4483+ // used to reload other predicates.
44774484 if (ScavengeableRegsBody.PPR3bRegs [AArch64::P4])
44784485 ScavengeableRegsFrameSetup.PPR3bRegs .set (AArch64::P4);
44794486
0 commit comments