Skip to content

Commit 636ddf6

Browse files
committed
Fixups
1 parent d3ff579 commit 636ddf6

File tree

3 files changed

+149
-132
lines changed

3 files changed

+149
-132
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 89 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -4212,20 +4212,22 @@ struct ScopedScavengeOrSpill {
42124212
Register SpillCandidate, const TargetRegisterClass &RC,
42134213
LiveRegUnits const &UsedRegs,
42144214
BitVector const &AllocatableRegs,
4215-
std::optional<int> &MaybeSpillFI)
4215+
std::optional<int> *MaybeSpillFI)
42164216
: MBB(MBB), MBBI(MBBI), RC(RC), TII(static_cast<const AArch64InstrInfo &>(
42174217
*MF.getSubtarget().getInstrInfo())),
42184218
TRI(*MF.getSubtarget().getRegisterInfo()) {
42194219
FreeReg = tryScavengeRegister(UsedRegs, AllocatableRegs);
42204220
if (FreeReg != AArch64::NoRegister)
42214221
return;
4222-
if (!MaybeSpillFI) {
4222+
assert(MaybeSpillFI && "Expected emergency spill slot FI information "
4223+
"(attempted to spill in prologue/epilogue?)");
4224+
if (!MaybeSpillFI->has_value()) {
42234225
MachineFrameInfo &MFI = MF.getFrameInfo();
4224-
MaybeSpillFI = MFI.CreateSpillStackObject(TRI.getSpillSize(RC),
4225-
TRI.getSpillAlign(RC));
4226+
*MaybeSpillFI = MFI.CreateSpillStackObject(TRI.getSpillSize(RC),
4227+
TRI.getSpillAlign(RC));
42264228
}
42274229
FreeReg = SpilledReg = SpillCandidate;
4228-
SpillFI = *MaybeSpillFI;
4230+
SpillFI = MaybeSpillFI->value();
42294231
TII.storeRegToStackSlot(MBB, MBBI, SpilledReg, false, SpillFI, &RC, &TRI,
42304232
Register());
42314233
}
@@ -4256,6 +4258,18 @@ struct EmergencyStackSlots {
42564258
std::optional<int> GPRSpillFI;
42574259
};
42584260

4261+
/// Registers available for scavenging (ZPR, PPR3b, GPR).
4262+
struct ScavengeableRegs {
4263+
BitVector ZPRRegs;
4264+
BitVector PPR3bRegs;
4265+
BitVector GPRRegs;
4266+
};
4267+
4268+
static bool isInPrologueOrEpilogue(const MachineInstr &MI) {
4269+
return MI.getFlag(MachineInstr::FrameSetup) ||
4270+
MI.getFlag(MachineInstr::FrameDestroy);
4271+
}
4272+
42594273
/// Expands:
42604274
/// ```
42614275
/// SPILL_PPR_TO_ZPR_SLOT_PSEUDO $p0, %stack.0, 0
@@ -4271,24 +4285,17 @@ static void expandSpillPPRToZPRSlotPseudo(MachineBasicBlock &MBB,
42714285
MachineInstr &MI,
42724286
const TargetRegisterInfo &TRI,
42734287
LiveRegUnits const &UsedRegs,
4274-
BitVector const &ZPRRegs,
4288+
ScavengeableRegs const &Regs,
42754289
EmergencyStackSlots &SpillSlots) {
42764290
MachineFunction &MF = *MBB.getParent();
42774291
auto *TII =
42784292
static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
42794293

42804294
Register ZPredReg = AArch64::NoRegister;
4281-
ScopedScavengeOrSpill FindZPRReg(MF, MBB, MachineBasicBlock::iterator(MI),
4282-
ZPredReg, AArch64::Z0, AArch64::ZPRRegClass,
4283-
UsedRegs, ZPRRegs, SpillSlots.ZPRSpillFI);
4284-
4285-
#ifndef NDEBUG
4286-
bool InPrologueOrEpilogue = MI.getFlag(MachineInstr::FrameSetup) ||
4287-
MI.getFlag(MachineInstr::FrameDestroy);
4288-
assert((!FindZPRReg.hasSpilled() || !InPrologueOrEpilogue) &&
4289-
"SPILL_PPR_TO_ZPR_SLOT_PSEUDO expansion should not spill in prologue "
4290-
"or epilogue");
4291-
#endif
4295+
ScopedScavengeOrSpill FindZPRReg(
4296+
MF, MBB, MachineBasicBlock::iterator(MI), ZPredReg, AArch64::Z0,
4297+
AArch64::ZPRRegClass, UsedRegs, Regs.ZPRRegs,
4298+
isInPrologueOrEpilogue(MI) ? nullptr : &SpillSlots.ZPRSpillFI);
42924299

42934300
SmallVector<MachineInstr *, 2> MachineInstrs;
42944301
const DebugLoc &DL = MI.getDebugLoc();
@@ -4321,44 +4328,37 @@ static void expandSpillPPRToZPRSlotPseudo(MachineBasicBlock &MBB,
43214328
/// spilling if necessary). If the status flags are in use at the point of
43224329
/// expansion they are preserved (by moving them to/from a GPR). This may cause
43234330
/// an additional spill if no GPR is free at the expansion point.
4324-
static bool expandFillPPRFromZPRSlotPseudo(
4325-
MachineBasicBlock &MBB, MachineInstr &MI, const TargetRegisterInfo &TRI,
4326-
LiveRegUnits const &UsedRegs, BitVector const &ZPRRegs,
4327-
BitVector const &PPR3bRegs, BitVector const &GPRRegs,
4328-
EmergencyStackSlots &SpillSlots) {
4331+
static bool expandFillPPRFromZPRSlotPseudo(MachineBasicBlock &MBB,
4332+
MachineInstr &MI,
4333+
const TargetRegisterInfo &TRI,
4334+
LiveRegUnits const &UsedRegs,
4335+
ScavengeableRegs const &Regs,
4336+
EmergencyStackSlots &SpillSlots) {
43294337
MachineFunction &MF = *MBB.getParent();
43304338
auto *TII =
43314339
static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
43324340

43334341
Register ZPredReg = AArch64::NoRegister;
4334-
ScopedScavengeOrSpill FindZPRReg(MF, MBB, MachineBasicBlock::iterator(MI),
4335-
ZPredReg, AArch64::Z0, AArch64::ZPRRegClass,
4336-
UsedRegs, ZPRRegs, SpillSlots.ZPRSpillFI);
4342+
ScopedScavengeOrSpill FindZPRReg(
4343+
MF, MBB, MachineBasicBlock::iterator(MI), ZPredReg, AArch64::Z0,
4344+
AArch64::ZPRRegClass, UsedRegs, Regs.ZPRRegs,
4345+
isInPrologueOrEpilogue(MI) ? nullptr : &SpillSlots.ZPRSpillFI);
43374346

43384347
Register PredReg = AArch64::NoRegister;
43394348
ScopedScavengeOrSpill FindPPR3bReg(
43404349
MF, MBB, MachineBasicBlock::iterator(MI), PredReg, AArch64::P0,
4341-
AArch64::PPR_3bRegClass, UsedRegs, PPR3bRegs, SpillSlots.PPRSpillFI);
4350+
AArch64::PPR_3bRegClass, UsedRegs, Regs.PPR3bRegs,
4351+
isInPrologueOrEpilogue(MI) ? nullptr : &SpillSlots.PPRSpillFI);
43424352

43434353
// Elide NZCV spills if we know it is not used.
43444354
Register NZCVSaveReg = AArch64::NoRegister;
43454355
bool IsNZCVUsed = !UsedRegs.available(AArch64::NZCV);
43464356
std::optional<ScopedScavengeOrSpill> FindGPRReg;
43474357
if (IsNZCVUsed)
4348-
FindGPRReg.emplace(MF, MBB, MachineBasicBlock::iterator(MI), NZCVSaveReg,
4349-
AArch64::X0, AArch64::GPR64RegClass, UsedRegs, GPRRegs,
4350-
SpillSlots.GPRSpillFI);
4351-
4352-
#ifndef NDEBUG
4353-
bool Spilled = FindZPRReg.hasSpilled() || FindPPR3bReg.hasSpilled() ||
4354-
(FindGPRReg && FindGPRReg->hasSpilled());
4355-
bool InPrologueOrEpilogue = MI.getFlag(MachineInstr::FrameSetup) ||
4356-
MI.getFlag(MachineInstr::FrameDestroy);
4357-
assert((!Spilled || !InPrologueOrEpilogue) &&
4358-
"FILL_PPR_FROM_ZPR_SLOT_PSEUDO expansion should not spill in prologue "
4359-
"or epilogue");
4360-
#endif
4361-
4358+
FindGPRReg.emplace(
4359+
MF, MBB, MachineBasicBlock::iterator(MI), NZCVSaveReg, AArch64::X0,
4360+
AArch64::GPR64RegClass, UsedRegs, Regs.GPRRegs,
4361+
isInPrologueOrEpilogue(MI) ? nullptr : &SpillSlots.GPRSpillFI);
43624362
SmallVector<MachineInstr *, 4> MachineInstrs;
43634363
const DebugLoc &DL = MI.getDebugLoc();
43644364
MachineInstrs.push_back(BuildMI(MBB, MI, DL, TII->get(AArch64::LDR_ZXI))
@@ -4397,26 +4397,27 @@ static bool expandFillPPRFromZPRSlotPseudo(
43974397

43984398
/// Expands all FILL_PPR_FROM_ZPR_SLOT_PSEUDO and SPILL_PPR_TO_ZPR_SLOT_PSEUDO
43994399
/// operations within the MachineBasicBlock \p MBB.
4400-
static bool expandSMEPPRToZPRSpillPseudos(MachineBasicBlock &MBB,
4401-
const TargetRegisterInfo &TRI,
4402-
BitVector const &ZPRRegs,
4403-
BitVector const &PPR3bRegs,
4404-
BitVector const &GPRRegs,
4405-
EmergencyStackSlots &SpillSlots) {
4400+
static bool expandSMEPPRToZPRSpillPseudos(
4401+
MachineBasicBlock &MBB, const TargetRegisterInfo &TRI,
4402+
ScavengeableRegs const &ScavengeableRegsBody,
4403+
ScavengeableRegs const &ScavengeableRegsFrameSetup,
4404+
EmergencyStackSlots &SpillSlots) {
44064405
LiveRegUnits UsedRegs(TRI);
44074406
UsedRegs.addLiveOuts(MBB);
44084407
bool HasPPRSpills = false;
44094408
for (MachineInstr &MI : make_early_inc_range(reverse(MBB))) {
44104409
UsedRegs.stepBackward(MI);
4410+
ScavengeableRegs const &Regs = isInPrologueOrEpilogue(MI)
4411+
? ScavengeableRegsFrameSetup
4412+
: ScavengeableRegsBody;
44114413
switch (MI.getOpcode()) {
44124414
case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO:
4413-
HasPPRSpills |= expandFillPPRFromZPRSlotPseudo(
4414-
MBB, MI, TRI, UsedRegs, ZPRRegs, PPR3bRegs, GPRRegs, SpillSlots);
4415+
HasPPRSpills |= expandFillPPRFromZPRSlotPseudo(MBB, MI, TRI, UsedRegs,
4416+
Regs, SpillSlots);
44154417
MI.eraseFromParent();
44164418
break;
44174419
case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO:
4418-
expandSpillPPRToZPRSlotPseudo(MBB, MI, TRI, UsedRegs, ZPRRegs,
4419-
SpillSlots);
4420+
expandSpillPPRToZPRSlotPseudo(MBB, MI, TRI, UsedRegs, Regs, SpillSlots);
44204421
MI.eraseFromParent();
44214422
break;
44224423
default:
@@ -4434,40 +4435,47 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
44344435
const TargetSubtargetInfo &TSI = MF.getSubtarget();
44354436
const TargetRegisterInfo &TRI = *TSI.getRegisterInfo();
44364437
if (AFI->hasStackFrame() && TRI.getSpillSize(AArch64::PPRRegClass) == 16) {
4437-
const uint32_t *CSRMask =
4438-
TRI.getCallPreservedMask(MF, MF.getFunction().getCallingConv());
4438+
// If predicate spills are 16 bytes we may need to expand
4439+
// SPILL_PPR_TO_ZPR_SLOT_PSEUDO/FILL_PPR_FROM_ZPR_SLOT_PSEUDO.
4440+
44394441
const MachineFrameInfo &MFI = MF.getFrameInfo();
44404442
assert(MFI.isCalleeSavedInfoValid());
4443+
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
44414444

44424445
auto ComputeScavengeableRegisters = [&](unsigned RegClassID) {
4443-
BitVector ScavengeableRegs =
4444-
TRI.getAllocatableSet(MF, TRI.getRegClass(RegClassID));
4445-
if (CSRMask)
4446-
ScavengeableRegs.clearBitsInMask(CSRMask);
4447-
// TODO: Allow reusing callee-saved registers that have been saved.
4448-
assert(ScavengeableRegs.count() > 0 && "Expected scavengeable registers");
4449-
return ScavengeableRegs;
4446+
BitVector Regs = TRI.getAllocatableSet(MF, TRI.getRegClass(RegClassID));
4447+
4448+
for (const CalleeSavedInfo &I : CSI)
4449+
if (TRI.getRegClass(RegClassID)->contains(I.getReg()))
4450+
Regs.set(I.getReg());
4451+
4452+
assert(Regs.count() > 0 && "Expected scavengeable registers");
4453+
return Regs;
44504454
};
44514455

4452-
// If predicates spills are 16-bytes we may need to expand
4453-
// SPILL_PPR_TO_ZPR_SLOT_PSEUDO/FILL_PPR_FROM_ZPR_SLOT_PSEUDO.
4454-
// These are handled separately as we need to compute register liveness to
4455-
// scavenge a ZPR and PPR during the expansion.
4456-
BitVector ZPRRegs = ComputeScavengeableRegisters(AArch64::ZPRRegClassID);
4456+
const uint32_t *CSRMask =
4457+
TRI.getCallPreservedMask(MF, MF.getFunction().getCallingConv());
4458+
4459+
// Registers free to scavenge in the function body.
4460+
ScavengeableRegs ScavengeableRegsBody;
4461+
ScavengeableRegsBody.ZPRRegs =
4462+
ComputeScavengeableRegisters(AArch64::ZPRRegClassID);
44574463
// Only p0-7 are possible as the second operand of cmpne (needed for fills).
4458-
BitVector PPR3bRegs =
4464+
ScavengeableRegsBody.PPR3bRegs =
44594465
ComputeScavengeableRegisters(AArch64::PPR_3bRegClassID);
4460-
BitVector GPRRegs = ComputeScavengeableRegisters(AArch64::GPR64RegClassID);
4461-
4462-
bool SpillsAboveP7 =
4463-
any_of(MFI.getCalleeSavedInfo(), [](const CalleeSavedInfo &CSI) {
4464-
return AArch64::PPR_p8to15RegClass.contains(CSI.getReg());
4465-
});
4466-
// We spill p4 in determineCalleeSaves() if a predicate above p8 is spilled,
4467-
// as it may be needed to reload callee saves (if p0-p3 are used as
4468-
// returns).
4469-
if (SpillsAboveP7)
4470-
PPR3bRegs.set(AArch64::P4);
4466+
ScavengeableRegsBody.GPRRegs =
4467+
ComputeScavengeableRegisters(AArch64::GPR64RegClassID);
4468+
4469+
// Registers free to scavenge in the prologue/epilogue.
4470+
ScavengeableRegs ScavengeableRegsFrameSetup = ScavengeableRegsBody;
4471+
ScavengeableRegsFrameSetup.ZPRRegs.clearBitsInMask(CSRMask);
4472+
ScavengeableRegsFrameSetup.GPRRegs.clearBitsInMask(CSRMask);
4473+
// Note: If p4 was available allow it to be scavenged (even though it is a
4474+
// CSR). P4 is reloaded last in the epilogue and is needed to reload
4475+
// predicates >= p8 if p0-p3 are used as return values.
4476+
ScavengeableRegsFrameSetup.PPR3bRegs.clearBitsInMask(CSRMask);
4477+
if (ScavengeableRegsBody.PPR3bRegs[AArch64::P4])
4478+
ScavengeableRegsFrameSetup.PPR3bRegs.set(AArch64::P4);
44714479

44724480
EmergencyStackSlots SpillSlots;
44734481
for (MachineBasicBlock &MBB : MF) {
@@ -4478,7 +4486,8 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
44784486
// p0-p7 never requires spilling another predicate.
44794487
for (int Pass = 0; Pass < 2; Pass++) {
44804488
bool HasPPRSpills = expandSMEPPRToZPRSpillPseudos(
4481-
MBB, TRI, ZPRRegs, PPR3bRegs, GPRRegs, SpillSlots);
4489+
MBB, TRI, ScavengeableRegsBody, ScavengeableRegsFrameSetup,
4490+
SpillSlots);
44824491
assert((Pass == 0 || !HasPPRSpills) && "Did not expect PPR spills");
44834492
if (!HasPPRSpills)
44844493
break;
@@ -5532,9 +5541,10 @@ void AArch64FrameLowering::emitRemarks(
55325541
// spill/fill the predicate as a data vector (so are an FPR access).
55335542
if (MI.getOpcode() != AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO &&
55345543
MI.getOpcode() != AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO &&
5535-
AArch64::PPRRegClass.contains(MI.getOperand(0).getReg()))
5544+
AArch64::PPRRegClass.contains(MI.getOperand(0).getReg())) {
5545+
MI.dump();
55365546
RegTy = StackAccess::PPR;
5537-
else
5547+
} else
55385548
RegTy = StackAccess::FPR;
55395549
} else if (AArch64InstrInfo::isFpOrNEON(MI)) {
55405550
RegTy = StackAccess::FPR;

llvm/lib/Target/AArch64/AArch64Subtarget.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -414,7 +414,7 @@ unsigned AArch64Subtarget::getHwModeSet() const {
414414
//
415415
// FIXME: This overrides the table-gen'd `getHwModeSet()` which only looks at
416416
// CPU features.
417-
if (EnableZPRPredicateSpills.getValue() && getStreamingHazardSize() > 0 &&
417+
if (EnableZPRPredicateSpills.getValue() &&
418418
(isStreaming() || isStreamingCompatible())) {
419419
Modes |= (1 << 0);
420420
}

0 commit comments

Comments
 (0)