Skip to content

Commit d3ff579

Browse files
committed
Fixups
1 parent b39e202 commit d3ff579

File tree

3 files changed

+223
-194
lines changed

3 files changed

+223
-194
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4336,13 +4336,9 @@ static bool expandFillPPRFromZPRSlotPseudo(
43364336
UsedRegs, ZPRRegs, SpillSlots.ZPRSpillFI);
43374337

43384338
Register PredReg = AArch64::NoRegister;
4339-
std::optional<ScopedScavengeOrSpill> FindPPR3bReg;
4340-
if (AArch64::PPR_3bRegClass.contains(MI.getOperand(0).getReg()))
4341-
PredReg = MI.getOperand(0).getReg();
4342-
else
4343-
FindPPR3bReg.emplace(MF, MBB, MachineBasicBlock::iterator(MI), PredReg,
4344-
AArch64::P0, AArch64::PPR_3bRegClass, UsedRegs,
4345-
PPR3bRegs, SpillSlots.PPRSpillFI);
4339+
ScopedScavengeOrSpill FindPPR3bReg(
4340+
MF, MBB, MachineBasicBlock::iterator(MI), PredReg, AArch64::P0,
4341+
AArch64::PPR_3bRegClass, UsedRegs, PPR3bRegs, SpillSlots.PPRSpillFI);
43464342

43474343
// Elide NZCV spills if we know it is not used.
43484344
Register NZCVSaveReg = AArch64::NoRegister;
@@ -4354,8 +4350,7 @@ static bool expandFillPPRFromZPRSlotPseudo(
43544350
SpillSlots.GPRSpillFI);
43554351

43564352
#ifndef NDEBUG
4357-
bool Spilled = FindZPRReg.hasSpilled() ||
4358-
(FindPPR3bReg && FindPPR3bReg->hasSpilled()) ||
4353+
bool Spilled = FindZPRReg.hasSpilled() || FindPPR3bReg.hasSpilled() ||
43594354
(FindGPRReg && FindGPRReg->hasSpilled());
43604355
bool InPrologueOrEpilogue = MI.getFlag(MachineInstr::FrameSetup) ||
43614356
MI.getFlag(MachineInstr::FrameDestroy);
@@ -4397,7 +4392,7 @@ static bool expandFillPPRFromZPRSlotPseudo(
43974392
.getInstr());
43984393

43994394
propagateFrameFlags(MI, MachineInstrs);
4400-
return FindPPR3bReg && FindPPR3bReg->hasSpilled();
4395+
return FindPPR3bReg.hasSpilled();
44014396
}
44024397

44034398
/// Expands all FILL_PPR_FROM_ZPR_SLOT_PSEUDO and SPILL_PPR_TO_ZPR_SLOT_PSEUDO
@@ -4450,6 +4445,7 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
44504445
if (CSRMask)
44514446
ScavengeableRegs.clearBitsInMask(CSRMask);
44524447
// TODO: Allow reusing callee-saved registers that have been saved.
4448+
assert(ScavengeableRegs.count() > 0 && "Expected scavengeable registers");
44534449
return ScavengeableRegs;
44544450
};
44554451

@@ -4475,9 +4471,15 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
44754471

44764472
EmergencyStackSlots SpillSlots;
44774473
for (MachineBasicBlock &MBB : MF) {
4474+
// In the case we had to spill a predicate (in the range p0-p7) to reload
4475+
// a predicate (>= p8), additional spill/fill pseudos will be created.
4476+
// These need an additional expansion pass. Note: There will only be at
4477+
// most two expansion passes, as spilling/filling a predicate in the range
4478+
// p0-p7 never requires spilling another predicate.
44784479
for (int Pass = 0; Pass < 2; Pass++) {
44794480
bool HasPPRSpills = expandSMEPPRToZPRSpillPseudos(
44804481
MBB, TRI, ZPRRegs, PPR3bRegs, GPRRegs, SpillSlots);
4482+
assert((Pass == 0 || !HasPPRSpills) && "Did not expect PPR spills");
44814483
if (!HasPPRSpills)
44824484
break;
44834485
}
@@ -5528,9 +5530,8 @@ void AArch64FrameLowering::emitRemarks(
55285530
if (MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector) {
55295531
// SPILL_PPR_TO_ZPR_SLOT_PSEUDO and FILL_PPR_FROM_ZPR_SLOT_PSEUDO
55305532
// spill/fill the predicate as a data vector (so are an FPR access).
5531-
if (!is_contained({AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO,
5532-
AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO},
5533-
MI.getOpcode()) &&
5533+
if (MI.getOpcode() != AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO &&
5534+
MI.getOpcode() != AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO &&
55345535
AArch64::PPRRegClass.contains(MI.getOperand(0).getReg()))
55355536
RegTy = StackAccess::PPR;
55365537
else

llvm/lib/Target/AArch64/AArch64Subtarget.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -414,7 +414,7 @@ unsigned AArch64Subtarget::getHwModeSet() const {
414414
//
415415
// FIXME: This overrides the table-gen'd `getHwModeSet()` which only looks at
416416
// CPU features.
417-
if (EnableZPRPredicateSpills.getValue() &&
417+
if (EnableZPRPredicateSpills.getValue() && getStreamingHazardSize() > 0 &&
418418
(isStreaming() || isStreamingCompatible())) {
419419
Modes |= (1 << 0);
420420
}

0 commit comments

Comments
 (0)