Skip to content

Commit f093d2e

Browse files
committed
[AArch64][SME] Rework VG CFI information for SM changes
This patch reworks how VG is handled around streaming mode changes.

Previously, for functions with streaming mode changes, we would:
- Save the incoming VG in the prologue
- Emit `.cfi_offset vg, <offset>` and `.cfi_restore vg` around streaming mode changes

Additionally, for locally streaming functions, we would:
- Also save the streaming VG in the prologue
- Emit `.cfi_offset vg, <incoming VG offset>` in the prologue
- Emit `.cfi_offset vg, <streaming VG offset>` and `.cfi_restore vg` around streaming mode changes

In both cases, this ends up doing more than necessary and would be hard for an unwinder to parse, as using `.cfi_offset` in this way does not follow the semantics of the underlying DWARF CFI opcodes.

So the new scheme in this patch is, in functions with streaming mode changes (including locally streaming functions), to:
- Save the incoming VG in the prologue
- Emit `.cfi_offset vg, <offset>` in the prologue (not at streaming mode changes)
- Never emit `.cfi_restore vg` (this is not meaningful for unwinding)
- Explicitly reference the incoming VG in the CFI expressions for SVE callee-saves
- Ensure the CFA is not described in terms of VG

A more in-depth discussion of this scheme is available in: https://gist.github.com/MacDue/b7a5c45d131d2440858165bfc903e97b

But the TL;DR is that, following this scheme, SME unwinding can be implemented with minimal changes to existing unwinders. All unwinders need to do is initialize VG to `CNTD` at the start of unwinding; everything else is then handled by standard opcodes (which don't need changes to handle VG).
1 parent af5f16b commit f093d2e

29 files changed

+1535
-1523
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 55 additions & 117 deletions
Original file line numberDiff line numberDiff line change
@@ -338,9 +338,11 @@ static bool requiresSaveVG(const MachineFunction &MF);
338338
// Conservatively, returns true if the function is likely to have SVE vectors
339339
// on the stack. This function is safe to be called before callee-saves or
340340
// object offsets have been determined.
341-
static bool isLikelyToHaveSVEStack(MachineFunction &MF) {
341+
static bool isLikelyToHaveSVEStack(const MachineFunction &MF) {
342342
auto *AFI = MF.getInfo<AArch64FunctionInfo>();
343-
if (AFI->isSVECC())
343+
if (MF.getFunction().getCallingConv() ==
344+
CallingConv::AArch64_SVE_VectorCall ||
345+
AFI->isSVECC())
344346
return true;
345347

346348
if (AFI->hasCalculatedStackSizeSVE())
@@ -532,6 +534,7 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
532534
bool AArch64FrameLowering::hasFPImpl(const MachineFunction &MF) const {
533535
const MachineFrameInfo &MFI = MF.getFrameInfo();
534536
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
537+
const AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
535538

536539
// Win64 EH requires a frame pointer if funclets are present, as the locals
537540
// are accessed off the frame pointer in both the parent function and the
@@ -545,6 +548,16 @@ bool AArch64FrameLowering::hasFPImpl(const MachineFunction &MF) const {
545548
MFI.hasStackMap() || MFI.hasPatchPoint() ||
546549
RegInfo->hasStackRealignment(MF))
547550
return true;
551+
// If we have streaming mode changes and SVE registers on the stack we need a
552+
// FP. This is as the stack size may depend on the VG at entry to the
553+
// function, which is saved before the SVE area (so unrecoverable without a
554+
// FP). Similar for locally streaming functions, but it is because we use
555+
// ADDSVL to set up the SVE stack (which might not match VG, even without
556+
// streaming-mode changes).
557+
if (AFI.needsDwarfUnwindInfo(MF) &&
558+
((requiresSaveVG(MF) || AFI.getSMEFnAttrs().hasStreamingBody()) &&
559+
(!AFI.hasCalculatedStackSizeSVE() || AFI.getStackSizeSVE() > 0)))
560+
return true;
548561
// With large callframes around we may need to use FP to access the scavenging
549562
// emergency spillslot.
550563
//
@@ -663,10 +676,6 @@ void AArch64FrameLowering::emitCalleeSavedGPRLocations(
663676
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
664677
MachineFunction &MF = *MBB.getParent();
665678
MachineFrameInfo &MFI = MF.getFrameInfo();
666-
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
667-
SMEAttrs Attrs = AFI->getSMEFnAttrs();
668-
bool LocallyStreaming =
669-
Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface();
670679

671680
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
672681
if (CSI.empty())
@@ -680,14 +689,6 @@ void AArch64FrameLowering::emitCalleeSavedGPRLocations(
680689

681690
assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
682691
int64_t Offset = MFI.getObjectOffset(FrameIdx) - getOffsetOfLocalArea();
683-
684-
// The location of VG will be emitted before each streaming-mode change in
685-
// the function. Only locally-streaming functions require emitting the
686-
// non-streaming VG location here.
687-
if ((LocallyStreaming && FrameIdx == AFI->getStreamingVGIdx()) ||
688-
(!LocallyStreaming && Info.getReg() == AArch64::VG))
689-
continue;
690-
691692
CFIBuilder.buildOffset(Info.getReg(), Offset);
692693
}
693694
}
@@ -707,8 +708,16 @@ void AArch64FrameLowering::emitCalleeSavedSVELocations(
707708
AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
708709
CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
709710

711+
std::optional<int64_t> IncomingVGOffsetFromDefCFA;
712+
if (requiresSaveVG(MF)) {
713+
auto IncomingVG = *find_if(
714+
reverse(CSI), [](auto &Info) { return Info.getReg() == AArch64::VG; });
715+
IncomingVGOffsetFromDefCFA =
716+
MFI.getObjectOffset(IncomingVG.getFrameIdx()) - getOffsetOfLocalArea();
717+
}
718+
710719
for (const auto &Info : CSI) {
711-
if (!(MFI.getStackID(Info.getFrameIdx()) == TargetStackID::ScalableVector))
720+
if (MFI.getStackID(Info.getFrameIdx()) != TargetStackID::ScalableVector)
712721
continue;
713722

714723
// Not all unwinders may know about SVE registers, so assume the lowest
@@ -722,7 +731,8 @@ void AArch64FrameLowering::emitCalleeSavedSVELocations(
722731
StackOffset::getScalable(MFI.getObjectOffset(Info.getFrameIdx())) -
723732
StackOffset::getFixed(AFI.getCalleeSavedStackSize(MFI));
724733

725-
CFIBuilder.insertCFIInst(createCFAOffset(TRI, Reg, Offset));
734+
CFIBuilder.insertCFIInst(
735+
createCFAOffset(TRI, Reg, Offset, IncomingVGOffsetFromDefCFA));
726736
}
727737
}
728738

@@ -1465,10 +1475,10 @@ bool requiresGetVGCall(MachineFunction &MF) {
14651475

14661476
static bool requiresSaveVG(const MachineFunction &MF) {
14671477
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
1478+
if (!AFI->needsDwarfUnwindInfo(MF) || !AFI->hasStreamingModeChanges())
1479+
return false;
14681480
// For Darwin platforms we don't save VG for non-SVE functions, even if SME
14691481
// is enabled with streaming mode changes.
1470-
if (!AFI->hasStreamingModeChanges())
1471-
return false;
14721482
auto &ST = MF.getSubtarget<AArch64Subtarget>();
14731483
if (ST.isTargetDarwin())
14741484
return ST.hasSVE();
@@ -1484,8 +1494,7 @@ static bool matchLibcall(const TargetLowering &TLI, const MachineOperand &MO,
14841494
bool isVGInstruction(MachineBasicBlock::iterator MBBI,
14851495
const TargetLowering &TLI) {
14861496
unsigned Opc = MBBI->getOpcode();
1487-
if (Opc == AArch64::CNTD_XPiI || Opc == AArch64::RDSVLI_XI ||
1488-
Opc == AArch64::UBFMXri)
1497+
if (Opc == AArch64::CNTD_XPiI)
14891498
return true;
14901499

14911500
if (!requiresGetVGCall(*MBBI->getMF()))
@@ -1509,9 +1518,8 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
15091518
unsigned NewOpc;
15101519

15111520
// If the function contains streaming mode changes, we expect instructions
1512-
// to calculate the value of VG before spilling. For locally-streaming
1513-
// functions, we need to do this for both the streaming and non-streaming
1514-
// vector length. Move past these instructions if necessary.
1521+
// to calculate the value of VG before spilling. Move past these instructions
1522+
// if necessary.
15151523
MachineFunction &MF = *MBB.getParent();
15161524
if (requiresSaveVG(MF)) {
15171525
auto &TLI = *MF.getSubtarget().getTargetLowering();
@@ -3475,7 +3483,6 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
34753483
MachineFunction &MF = *MBB.getParent();
34763484
auto &TLI = *MF.getSubtarget<AArch64Subtarget>().getTargetLowering();
34773485
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
3478-
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
34793486
bool NeedsWinCFI = needsWinCFI(MF);
34803487
DebugLoc DL;
34813488
SmallVector<RegPairInfo, 8> RegPairs;
@@ -3544,40 +3551,31 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
35443551
}
35453552

35463553
unsigned X0Scratch = AArch64::NoRegister;
3554+
auto RestoreX0 = make_scope_exit([&] {
3555+
if (X0Scratch != AArch64::NoRegister)
3556+
BuildMI(MBB, MI, DL, TII.get(AArch64::ORRXrr), AArch64::X0)
3557+
.addReg(AArch64::XZR)
3558+
.addReg(X0Scratch, RegState::Undef)
3559+
.addReg(X0Scratch, RegState::Implicit)
3560+
.setMIFlag(MachineInstr::FrameSetup);
3561+
});
3562+
35473563
if (Reg1 == AArch64::VG) {
35483564
// Find an available register to store value of VG to.
35493565
Reg1 = findScratchNonCalleeSaveRegister(&MBB, true);
35503566
assert(Reg1 != AArch64::NoRegister);
3551-
SMEAttrs Attrs = AFI->getSMEFnAttrs();
3552-
3553-
if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface() &&
3554-
AFI->getStreamingVGIdx() == std::numeric_limits<int>::max()) {
3555-
// For locally-streaming functions, we need to store both the streaming
3556-
// & non-streaming VG. Spill the streaming value first.
3557-
BuildMI(MBB, MI, DL, TII.get(AArch64::RDSVLI_XI), Reg1)
3558-
.addImm(1)
3559-
.setMIFlag(MachineInstr::FrameSetup);
3560-
BuildMI(MBB, MI, DL, TII.get(AArch64::UBFMXri), Reg1)
3561-
.addReg(Reg1)
3562-
.addImm(3)
3563-
.addImm(63)
3564-
.setMIFlag(MachineInstr::FrameSetup);
3565-
3566-
AFI->setStreamingVGIdx(RPI.FrameIdx);
3567-
} else if (MF.getSubtarget<AArch64Subtarget>().hasSVE()) {
3567+
if (MF.getSubtarget<AArch64Subtarget>().hasSVE()) {
35683568
BuildMI(MBB, MI, DL, TII.get(AArch64::CNTD_XPiI), Reg1)
35693569
.addImm(31)
35703570
.addImm(1)
35713571
.setMIFlag(MachineInstr::FrameSetup);
3572-
AFI->setVGIdx(RPI.FrameIdx);
35733572
} else {
35743573
const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
3575-
if (llvm::any_of(
3576-
MBB.liveins(),
3577-
[&STI](const MachineBasicBlock::RegisterMaskPair &LiveIn) {
3578-
return STI.getRegisterInfo()->isSuperOrSubRegisterEq(
3579-
AArch64::X0, LiveIn.PhysReg);
3580-
}))
3574+
if (any_of(MBB.liveins(),
3575+
[&STI](const MachineBasicBlock::RegisterMaskPair &LiveIn) {
3576+
return STI.getRegisterInfo()->isSuperOrSubRegisterEq(
3577+
AArch64::X0, LiveIn.PhysReg);
3578+
}))
35813579
X0Scratch = Reg1;
35823580

35833581
if (X0Scratch != AArch64::NoRegister)
@@ -3596,7 +3594,6 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
35963594
.addReg(AArch64::X0, RegState::ImplicitDefine)
35973595
.setMIFlag(MachineInstr::FrameSetup);
35983596
Reg1 = AArch64::X0;
3599-
AFI->setVGIdx(RPI.FrameIdx);
36003597
}
36013598
}
36023599

@@ -3691,13 +3688,6 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
36913688
if (RPI.isPaired())
36923689
MFI.setStackID(FrameIdxReg2, TargetStackID::ScalableVector);
36933690
}
3694-
3695-
if (X0Scratch != AArch64::NoRegister)
3696-
BuildMI(MBB, MI, DL, TII.get(AArch64::ORRXrr), AArch64::X0)
3697-
.addReg(AArch64::XZR)
3698-
.addReg(X0Scratch, RegState::Undef)
3699-
.addReg(X0Scratch, RegState::Implicit)
3700-
.setMIFlag(MachineInstr::FrameSetup);
37013691
}
37023692
return true;
37033693
}
@@ -4076,15 +4066,8 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
40764066

40774067
// Increase the callee-saved stack size if the function has streaming mode
40784068
// changes, as we will need to spill the value of the VG register.
4079-
// For locally streaming functions, we spill both the streaming and
4080-
// non-streaming VG value.
4081-
SMEAttrs Attrs = AFI->getSMEFnAttrs();
4082-
if (requiresSaveVG(MF)) {
4083-
if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface())
4084-
CSStackSize += 16;
4085-
else
4086-
CSStackSize += 8;
4087-
}
4069+
if (requiresSaveVG(MF))
4070+
CSStackSize += 8;
40884071

40894072
// Determine if a Hazard slot should be used, and increase the CSStackSize by
40904073
// StackHazardSize if so.
@@ -4235,29 +4218,19 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
42354218

42364219
// Insert VG into the list of CSRs, immediately before LR if saved.
42374220
if (requiresSaveVG(MF)) {
4238-
std::vector<CalleeSavedInfo> VGSaves;
4239-
SMEAttrs Attrs = AFI->getSMEFnAttrs();
4240-
4241-
auto VGInfo = CalleeSavedInfo(AArch64::VG);
4221+
CalleeSavedInfo VGInfo(AArch64::VG);
42424222
VGInfo.setRestored(false);
4243-
VGSaves.push_back(VGInfo);
4244-
4245-
// Add VG again if the function is locally-streaming, as we will spill two
4246-
// values.
4247-
if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface())
4248-
VGSaves.push_back(VGInfo);
4249-
4250-
bool InsertBeforeLR = false;
42514223

4224+
bool InsertedBeforeLR = false;
42524225
for (unsigned I = 0; I < CSI.size(); I++)
42534226
if (CSI[I].getReg() == AArch64::LR) {
4254-
InsertBeforeLR = true;
4255-
CSI.insert(CSI.begin() + I, VGSaves.begin(), VGSaves.end());
4227+
InsertedBeforeLR = true;
4228+
CSI.insert(CSI.begin() + I, VGInfo);
42564229
break;
42574230
}
42584231

4259-
if (!InsertBeforeLR)
4260-
llvm::append_range(CSI, VGSaves);
4232+
if (!InsertedBeforeLR)
4233+
CSI.push_back(VGInfo);
42614234
}
42624235

42634236
Register LastReg = 0;
@@ -5260,46 +5233,11 @@ MachineBasicBlock::iterator tryMergeAdjacentSTG(MachineBasicBlock::iterator II,
52605233
}
52615234
} // namespace
52625235

5263-
static void emitVGSaveRestore(MachineBasicBlock::iterator II,
5264-
const AArch64FrameLowering *TFI) {
5265-
MachineInstr &MI = *II;
5266-
MachineBasicBlock *MBB = MI.getParent();
5267-
MachineFunction *MF = MBB->getParent();
5268-
5269-
if (MI.getOpcode() != AArch64::VGSavePseudo &&
5270-
MI.getOpcode() != AArch64::VGRestorePseudo)
5271-
return;
5272-
5273-
auto *AFI = MF->getInfo<AArch64FunctionInfo>();
5274-
SMEAttrs FuncAttrs = AFI->getSMEFnAttrs();
5275-
bool LocallyStreaming =
5276-
FuncAttrs.hasStreamingBody() && !FuncAttrs.hasStreamingInterface();
5277-
5278-
int64_t VGFrameIdx =
5279-
LocallyStreaming ? AFI->getStreamingVGIdx() : AFI->getVGIdx();
5280-
assert(VGFrameIdx != std::numeric_limits<int>::max() &&
5281-
"Expected FrameIdx for VG");
5282-
5283-
CFIInstBuilder CFIBuilder(*MBB, II, MachineInstr::NoFlags);
5284-
if (MI.getOpcode() == AArch64::VGSavePseudo) {
5285-
const MachineFrameInfo &MFI = MF->getFrameInfo();
5286-
int64_t Offset =
5287-
MFI.getObjectOffset(VGFrameIdx) - TFI->getOffsetOfLocalArea();
5288-
CFIBuilder.buildOffset(AArch64::VG, Offset);
5289-
} else {
5290-
CFIBuilder.buildRestore(AArch64::VG);
5291-
}
5292-
5293-
MI.eraseFromParent();
5294-
}
5295-
52965236
void AArch64FrameLowering::processFunctionBeforeFrameIndicesReplaced(
52975237
MachineFunction &MF, RegScavenger *RS = nullptr) const {
52985238
for (auto &BB : MF)
52995239
for (MachineBasicBlock::iterator II = BB.begin(); II != BB.end();) {
5300-
if (requiresSaveVG(MF))
5301-
emitVGSaveRestore(II++, this);
5302-
else if (StackTaggingMergeSetTag)
5240+
if (StackTaggingMergeSetTag)
53035241
II = tryMergeAdjacentSTG(II, this, RS);
53045242
}
53055243

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 2 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9517,17 +9517,10 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
95179517

95189518
SDValue InGlue;
95199519
if (RequiresSMChange) {
9520-
if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) {
9521-
Chain = DAG.getNode(AArch64ISD::VG_SAVE, DL,
9522-
DAG.getVTList(MVT::Other, MVT::Glue), Chain);
9523-
InGlue = Chain.getValue(1);
9524-
}
9525-
9526-
SDValue NewChain =
9520+
Chain =
95279521
changeStreamingMode(DAG, DL, CallAttrs.callee().hasStreamingInterface(),
95289522
Chain, InGlue, getSMToggleCondition(CallAttrs));
9529-
Chain = NewChain.getValue(0);
9530-
InGlue = NewChain.getValue(1);
9523+
InGlue = Chain.getValue(1);
95319524
}
95329525

95339526
// Build a sequence of copy-to-reg nodes chained together with token chain
@@ -9712,13 +9705,6 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
97129705
Result = changeStreamingMode(
97139706
DAG, DL, !CallAttrs.callee().hasStreamingInterface(), Result, InGlue,
97149707
getSMToggleCondition(CallAttrs));
9715-
9716-
if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) {
9717-
InGlue = Result.getValue(1);
9718-
Result =
9719-
DAG.getNode(AArch64ISD::VG_RESTORE, DL,
9720-
DAG.getVTList(MVT::Other, MVT::Glue), {Result, InGlue});
9721-
}
97229708
}
97239709

97249710
if (RequiresLazySave || CallAttrs.requiresEnablingZAAfterCall())

0 commit comments

Comments
 (0)