Skip to content

Commit 478b4b0

Browse files
authored
[AArch64][SME] Rework VG CFI information for streaming-mode changes (#152283)
This patch reworks how VG is handled around streaming mode changes. Previously, for functions with streaming mode changes, we would: - Save the incoming VG in the prologue - Emit `.cfi_offset vg, <offset>` and `.cfi_restore vg` around streaming mode changes Additionally, for locally streaming functions, we would: - Also save the streaming VG in the prologue - Emit `.cfi_offset vg, <incoming VG offset>` in the prologue - Emit `.cfi_offset vg, <streaming VG offset>` and `.cfi_restore vg` around streaming mode changes In both cases, this ends up doing more than necessary and would be hard for an unwinder to parse, as using `.cfi_offset` in this way does not follow the semantics of the underlying DWARF CFI opcodes. So the new scheme in this patch is to: In functions with streaming mode changes (inc locally streaming) - Save the incoming VG in the prologue - Emit `.cfi_offset vg, <offset>` in the prologue (not at streaming mode changes) - Emit `.cfi_restore vg` after the saved VG has been deallocated - This will be in the function epilogue, where VG is always the same as the entry VG - Explicitly reference the incoming VG expressions for SVE callee-saves in functions with streaming mode changes - Ensure the CFA is not described in terms of VG in functions with streaming mode changes A more in-depth discussion of this scheme is available in: https://gist.github.com/MacDue/b7a5c45d131d2440858165bfc903e97b But the TLDR is that following this scheme, SME unwinding can be implemented with minimal changes to existing unwinders. All unwinders need to do is initialize VG to `CNTD` at the start of unwinding, then everything else is handled by standard opcodes (which don't need changes to handle VG).
1 parent c075fb8 commit 478b4b0

29 files changed

+1597
-1539
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 68 additions & 133 deletions
Original file line numberDiff line numberDiff line change
@@ -338,7 +338,7 @@ static bool requiresSaveVG(const MachineFunction &MF);
338338
// Conservatively, returns true if the function is likely to have an SVE vectors
339339
// on the stack. This function is safe to be called before callee-saves or
340340
// object offsets have been determined.
341-
static bool isLikelyToHaveSVEStack(MachineFunction &MF) {
341+
static bool isLikelyToHaveSVEStack(const MachineFunction &MF) {
342342
auto *AFI = MF.getInfo<AArch64FunctionInfo>();
343343
if (AFI->isSVECC())
344344
return true;
@@ -532,6 +532,7 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
532532
bool AArch64FrameLowering::hasFPImpl(const MachineFunction &MF) const {
533533
const MachineFrameInfo &MFI = MF.getFrameInfo();
534534
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
535+
const AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
535536

536537
// Win64 EH requires a frame pointer if funclets are present, as the locals
537538
// are accessed off the frame pointer in both the parent function and the
@@ -545,6 +546,29 @@ bool AArch64FrameLowering::hasFPImpl(const MachineFunction &MF) const {
545546
MFI.hasStackMap() || MFI.hasPatchPoint() ||
546547
RegInfo->hasStackRealignment(MF))
547548
return true;
549+
550+
// If we:
551+
//
552+
// 1. Have streaming mode changes
553+
// OR:
554+
// 2. Have a streaming body with SVE stack objects
555+
//
556+
// Then the value of VG restored when unwinding to this function may not match
557+
// the value of VG used to set up the stack.
558+
//
559+
// This is a problem as the CFA can be described with an expression of the
560+
// form: CFA = SP + NumBytes + VG * NumScalableBytes.
561+
//
562+
// If the value of VG used in that expression does not match the value used to
563+
// set up the stack, an incorrect address for the CFA will be computed, and
564+
// unwinding will fail.
565+
//
566+
// We work around this issue by ensuring the frame-pointer can describe the
567+
// CFA in either of these cases.
568+
if (AFI.needsDwarfUnwindInfo(MF) &&
569+
((requiresSaveVG(MF) || AFI.getSMEFnAttrs().hasStreamingBody()) &&
570+
(!AFI.hasCalculatedStackSizeSVE() || AFI.getStackSizeSVE() > 0)))
571+
return true;
548572
// With large callframes around we may need to use FP to access the scavenging
549573
// emergency spillslot.
550574
//
@@ -663,10 +687,6 @@ void AArch64FrameLowering::emitCalleeSavedGPRLocations(
663687
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
664688
MachineFunction &MF = *MBB.getParent();
665689
MachineFrameInfo &MFI = MF.getFrameInfo();
666-
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
667-
SMEAttrs Attrs = AFI->getSMEFnAttrs();
668-
bool LocallyStreaming =
669-
Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface();
670690

671691
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
672692
if (CSI.empty())
@@ -680,14 +700,6 @@ void AArch64FrameLowering::emitCalleeSavedGPRLocations(
680700

681701
assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
682702
int64_t Offset = MFI.getObjectOffset(FrameIdx) - getOffsetOfLocalArea();
683-
684-
// The location of VG will be emitted before each streaming-mode change in
685-
// the function. Only locally-streaming functions require emitting the
686-
// non-streaming VG location here.
687-
if ((LocallyStreaming && FrameIdx == AFI->getStreamingVGIdx()) ||
688-
(!LocallyStreaming && Info.getReg() == AArch64::VG))
689-
continue;
690-
691703
CFIBuilder.buildOffset(Info.getReg(), Offset);
692704
}
693705
}
@@ -707,8 +719,16 @@ void AArch64FrameLowering::emitCalleeSavedSVELocations(
707719
AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
708720
CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
709721

722+
std::optional<int64_t> IncomingVGOffsetFromDefCFA;
723+
if (requiresSaveVG(MF)) {
724+
auto IncomingVG = *find_if(
725+
reverse(CSI), [](auto &Info) { return Info.getReg() == AArch64::VG; });
726+
IncomingVGOffsetFromDefCFA =
727+
MFI.getObjectOffset(IncomingVG.getFrameIdx()) - getOffsetOfLocalArea();
728+
}
729+
710730
for (const auto &Info : CSI) {
711-
if (!(MFI.getStackID(Info.getFrameIdx()) == TargetStackID::ScalableVector))
731+
if (MFI.getStackID(Info.getFrameIdx()) != TargetStackID::ScalableVector)
712732
continue;
713733

714734
// Not all unwinders may know about SVE registers, so assume the lowest
@@ -722,7 +742,8 @@ void AArch64FrameLowering::emitCalleeSavedSVELocations(
722742
StackOffset::getScalable(MFI.getObjectOffset(Info.getFrameIdx())) -
723743
StackOffset::getFixed(AFI.getCalleeSavedStackSize(MFI));
724744

725-
CFIBuilder.insertCFIInst(createCFAOffset(TRI, Reg, Offset));
745+
CFIBuilder.insertCFIInst(
746+
createCFAOffset(TRI, Reg, Offset, IncomingVGOffsetFromDefCFA));
726747
}
727748
}
728749

@@ -783,9 +804,6 @@ static void emitCalleeSavedRestores(MachineBasicBlock &MBB,
783804
!static_cast<const AArch64RegisterInfo &>(TRI).regNeedsCFI(Reg, Reg))
784805
continue;
785806

786-
if (!Info.isRestored())
787-
continue;
788-
789807
CFIBuilder.buildRestore(Info.getReg());
790808
}
791809
}
@@ -1465,10 +1483,10 @@ bool requiresGetVGCall(MachineFunction &MF) {
14651483

14661484
static bool requiresSaveVG(const MachineFunction &MF) {
14671485
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
1486+
if (!AFI->needsDwarfUnwindInfo(MF) || !AFI->hasStreamingModeChanges())
1487+
return false;
14681488
// For Darwin platforms we don't save VG for non-SVE functions, even if SME
14691489
// is enabled with streaming mode changes.
1470-
if (!AFI->hasStreamingModeChanges())
1471-
return false;
14721490
auto &ST = MF.getSubtarget<AArch64Subtarget>();
14731491
if (ST.isTargetDarwin())
14741492
return ST.hasSVE();
@@ -1484,8 +1502,7 @@ static bool matchLibcall(const TargetLowering &TLI, const MachineOperand &MO,
14841502
bool isVGInstruction(MachineBasicBlock::iterator MBBI,
14851503
const TargetLowering &TLI) {
14861504
unsigned Opc = MBBI->getOpcode();
1487-
if (Opc == AArch64::CNTD_XPiI || Opc == AArch64::RDSVLI_XI ||
1488-
Opc == AArch64::UBFMXri)
1505+
if (Opc == AArch64::CNTD_XPiI)
14891506
return true;
14901507

14911508
if (!requiresGetVGCall(*MBBI->getMF()))
@@ -1494,7 +1511,7 @@ bool isVGInstruction(MachineBasicBlock::iterator MBBI,
14941511
if (Opc == AArch64::BL)
14951512
return matchLibcall(TLI, MBBI->getOperand(0), RTLIB::SMEABI_GET_CURRENT_VG);
14961513

1497-
return Opc == AArch64::ORRXrr;
1514+
return Opc == TargetOpcode::COPY;
14981515
}
14991516

15001517
// Convert callee-save register save/restore instruction to do stack pointer
@@ -1509,9 +1526,8 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
15091526
unsigned NewOpc;
15101527

15111528
// If the function contains streaming mode changes, we expect instructions
1512-
// to calculate the value of VG before spilling. For locally-streaming
1513-
// functions, we need to do this for both the streaming and non-streaming
1514-
// vector length. Move past these instructions if necessary.
1529+
// to calculate the value of VG before spilling. Move past these instructions
1530+
// if necessary.
15151531
MachineFunction &MF = *MBB.getParent();
15161532
if (requiresSaveVG(MF)) {
15171533
auto &TLI = *MF.getSubtarget().getTargetLowering();
@@ -3475,7 +3491,6 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
34753491
MachineFunction &MF = *MBB.getParent();
34763492
auto &TLI = *MF.getSubtarget<AArch64Subtarget>().getTargetLowering();
34773493
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
3478-
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
34793494
bool NeedsWinCFI = needsWinCFI(MF);
34803495
DebugLoc DL;
34813496
SmallVector<RegPairInfo, 8> RegPairs;
@@ -3544,48 +3559,34 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
35443559
}
35453560

35463561
unsigned X0Scratch = AArch64::NoRegister;
3562+
auto RestoreX0 = make_scope_exit([&] {
3563+
if (X0Scratch != AArch64::NoRegister)
3564+
BuildMI(MBB, MI, DL, TII.get(TargetOpcode::COPY), AArch64::X0)
3565+
.addReg(X0Scratch)
3566+
.setMIFlag(MachineInstr::FrameSetup);
3567+
});
3568+
35473569
if (Reg1 == AArch64::VG) {
35483570
// Find an available register to store value of VG to.
35493571
Reg1 = findScratchNonCalleeSaveRegister(&MBB, true);
35503572
assert(Reg1 != AArch64::NoRegister);
3551-
SMEAttrs Attrs = AFI->getSMEFnAttrs();
3552-
3553-
if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface() &&
3554-
AFI->getStreamingVGIdx() == std::numeric_limits<int>::max()) {
3555-
// For locally-streaming functions, we need to store both the streaming
3556-
// & non-streaming VG. Spill the streaming value first.
3557-
BuildMI(MBB, MI, DL, TII.get(AArch64::RDSVLI_XI), Reg1)
3558-
.addImm(1)
3559-
.setMIFlag(MachineInstr::FrameSetup);
3560-
BuildMI(MBB, MI, DL, TII.get(AArch64::UBFMXri), Reg1)
3561-
.addReg(Reg1)
3562-
.addImm(3)
3563-
.addImm(63)
3564-
.setMIFlag(MachineInstr::FrameSetup);
3565-
3566-
AFI->setStreamingVGIdx(RPI.FrameIdx);
3567-
} else if (MF.getSubtarget<AArch64Subtarget>().hasSVE()) {
3573+
if (MF.getSubtarget<AArch64Subtarget>().hasSVE()) {
35683574
BuildMI(MBB, MI, DL, TII.get(AArch64::CNTD_XPiI), Reg1)
35693575
.addImm(31)
35703576
.addImm(1)
35713577
.setMIFlag(MachineInstr::FrameSetup);
3572-
AFI->setVGIdx(RPI.FrameIdx);
35733578
} else {
35743579
const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
3575-
if (llvm::any_of(
3576-
MBB.liveins(),
3577-
[&STI](const MachineBasicBlock::RegisterMaskPair &LiveIn) {
3578-
return STI.getRegisterInfo()->isSuperOrSubRegisterEq(
3579-
AArch64::X0, LiveIn.PhysReg);
3580-
}))
3580+
if (any_of(MBB.liveins(),
3581+
[&STI](const MachineBasicBlock::RegisterMaskPair &LiveIn) {
3582+
return STI.getRegisterInfo()->isSuperOrSubRegisterEq(
3583+
AArch64::X0, LiveIn.PhysReg);
3584+
})) {
35813585
X0Scratch = Reg1;
3582-
3583-
if (X0Scratch != AArch64::NoRegister)
3584-
BuildMI(MBB, MI, DL, TII.get(AArch64::ORRXrr), Reg1)
3585-
.addReg(AArch64::XZR)
3586-
.addReg(AArch64::X0, RegState::Undef)
3587-
.addReg(AArch64::X0, RegState::Implicit)
3586+
BuildMI(MBB, MI, DL, TII.get(TargetOpcode::COPY), X0Scratch)
3587+
.addReg(AArch64::X0)
35883588
.setMIFlag(MachineInstr::FrameSetup);
3589+
}
35893590

35903591
RTLIB::Libcall LC = RTLIB::SMEABI_GET_CURRENT_VG;
35913592
const uint32_t *RegMask =
@@ -3596,7 +3597,6 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
35963597
.addReg(AArch64::X0, RegState::ImplicitDefine)
35973598
.setMIFlag(MachineInstr::FrameSetup);
35983599
Reg1 = AArch64::X0;
3599-
AFI->setVGIdx(RPI.FrameIdx);
36003600
}
36013601
}
36023602

@@ -3691,13 +3691,6 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
36913691
if (RPI.isPaired())
36923692
MFI.setStackID(FrameIdxReg2, TargetStackID::ScalableVector);
36933693
}
3694-
3695-
if (X0Scratch != AArch64::NoRegister)
3696-
BuildMI(MBB, MI, DL, TII.get(AArch64::ORRXrr), AArch64::X0)
3697-
.addReg(AArch64::XZR)
3698-
.addReg(X0Scratch, RegState::Undef)
3699-
.addReg(X0Scratch, RegState::Implicit)
3700-
.setMIFlag(MachineInstr::FrameSetup);
37013694
}
37023695
return true;
37033696
}
@@ -4076,15 +4069,8 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
40764069

40774070
// Increase the callee-saved stack size if the function has streaming mode
40784071
// changes, as we will need to spill the value of the VG register.
4079-
// For locally streaming functions, we spill both the streaming and
4080-
// non-streaming VG value.
4081-
SMEAttrs Attrs = AFI->getSMEFnAttrs();
4082-
if (requiresSaveVG(MF)) {
4083-
if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface())
4084-
CSStackSize += 16;
4085-
else
4086-
CSStackSize += 8;
4087-
}
4072+
if (requiresSaveVG(MF))
4073+
CSStackSize += 8;
40884074

40894075
// Determine if a Hazard slot should be used, and increase the CSStackSize by
40904076
// StackHazardSize if so.
@@ -4235,29 +4221,13 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
42354221

42364222
// Insert VG into the list of CSRs, immediately before LR if saved.
42374223
if (requiresSaveVG(MF)) {
4238-
std::vector<CalleeSavedInfo> VGSaves;
4239-
SMEAttrs Attrs = AFI->getSMEFnAttrs();
4240-
4241-
auto VGInfo = CalleeSavedInfo(AArch64::VG);
4242-
VGInfo.setRestored(false);
4243-
VGSaves.push_back(VGInfo);
4244-
4245-
// Add VG again if the function is locally-streaming, as we will spill two
4246-
// values.
4247-
if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface())
4248-
VGSaves.push_back(VGInfo);
4249-
4250-
bool InsertBeforeLR = false;
4251-
4252-
for (unsigned I = 0; I < CSI.size(); I++)
4253-
if (CSI[I].getReg() == AArch64::LR) {
4254-
InsertBeforeLR = true;
4255-
CSI.insert(CSI.begin() + I, VGSaves.begin(), VGSaves.end());
4256-
break;
4257-
}
4258-
4259-
if (!InsertBeforeLR)
4260-
llvm::append_range(CSI, VGSaves);
4224+
CalleeSavedInfo VGInfo(AArch64::VG);
4225+
auto It =
4226+
find_if(CSI, [](auto &Info) { return Info.getReg() == AArch64::LR; });
4227+
if (It != CSI.end())
4228+
CSI.insert(It, VGInfo);
4229+
else
4230+
CSI.push_back(VGInfo);
42614231
}
42624232

42634233
Register LastReg = 0;
@@ -5260,46 +5230,11 @@ MachineBasicBlock::iterator tryMergeAdjacentSTG(MachineBasicBlock::iterator II,
52605230
}
52615231
} // namespace
52625232

5263-
static void emitVGSaveRestore(MachineBasicBlock::iterator II,
5264-
const AArch64FrameLowering *TFI) {
5265-
MachineInstr &MI = *II;
5266-
MachineBasicBlock *MBB = MI.getParent();
5267-
MachineFunction *MF = MBB->getParent();
5268-
5269-
if (MI.getOpcode() != AArch64::VGSavePseudo &&
5270-
MI.getOpcode() != AArch64::VGRestorePseudo)
5271-
return;
5272-
5273-
auto *AFI = MF->getInfo<AArch64FunctionInfo>();
5274-
SMEAttrs FuncAttrs = AFI->getSMEFnAttrs();
5275-
bool LocallyStreaming =
5276-
FuncAttrs.hasStreamingBody() && !FuncAttrs.hasStreamingInterface();
5277-
5278-
int64_t VGFrameIdx =
5279-
LocallyStreaming ? AFI->getStreamingVGIdx() : AFI->getVGIdx();
5280-
assert(VGFrameIdx != std::numeric_limits<int>::max() &&
5281-
"Expected FrameIdx for VG");
5282-
5283-
CFIInstBuilder CFIBuilder(*MBB, II, MachineInstr::NoFlags);
5284-
if (MI.getOpcode() == AArch64::VGSavePseudo) {
5285-
const MachineFrameInfo &MFI = MF->getFrameInfo();
5286-
int64_t Offset =
5287-
MFI.getObjectOffset(VGFrameIdx) - TFI->getOffsetOfLocalArea();
5288-
CFIBuilder.buildOffset(AArch64::VG, Offset);
5289-
} else {
5290-
CFIBuilder.buildRestore(AArch64::VG);
5291-
}
5292-
5293-
MI.eraseFromParent();
5294-
}
5295-
52965233
void AArch64FrameLowering::processFunctionBeforeFrameIndicesReplaced(
52975234
MachineFunction &MF, RegScavenger *RS = nullptr) const {
52985235
for (auto &BB : MF)
52995236
for (MachineBasicBlock::iterator II = BB.begin(); II != BB.end();) {
5300-
if (requiresSaveVG(MF))
5301-
emitVGSaveRestore(II++, this);
5302-
else if (StackTaggingMergeSetTag)
5237+
if (StackTaggingMergeSetTag)
53035238
II = tryMergeAdjacentSTG(II, this, RS);
53045239
}
53055240

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 2 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9517,17 +9517,10 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
95179517

95189518
SDValue InGlue;
95199519
if (RequiresSMChange) {
9520-
if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) {
9521-
Chain = DAG.getNode(AArch64ISD::VG_SAVE, DL,
9522-
DAG.getVTList(MVT::Other, MVT::Glue), Chain);
9523-
InGlue = Chain.getValue(1);
9524-
}
9525-
9526-
SDValue NewChain =
9520+
Chain =
95279521
changeStreamingMode(DAG, DL, CallAttrs.callee().hasStreamingInterface(),
95289522
Chain, InGlue, getSMToggleCondition(CallAttrs));
9529-
Chain = NewChain.getValue(0);
9530-
InGlue = NewChain.getValue(1);
9523+
InGlue = Chain.getValue(1);
95319524
}
95329525

95339526
// Build a sequence of copy-to-reg nodes chained together with token chain
@@ -9712,13 +9705,6 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
97129705
Result = changeStreamingMode(
97139706
DAG, DL, !CallAttrs.callee().hasStreamingInterface(), Result, InGlue,
97149707
getSMToggleCondition(CallAttrs));
9715-
9716-
if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) {
9717-
InGlue = Result.getValue(1);
9718-
Result =
9719-
DAG.getNode(AArch64ISD::VG_RESTORE, DL,
9720-
DAG.getVTList(MVT::Other, MVT::Glue), {Result, InGlue});
9721-
}
97229708
}
97239709

97249710
if (RequiresLazySave || CallAttrs.requiresEnablingZAAfterCall())

0 commit comments

Comments
 (0)