Skip to content

Commit 29a752f

Browse files
committed
[AArch64][SME] Move saving VG for the unwinder out of frame lowering
This patch moves computing and storing of VG in functions with streaming mode changes to a new pre-RA pass (MachineSMEABI). The goal is to make saving VG simpler, as computing VG may require calling `__arm_get_current_vg` -- which requires saving X0 around the call and the LR (among other complexities in frame lowering). Doing this pre-RA allows the register allocator to handle this (rather than manual scavenging). The MachineSMEABI pass saves VG to the AArch64::SAVED_STREAMING_VG_SLOT and AArch64::SAVED_VG_SLOT target frame indices. These will be resolved to actual frame indices during PEI (as they are not known before then). For the most part this does not significantly change codegen; however, there is one downside: resolving the frame indices outside of the prologue may need extra instructions (to step past later allocations on the stack, such as scalable vectors). Fixes #145635
1 parent 36a060a commit 29a752f

32 files changed

+1429
-1234
lines changed

llvm/lib/CodeGen/MachineVerifier.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2529,8 +2529,8 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
25292529
}
25302530

25312531
// Check that an instruction has register operands only as expected.
2532-
if (MCOI.OperandType == MCOI::OPERAND_REGISTER &&
2533-
!MO->isReg() && !MO->isFI())
2532+
if (MCOI.OperandType == MCOI::OPERAND_REGISTER && !MO->isReg() &&
2533+
!MO->isFI() && !MO->isTargetIndex())
25342534
report("Expected a register operand.", MO, MONum);
25352535
if (MO->isReg()) {
25362536
if (MCOI.OperandType == MCOI::OPERAND_IMMEDIATE ||

llvm/lib/Target/AArch64/AArch64.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
6060
FunctionPass *createAArch64CollectLOHPass();
6161
FunctionPass *createSMEABIPass();
6262
FunctionPass *createSMEPeepholeOptPass();
63+
FunctionPass *createMachineSMEABIPass();
6364
ModulePass *createSVEIntrinsicOptsPass();
6465
InstructionSelector *
6566
createAArch64InstructionSelector(const AArch64TargetMachine &,
@@ -111,8 +112,14 @@ void initializeFalkorMarkStridedAccessesLegacyPass(PassRegistry&);
111112
void initializeLDTLSCleanupPass(PassRegistry&);
112113
void initializeSMEABIPass(PassRegistry &);
113114
void initializeSMEPeepholeOptPass(PassRegistry &);
115+
void initializeMachineSMEABIPass(PassRegistry &);
114116
void initializeSVEIntrinsicOptsPass(PassRegistry &);
115117
void initializeAArch64Arm64ECCallLoweringPass(PassRegistry &);
118+
119+
namespace AArch64 {
120+
enum TargetIndex { SAVED_VG_SLOT, SAVED_STREAMING_VG_SLOT };
121+
}
122+
116123
} // end namespace llvm
117124

118125
#endif

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 33 additions & 125 deletions
Original file line numberDiff line numberDiff line change
@@ -333,7 +333,10 @@ static bool needsWinCFI(const MachineFunction &MF);
333333
static StackOffset getSVEStackSize(const MachineFunction &MF);
334334
static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB,
335335
bool HasCall = false);
336-
static bool requiresSaveVG(const MachineFunction &MF);
336+
static bool requiresSaveVG(const MachineFunction &MF) {
337+
return MF.getSubtarget<AArch64Subtarget>().getRegisterInfo()->requiresSaveVG(
338+
MF);
339+
}
337340

338341
/// Returns true if a homogeneous prolog or epilog code can be emitted
339342
/// for the size optimization. If possible, a frame helper call is injected.
@@ -1105,8 +1108,7 @@ bool AArch64FrameLowering::canUseAsPrologue(
11051108

11061109
// May need a scratch register (for return value) if require making a special
11071110
// call
1108-
if (requiresSaveVG(*MF) ||
1109-
windowsRequiresStackProbe(*MF, std::numeric_limits<uint64_t>::max()))
1111+
if (windowsRequiresStackProbe(*MF, std::numeric_limits<uint64_t>::max()))
11101112
if (findScratchNonCalleeSaveRegister(TmpMBB, true) == AArch64::NoRegister)
11111113
return false;
11121114

@@ -1391,38 +1393,6 @@ bool requiresGetVGCall(MachineFunction &MF) {
13911393
!MF.getSubtarget<AArch64Subtarget>().hasSVE();
13921394
}
13931395

1394-
static bool requiresSaveVG(const MachineFunction &MF) {
1395-
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
1396-
// For Darwin platforms we don't save VG for non-SVE functions, even if SME
1397-
// is enabled with streaming mode changes.
1398-
if (!AFI->hasStreamingModeChanges())
1399-
return false;
1400-
auto &ST = MF.getSubtarget<AArch64Subtarget>();
1401-
if (ST.isTargetDarwin())
1402-
return ST.hasSVE();
1403-
return true;
1404-
}
1405-
1406-
bool isVGInstruction(MachineBasicBlock::iterator MBBI) {
1407-
unsigned Opc = MBBI->getOpcode();
1408-
if (Opc == AArch64::CNTD_XPiI || Opc == AArch64::RDSVLI_XI ||
1409-
Opc == AArch64::UBFMXri)
1410-
return true;
1411-
1412-
if (requiresGetVGCall(*MBBI->getMF())) {
1413-
if (Opc == AArch64::ORRXrr)
1414-
return true;
1415-
1416-
if (Opc == AArch64::BL) {
1417-
auto Op1 = MBBI->getOperand(0);
1418-
return Op1.isSymbol() &&
1419-
(StringRef(Op1.getSymbolName()) == "__arm_get_current_vg");
1420-
}
1421-
}
1422-
1423-
return false;
1424-
}
1425-
14261396
// Convert callee-save register save/restore instruction to do stack pointer
14271397
// decrement/increment to allocate/deallocate the callee-save stack area by
14281398
// converting store/load to use pre/post increment version.
@@ -1434,15 +1404,6 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
14341404
int CFAOffset = 0) {
14351405
unsigned NewOpc;
14361406

1437-
// If the function contains streaming mode changes, we expect instructions
1438-
// to calculate the value of VG before spilling. For locally-streaming
1439-
// functions, we need to do this for both the streaming and non-streaming
1440-
// vector length. Move past these instructions if necessary.
1441-
MachineFunction &MF = *MBB.getParent();
1442-
if (requiresSaveVG(MF))
1443-
while (isVGInstruction(MBBI))
1444-
++MBBI;
1445-
14461407
switch (MBBI->getOpcode()) {
14471408
default:
14481409
llvm_unreachable("Unexpected callee-save save/restore opcode!");
@@ -1979,9 +1940,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
19791940
// pointer bump above.
19801941
while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) &&
19811942
!IsSVECalleeSave(MBBI)) {
1982-
if (CombineSPBump &&
1983-
// Only fix-up frame-setup load/store instructions.
1984-
(!requiresSaveVG(MF) || !isVGInstruction(MBBI)))
1943+
if (CombineSPBump)
19851944
fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(),
19861945
NeedsWinCFI, &HasWinCFI);
19871946
++MBBI;
@@ -3403,66 +3362,19 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
34033362
StrOpc =
34043363
Size == 16 ? AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO : AArch64::STR_PXI;
34053364
break;
3406-
case RegPairInfo::VG:
3407-
StrOpc = AArch64::STRXui;
3408-
break;
3409-
}
3410-
3411-
unsigned X0Scratch = AArch64::NoRegister;
3412-
if (Reg1 == AArch64::VG) {
3413-
// Find an available register to store value of VG to.
3414-
Reg1 = findScratchNonCalleeSaveRegister(&MBB, true);
3415-
assert(Reg1 != AArch64::NoRegister);
3365+
case RegPairInfo::VG: {
34163366
SMEAttrs Attrs = AFI->getSMEFnAttrs();
3417-
34183367
if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface() &&
34193368
AFI->getStreamingVGIdx() == std::numeric_limits<int>::max()) {
34203369
// For locally-streaming functions, we need to store both the streaming
3421-
// & non-streaming VG. Spill the streaming value first.
3422-
BuildMI(MBB, MI, DL, TII.get(AArch64::RDSVLI_XI), Reg1)
3423-
.addImm(1)
3424-
.setMIFlag(MachineInstr::FrameSetup);
3425-
BuildMI(MBB, MI, DL, TII.get(AArch64::UBFMXri), Reg1)
3426-
.addReg(Reg1)
3427-
.addImm(3)
3428-
.addImm(63)
3429-
.setMIFlag(MachineInstr::FrameSetup);
3430-
3370+
// & non-streaming VG.
34313371
AFI->setStreamingVGIdx(RPI.FrameIdx);
3432-
} else if (MF.getSubtarget<AArch64Subtarget>().hasSVE()) {
3433-
BuildMI(MBB, MI, DL, TII.get(AArch64::CNTD_XPiI), Reg1)
3434-
.addImm(31)
3435-
.addImm(1)
3436-
.setMIFlag(MachineInstr::FrameSetup);
3437-
AFI->setVGIdx(RPI.FrameIdx);
34383372
} else {
3439-
const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
3440-
if (llvm::any_of(
3441-
MBB.liveins(),
3442-
[&STI](const MachineBasicBlock::RegisterMaskPair &LiveIn) {
3443-
return STI.getRegisterInfo()->isSuperOrSubRegisterEq(
3444-
AArch64::X0, LiveIn.PhysReg);
3445-
}))
3446-
X0Scratch = Reg1;
3447-
3448-
if (X0Scratch != AArch64::NoRegister)
3449-
BuildMI(MBB, MI, DL, TII.get(AArch64::ORRXrr), Reg1)
3450-
.addReg(AArch64::XZR)
3451-
.addReg(AArch64::X0, RegState::Undef)
3452-
.addReg(AArch64::X0, RegState::Implicit)
3453-
.setMIFlag(MachineInstr::FrameSetup);
3454-
3455-
const uint32_t *RegMask = TRI->getCallPreservedMask(
3456-
MF,
3457-
CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1);
3458-
BuildMI(MBB, MI, DL, TII.get(AArch64::BL))
3459-
.addExternalSymbol("__arm_get_current_vg")
3460-
.addRegMask(RegMask)
3461-
.addReg(AArch64::X0, RegState::ImplicitDefine)
3462-
.setMIFlag(MachineInstr::FrameSetup);
3463-
Reg1 = AArch64::X0;
34643373
AFI->setVGIdx(RPI.FrameIdx);
34653374
}
3375+
// VG will be written to the frame indices immediately after the prologue.
3376+
continue;
3377+
}
34663378
}
34673379

34683380
LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
@@ -3556,13 +3468,6 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
35563468
if (RPI.isPaired())
35573469
MFI.setStackID(FrameIdxReg2, TargetStackID::ScalableVector);
35583470
}
3559-
3560-
if (X0Scratch != AArch64::NoRegister)
3561-
BuildMI(MBB, MI, DL, TII.get(AArch64::ORRXrr), AArch64::X0)
3562-
.addReg(AArch64::XZR)
3563-
.addReg(X0Scratch, RegState::Undef)
3564-
.addReg(X0Scratch, RegState::Implicit)
3565-
.setMIFlag(MachineInstr::FrameSetup);
35663471
}
35673472
return true;
35683473
}
@@ -4092,31 +3997,19 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
40923997
MaxCSFrameIndex = FrameIdx;
40933998
}
40943999

4095-
// Insert VG into the list of CSRs, immediately before LR if saved.
40964000
if (requiresSaveVG(MF)) {
4097-
std::vector<CalleeSavedInfo> VGSaves;
4098-
SMEAttrs Attrs = AFI->getSMEFnAttrs();
4099-
4100-
auto VGInfo = CalleeSavedInfo(AArch64::VG);
4001+
CalleeSavedInfo VGInfo(AArch64::VG);
41014002
VGInfo.setRestored(false);
4102-
VGSaves.push_back(VGInfo);
4003+
SmallVector<CalleeSavedInfo, 2> VGSaves{VGInfo};
41034004

41044005
// Add VG again if the function is locally-streaming, as we will spill two
41054006
// values.
4007+
SMEAttrs Attrs = AFI->getSMEFnAttrs();
41064008
if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface())
41074009
VGSaves.push_back(VGInfo);
41084010

4109-
bool InsertBeforeLR = false;
4110-
4111-
for (unsigned I = 0; I < CSI.size(); I++)
4112-
if (CSI[I].getReg() == AArch64::LR) {
4113-
InsertBeforeLR = true;
4114-
CSI.insert(CSI.begin() + I, VGSaves.begin(), VGSaves.end());
4115-
break;
4116-
}
4117-
4118-
if (!InsertBeforeLR)
4119-
llvm::append_range(CSI, VGSaves);
4011+
// Insert the VG saves at the start of the CSI (alongside GPRs).
4012+
CSI.insert(CSI.begin(), VGSaves.begin(), VGSaves.end());
41204013
}
41214014

41224015
Register LastReg = 0;
@@ -5135,13 +5028,28 @@ static void emitVGSaveRestore(MachineBasicBlock::iterator II,
51355028
MI.eraseFromParent();
51365029
}
51375030

5031+
static void replaceVGTargetIndices(MachineBasicBlock::iterator II,
5032+
AArch64FunctionInfo *AFI) {
5033+
for (auto &MO : II->explicit_operands()) {
5034+
if (MO.isTargetIndex()) {
5035+
if (MO.getIndex() == AArch64::SAVED_STREAMING_VG_SLOT)
5036+
MO.ChangeToFrameIndex(AFI->getStreamingVGIdx());
5037+
if (MO.getIndex() == AArch64::SAVED_VG_SLOT)
5038+
MO.ChangeToFrameIndex(AFI->getVGIdx());
5039+
}
5040+
}
5041+
}
5042+
51385043
void AArch64FrameLowering::processFunctionBeforeFrameIndicesReplaced(
51395044
MachineFunction &MF, RegScavenger *RS = nullptr) const {
5045+
bool VGSaved = requiresSaveVG(MF);
5046+
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
51405047
for (auto &BB : MF)
51415048
for (MachineBasicBlock::iterator II = BB.begin(); II != BB.end();) {
5142-
if (requiresSaveVG(MF))
5049+
if (VGSaved) {
5050+
replaceVGTargetIndices(II, AFI);
51435051
emitVGSaveRestore(II++, this);
5144-
else if (StackTaggingMergeSetTag)
5052+
} else if (StackTaggingMergeSetTag)
51455053
II = tryMergeAdjacentSTG(II, this, RS);
51465054
}
51475055
}

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3688,6 +3688,14 @@ static unsigned offsetExtendOpcode(unsigned Opcode) {
36883688
}
36893689
}
36903690

3691+
ArrayRef<std::pair<int, const char *>>
3692+
AArch64InstrInfo::getSerializableTargetIndices() const {
3693+
static constexpr std::pair<int, const char *> TargetIndices[] = {
3694+
{AArch64::SAVED_VG_SLOT, "saved-vg-slot"},
3695+
{AArch64::SAVED_STREAMING_VG_SLOT, "saved-streaming-vg-slot"}};
3696+
return TargetIndices;
3697+
}
3698+
36913699
MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
36923700
const ExtAddrMode &AM) const {
36933701

llvm/lib/Target/AArch64/AArch64InstrInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,9 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
299299
MachineInstr *emitLdStWithAddr(MachineInstr &MemI,
300300
const ExtAddrMode &AM) const override;
301301

302+
ArrayRef<std::pair<int, const char *>>
303+
getSerializableTargetIndices() const override;
304+
302305
bool getMemOperandsWithOffsetWidth(
303306
const MachineInstr &MI, SmallVectorImpl<const MachineOperand *> &BaseOps,
304307
int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,

llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -673,6 +673,18 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
673673
return false;
674674
}
675675

676+
bool AArch64RegisterInfo::requiresSaveVG(const MachineFunction &MF) const {
677+
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
678+
// For Darwin platforms we don't save VG for non-SVE functions, even if SME
679+
// is enabled with streaming mode changes.
680+
if (!AFI->hasStreamingModeChanges())
681+
return false;
682+
auto &ST = MF.getSubtarget<AArch64Subtarget>();
683+
if (ST.isTargetDarwin())
684+
return ST.hasSVE();
685+
return true;
686+
}
687+
676688
bool AArch64RegisterInfo::isArgumentRegister(const MachineFunction &MF,
677689
MCRegister Reg) const {
678690
CallingConv::ID CC = MF.getFunction().getCallingConv();

llvm/lib/Target/AArch64/AArch64RegisterInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,8 @@ class AArch64RegisterInfo final : public AArch64GenRegisterInfo {
127127
bool hasBasePointer(const MachineFunction &MF) const;
128128
unsigned getBaseRegister() const;
129129

130+
bool requiresSaveVG(const MachineFunction &MF) const;
131+
130132
bool isArgumentRegister(const MachineFunction &MF,
131133
MCRegister Reg) const override;
132134

llvm/lib/Target/AArch64/AArch64TargetMachine.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -803,6 +803,9 @@ bool AArch64PassConfig::addILPOpts() {
803803
}
804804

805805
void AArch64PassConfig::addPreRegAlloc() {
806+
// Insert VG saves for the unwinder.
807+
addPass(createMachineSMEABIPass());
808+
806809
// Change dead register definitions to refer to the zero register.
807810
if (TM->getOptLevel() != CodeGenOptLevel::None &&
808811
EnableDeadRegisterElimination)

llvm/lib/Target/AArch64/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ add_llvm_target(AArch64CodeGen
8787
AArch64TargetObjectFile.cpp
8888
AArch64TargetTransformInfo.cpp
8989
SMEABIPass.cpp
90+
MachineSMEABIPass.cpp
9091
SMEPeepholeOpt.cpp
9192
SVEIntrinsicOpts.cpp
9293
AArch64SIMDInstrOpt.cpp

0 commit comments

Comments
 (0)