Skip to content

Commit 89930e9

Browse files
epilk, slinder1, RamNalamothu
committed
[AMDGPU] Implement -amdgpu-spill-cfi-saved-regs
These spills need special CFI anyway, so implementing them directly where CFI is emitted avoids the need to invent a mechanism to track them from ISel.

Co-authored-by: Scott Linder <[email protected]>
Co-authored-by: Venkata Ramanaiah Nalamothu <[email protected]>
1 parent 6a34648 commit 89930e9

File tree

8 files changed

+2623
-14
lines changed

8 files changed

+2623
-14
lines changed

llvm/lib/Target/AMDGPU/SIFrameLowering.cpp

Lines changed: 35 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -670,12 +670,21 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
670670
}
671671

672672
// Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
673-
// memory. They should have been removed by now.
674-
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
673+
// memory. They should have been removed by now, except CFI Saved Reg spills.
674+
static bool allStackObjectsAreDead(const MachineFunction &MF) {
675+
const MachineFrameInfo &MFI = MF.getFrameInfo();
676+
const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
675677
for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
676678
I != E; ++I) {
677-
if (!MFI.isDeadObjectIndex(I))
679+
if (!MFI.isDeadObjectIndex(I)) {
680+
// determineCalleeSaves() might have added the SGPRSpill stack IDs for
681+
// CFI saves into scratch VGPR; ignore them.
682+
if (MFI.getStackID(I) == TargetStackID::SGPRSpill &&
683+
FuncInfo->checkIndexInPrologEpilogSGPRSpills(I)) {
684+
continue;
685+
}
678686
return false;
687+
}
679688
}
680689

681690
return true;
@@ -695,8 +704,8 @@ Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
695704

696705
Register ScratchRsrcReg = MFI->getScratchRSrcReg();
697706

698-
if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) &&
699-
allStackObjectsAreDead(MF.getFrameInfo())))
707+
if (!ScratchRsrcReg ||
708+
(!MRI.isPhysRegUsed(ScratchRsrcReg) && allStackObjectsAreDead(MF)))
700709
return Register();
701710

702711
if (ST.hasSGPRInitBug() ||
@@ -923,7 +932,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
923932
bool NeedsFlatScratchInit =
924933
MFI->getUserSGPRInfo().hasFlatScratchInit() &&
925934
(MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
926-
(!allStackObjectsAreDead(FrameInfo) && ST.enableFlatScratch()));
935+
(!allStackObjectsAreDead(MF) && ST.enableFlatScratch()));
927936

928937
if ((NeedsFlatScratchInit || ScratchRsrcReg) &&
929938
PreloadedScratchWaveOffsetReg && !ST.flatScratchIsArchitected()) {
@@ -1314,6 +1323,11 @@ void SIFrameLowering::emitCSRSpillStores(
13141323
LiveUnits.addReg(Reg);
13151324
}
13161325
}
1326+
1327+
// Remove the spill entry created for EXEC. It is needed only for CFISaves in
1328+
// the prologue.
1329+
if (TRI.isCFISavedRegsSpillEnabled())
1330+
FuncInfo->removePrologEpilogSGPRSpillEntry(TRI.getExec());
13171331
}
13181332

13191333
void SIFrameLowering::emitCSRSpillRestores(
@@ -1796,14 +1810,14 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
17961810
// can. Any remaining SGPR spills will go to memory, so move them back to the
17971811
// default stack.
17981812
bool HaveSGPRToVMemSpill =
1799-
FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ true);
1813+
FuncInfo->removeDeadFrameIndices(MF, /*ResetSGPRSpillStackIDs*/ true);
18001814
assert(allSGPRSpillsAreDead(MF) &&
18011815
"SGPR spill should have been removed in SILowerSGPRSpills");
18021816

18031817
// FIXME: The other checks should be redundant with allStackObjectsAreDead,
18041818
// but currently hasNonSpillStackObjects is set only from source
18051819
// allocas. Stack temps produced from legalization are not counted currently.
1806-
if (!allStackObjectsAreDead(MFI)) {
1820+
if (!allStackObjectsAreDead(MF)) {
18071821
assert(RS && "RegScavenger required if spilling");
18081822

18091823
// Add an emergency spill slot
@@ -1903,6 +1917,18 @@ void SIFrameLowering::determinePrologEpilogSGPRSaves(
19031917
MFI->setSGPRForEXECCopy(AMDGPU::NoRegister);
19041918
}
19051919

1920+
if (TRI->isCFISavedRegsSpillEnabled()) {
1921+
Register Exec = TRI->getExec();
1922+
assert(!MFI->hasPrologEpilogSGPRSpillEntry(Exec) &&
1923+
"Re-reserving spill slot for EXEC");
1924+
// FIXME: Machine Copy Propagation currently optimizes away the EXEC copy to
1925+
// the scratch as we emit it only in the prolog. This optimization should
1926+
// not happen for frame related instructions. Until this is fixed ignore
1927+
// copy to scratch SGPR.
1928+
getVGPRSpillLaneOrTempRegister(MF, LiveUnits, Exec, RC,
1929+
/*IncludeScratchCopy=*/false);
1930+
}
1931+
19061932
// hasFP only knows about stack objects that already exist. We're now
19071933
// determining the stack slots that will be created, so we have to predict
19081934
// them. Stack objects force FP usage with calls.
@@ -1912,8 +1938,7 @@ void SIFrameLowering::determinePrologEpilogSGPRSaves(
19121938
//
19131939
// FIXME: Is this really hasReservedCallFrame?
19141940
const bool WillHaveFP =
1915-
FrameInfo.hasCalls() &&
1916-
(SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));
1941+
FrameInfo.hasCalls() && (SavedVGPRs.any() || !allStackObjectsAreDead(MF));
19171942

19181943
if (WillHaveFP || hasFP(MF)) {
19191944
Register FramePtrReg = MFI->getFrameOffsetReg();

llvm/lib/Target/AMDGPU/SIFrameLowering.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,13 @@ class SIFrameLowering final : public AMDGPUFrameLowering {
115115
public:
116116
bool requiresStackPointerReference(const MachineFunction &MF) const;
117117

118+
/// If '-amdgpu-spill-cfi-saved-regs' is enabled, emit RA/EXEC spills to
119+
/// a free VGPR (lanes) or memory and corresponding CFI rules.
120+
void emitCFISavedRegSpills(MachineFunction &MF, MachineBasicBlock &MBB,
121+
MachineBasicBlock::iterator MBBI,
122+
LiveRegUnits &LiveRegs,
123+
bool emitSpillsToMem) const;
124+
118125
/// Create a CFI index for CFIInst and build a MachineInstr around it.
119126
MachineInstr *
120127
buildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,

llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -532,7 +532,7 @@ bool SILowerSGPRSpills::run(MachineFunction &MF) {
532532
// free frame index ids by the later pass(es) like "stack slot coloring"
533533
// which in turn could mess-up with the book keeping of "frame index to VGPR
534534
// lane".
535-
FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ false);
535+
FuncInfo->removeDeadFrameIndices(MF, /*ResetSGPRSpillStackIDs*/ false);
536536

537537
MadeChange = true;
538538
}

llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -566,7 +566,8 @@ bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
566566
}
567567

568568
bool SIMachineFunctionInfo::removeDeadFrameIndices(
569-
MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) {
569+
MachineFunction &MF, bool ResetSGPRSpillStackIDs) {
570+
MachineFrameInfo &MFI = MF.getFrameInfo();
570571
// Remove dead frame indices from function frame, however keep FP & BP since
571572
// spills for them haven't been inserted yet. And also make sure to remove the
572573
// frame indices from `SGPRSpillsToVirtualVGPRLanes` data structure,

llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -757,6 +757,16 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
757757
}) != PrologEpilogSGPRSpills.end();
758758
}
759759

760+
// Remove the entry created for \p Reg, if one exists.
761+
void removePrologEpilogSGPRSpillEntry(Register Reg) {
762+
auto I = find_if(PrologEpilogSGPRSpills,
763+
[&Reg](const auto &Spill) { return Spill.first == Reg; });
764+
if (I == PrologEpilogSGPRSpills.end())
765+
return;
766+
767+
PrologEpilogSGPRSpills.erase(I);
768+
}
769+
760770
const PrologEpilogSGPRSaveRestoreInfo &
761771
getPrologEpilogSGPRSaveRestoreInfo(Register Reg) const {
762772
const auto *I = find_if(PrologEpilogSGPRSpills, [&Reg](const auto &Spill) {
@@ -835,8 +845,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
835845

836846
/// If \p ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill
837847
/// to the default stack.
838-
bool removeDeadFrameIndices(MachineFrameInfo &MFI,
839-
bool ResetSGPRSpillStackIDs);
848+
bool removeDeadFrameIndices(MachineFunction &MF, bool ResetSGPRSpillStackIDs);
840849

841850
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI);
842851
std::optional<int> getOptionalScavengeFI() const { return ScavengeFI; }

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,11 @@ static cl::opt<bool> EnableSpillSGPRToVGPR(
3535
cl::ReallyHidden,
3636
cl::init(true));
3737

38+
static cl::opt<bool> EnableSpillCFISavedRegs(
39+
"amdgpu-spill-cfi-saved-regs",
40+
cl::desc("Enable spilling the registers required for CFI emission"),
41+
cl::ReallyHidden, cl::init(false), cl::ZeroOrMore);
42+
3843
std::array<std::vector<int16_t>, 32> SIRegisterInfo::RegSplitParts;
3944
std::array<std::array<uint16_t, 32>, 9> SIRegisterInfo::SubRegFromChannelTable;
4045

@@ -561,6 +566,10 @@ unsigned SIRegisterInfo::getSubRegFromChannel(unsigned Channel,
561566
return SubRegFromChannelTable[NumRegIndex - 1][Channel];
562567
}
563568

569+
bool SIRegisterInfo::isCFISavedRegsSpillEnabled() const {
570+
return EnableSpillCFISavedRegs;
571+
}
572+
564573
MCRegister
565574
SIRegisterInfo::getAlignedHighSGPRForRC(const MachineFunction &MF,
566575
const unsigned Align,

llvm/lib/Target/AMDGPU/SIRegisterInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,8 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
8080
return SpillSGPRToVGPR;
8181
}
8282

83+
bool isCFISavedRegsSpillEnabled() const;
84+
8385
/// Return the largest available SGPR aligned to \p Align for the register
8486
/// class \p RC.
8587
MCRegister getAlignedHighSGPRForRC(const MachineFunction &MF,

0 commit comments

Comments
 (0)