Skip to content

Commit ebced26

Browse files
committed
Make CFI for SuperRegs more clear
1 parent 5a1bcb2 commit ebced26

File tree

2 files changed

+51
-41
lines changed

2 files changed

+51
-41
lines changed

llvm/lib/Target/AMDGPU/SIFrameLowering.cpp

Lines changed: 47 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -317,10 +317,28 @@ class PrologEpilogSGPRSpillBuilder {
317317
bool IsFramePtrPrologSpill;
318318
bool NeedsFrameMoves;
319319

320-
bool isExec(Register Reg) const {
320+
static bool isExec(Register Reg) {
321321
return Reg == AMDGPU::EXEC_LO || Reg == AMDGPU::EXEC;
322322
}
323323

324+
/// If this builder requires SuperReg-based CFI, which is emitted after all
325+
/// SubRegs are actually spilled, return the Register which should be used
326+
/// as input to getDwarfRegNum. Otherwise, CFI should be generated per-SubReg.
327+
///
328+
/// Note: Most spills handled by this builder generate CFI after each
329+
/// SubReg spill, as each SubReg maps directly to a CFI register via
330+
/// getDwarfRegNum(SubReg, false). All other cases currently
331+
/// correspond to the SuperReg directly.
332+
std::optional<Register> getCFISuperReg() const {
333+
if (IsFramePtrPrologSpill)
334+
return FuncInfo->getFrameOffsetReg();
335+
// FIXME: CFI for EXEC needs a fix by accurately computing the spill
336+
// offset for both the low and high components.
337+
if (isExec(SuperReg))
338+
return AMDGPU::EXEC;
339+
return std::nullopt;
340+
}
341+
324342
void saveToMemory(const int FI) const {
325343
MachineRegisterInfo &MRI = MF.getRegInfo();
326344
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
@@ -333,6 +351,13 @@ class PrologEpilogSGPRSpillBuilder {
333351
if (!TmpVGPR)
334352
report_fatal_error("failed to find free scratch register");
335353

354+
auto BuildCFI = [&](Register Reg) {
355+
TFI->buildCFI(MBB, MI, DL,
356+
MCCFIInstruction::createOffset(
357+
nullptr, MCRI->getDwarfRegNum(Reg, false),
358+
MFI.getObjectOffset(FI) * ST.getWavefrontSize()));
359+
};
360+
std::optional<Register> CFISuperReg = getCFISuperReg();
336361
for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) {
337362
Register SubReg = NumSubRegs == 1
338363
? SuperReg
@@ -342,23 +367,12 @@ class PrologEpilogSGPRSpillBuilder {
342367

343368
buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL, TmpVGPR,
344369
FI, FrameReg, DwordOff);
345-
if (NeedsFrameMoves) {
346-
if (isExec(SuperReg) && (I == NumSubRegs - 1))
347-
SubReg = AMDGPU::EXEC;
348-
else if (IsFramePtrPrologSpill)
349-
SubReg = FuncInfo->getFrameOffsetReg();
350-
351-
// FIXME: CFI for EXEC needs a fix by accurately computing the spill
352-
// offset for both the low and high components.
353-
if (SubReg != AMDGPU::EXEC_LO) {
354-
TFI->buildCFI(MBB, MI, DL,
355-
MCCFIInstruction::createOffset(
356-
nullptr, MCRI->getDwarfRegNum(SubReg, false),
357-
MFI.getObjectOffset(FI) * ST.getWavefrontSize()));
358-
}
359-
}
370+
if (NeedsFrameMoves && !CFISuperReg)
371+
BuildCFI(SubReg);
360372
DwordOff += 4;
361373
}
374+
if (NeedsFrameMoves && CFISuperReg)
375+
BuildCFI(*CFISuperReg);
362376
}
363377

364378
void saveToVGPRLane(const int FI) const {
@@ -369,6 +383,7 @@ class PrologEpilogSGPRSpillBuilder {
369383
FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
370384
assert(Spill.size() == NumSubRegs);
371385

386+
std::optional<Register> CFISuperReg = getCFISuperReg();
372387
for (unsigned I = 0; I < NumSubRegs; ++I) {
373388
Register SubReg = NumSubRegs == 1
374389
? SuperReg
@@ -378,20 +393,12 @@ class PrologEpilogSGPRSpillBuilder {
378393
.addReg(SubReg)
379394
.addImm(Spill[I].Lane)
380395
.addReg(Spill[I].VGPR, RegState::Undef);
381-
if (NeedsFrameMoves) {
382-
if (isExec(SuperReg)) {
383-
if (I == NumSubRegs - 1)
384-
TFI->buildCFIForSGPRToVGPRSpill(MBB, MI, DL, AMDGPU::EXEC, Spill);
385-
} else if (IsFramePtrPrologSpill) {
386-
TFI->buildCFIForSGPRToVGPRSpill(MBB, MI, DL,
387-
FuncInfo->getFrameOffsetReg(),
388-
Spill[I].VGPR, Spill[I].Lane);
389-
} else {
396+
if (NeedsFrameMoves && !CFISuperReg)
390397
TFI->buildCFIForSGPRToVGPRSpill(MBB, MI, DL, SubReg, Spill[I].VGPR,
391398
Spill[I].Lane);
392-
}
393-
}
394399
}
400+
if (NeedsFrameMoves && CFISuperReg)
401+
TFI->buildCFIForSGPRToVGPRSpill(MBB, MI, DL, *CFISuperReg, Spill);
395402
}
396403

397404
void copyToScratchSGPR(Register DstReg) const {
@@ -1194,13 +1201,11 @@ static Register buildScratchExecCopy(LiveRegUnits &LiveUnits,
11941201
return ScratchExecCopy;
11951202
}
11961203

1197-
void SIFrameLowering::emitCSRSpillStores(MachineFunction &MF,
1198-
MachineBasicBlock &MBB,
1199-
MachineBasicBlock::iterator MBBI,
1200-
DebugLoc &DL, LiveRegUnits &LiveUnits,
1201-
Register FrameReg,
1202-
Register FramePtrRegScratchCopy,
1203-
const bool NeedsFrameMoves) const {
1204+
void SIFrameLowering::emitCSRSpillStores(
1205+
MachineFunction &MF, MachineBasicBlock &MBB,
1206+
MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
1207+
LiveRegUnits &LiveUnits, Register FrameReg, Register FramePtrRegScratchCopy,
1208+
const bool NeedsFrameMoves) const {
12041209
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
12051210
MachineFrameInfo &MFI = MF.getFrameInfo();
12061211
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
@@ -1313,8 +1318,9 @@ void SIFrameLowering::emitCSRSpillStores(MachineFunction &MF,
13131318

13141319
void SIFrameLowering::emitCSRSpillRestores(
13151320
MachineFunction &MF, MachineBasicBlock &MBB,
1316-
MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits,
1317-
Register FrameReg, Register FramePtrRegScratchCopy) const {
1321+
MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
1322+
LiveRegUnits &LiveUnits, Register FrameReg,
1323+
Register FramePtrRegScratchCopy) const {
13181324
const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
13191325
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
13201326
const SIInstrInfo *TII = ST.getInstrInfo();
@@ -2495,6 +2501,9 @@ MachineInstr *SIFrameLowering::buildCFIForSGPRToVGPRSpill(
24952501
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
24962502
const DebugLoc &DL, Register SGPR,
24972503
ArrayRef<SIRegisterInfo::SpilledReg> VGPRSpills) const {
2504+
if (VGPRSpills.size() == 1u)
2505+
return buildCFIForSGPRToVGPRSpill(MBB, MBBI, DL, SGPR, VGPRSpills[0].VGPR,
2506+
VGPRSpills[0].Lane);
24982507
const MachineFunction &MF = *MBB.getParent();
24992508
const MCRegisterInfo &MCRI = *MF.getContext().getRegisterInfo();
25002509

@@ -2526,8 +2535,8 @@ MachineInstr *SIFrameLowering::buildCFIForRegToSGPRPairSpill(
25262535
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
25272536
const SIRegisterInfo &TRI = ST.getInstrInfo()->getRegisterInfo();
25282537

2529-
int SGPR0 = TRI.getSubReg(SGPRPair, AMDGPU::sub0);
2530-
int SGPR1 = TRI.getSubReg(SGPRPair, AMDGPU::sub1);
2538+
MCRegister SGPR0 = TRI.getSubReg(SGPRPair, AMDGPU::sub0);
2539+
MCRegister SGPR1 = TRI.getSubReg(SGPRPair, AMDGPU::sub1);
25312540

25322541
int DwarfReg = MCRI.getDwarfRegNum(Reg, false);
25332542
int DwarfSGPR0 = MCRI.getDwarfRegNum(SGPR0, false);

llvm/lib/Target/AMDGPU/SIFrameLowering.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,14 @@ class SIFrameLowering final : public AMDGPUFrameLowering {
3737
void determinePrologEpilogSGPRSaves(MachineFunction &MF, BitVector &SavedRegs,
3838
bool NeedExecCopyReservedReg) const;
3939
void emitCSRSpillStores(MachineFunction &MF, MachineBasicBlock &MBB,
40-
MachineBasicBlock::iterator MBBI, DebugLoc &DL,
40+
MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
4141
LiveRegUnits &LiveUnits, Register FrameReg,
4242
Register FramePtrRegScratchCopy,
4343
const bool NeedsFrameMoves) const;
4444
void emitCSRSpillRestores(MachineFunction &MF, MachineBasicBlock &MBB,
45-
MachineBasicBlock::iterator MBBI, DebugLoc &DL,
46-
LiveRegUnits &LiveUnits, Register FrameReg,
45+
MachineBasicBlock::iterator MBBI,
46+
const DebugLoc &DL, LiveRegUnits &LiveUnits,
47+
Register FrameReg,
4748
Register FramePtrRegScratchCopy) const;
4849
bool
4950
assignCalleeSavedSpillSlots(MachineFunction &MF,

0 commit comments

Comments
 (0)