Skip to content

Commit 7723913

Browse files
epilkslinder1RamNalamothu
committed
[AMDGPU] Implement CFI for CSR spills
Introduce new SPILL pseudos so that CFI is generated only for CSR spills, and so that the resulting unwind information is accurate at the ISA-instruction level. Other targets either generate slightly incorrect information or rely on conventions for how spills are placed within the entry block. The approach in this change produces larger unwind tables, with the increased size being spent on additional DW_CFA_advance_location instructions needed to describe the unwinding accurately. Co-authored-by: Scott Linder <[email protected]> Co-authored-by: Venkata Ramanaiah Nalamothu <[email protected]>
1 parent ee97ce9 commit 7723913

File tree

101 files changed

+22312
-12206
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

101 files changed

+22312
-12206
lines changed

llvm/lib/Target/AMDGPU/SIFrameLowering.cpp

Lines changed: 93 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2244,17 +2244,49 @@ bool SIFrameLowering::allocateScavengingFrameIndexesNearIncomingSP(
22442244
return true;
22452245
}
22462246

2247+
/// Return true if \p Reg, or any register that aliases it, is listed as a
/// live-in of \p MBB.
static bool isLiveIntoMBB(MCRegister Reg, MachineBasicBlock &MBB,
                          const TargetRegisterInfo *TRI) {
  // Walk the alias set of Reg (the iterator is built with its third argument
  // set to true) and stop at the first alias that MBB reports as live-in.
  MCRegAliasIterator Alias(Reg, TRI, true);
  while (Alias.isValid()) {
    if (MBB.isLiveIn(*Alias))
      return true;
    ++Alias;
  }
  return false;
}
2256+
22472257
bool SIFrameLowering::spillCalleeSavedRegisters(
22482258
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
22492259
ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
22502260
MachineFunction *MF = MBB.getParent();
22512261
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
2252-
if (!ST.useVGPRBlockOpsForCSR())
2253-
return false;
2262+
const SIInstrInfo *TII = ST.getInstrInfo();
2263+
const SIRegisterInfo *SITRI = static_cast<const SIRegisterInfo *>(TRI);
2264+
2265+
if (!ST.useVGPRBlockOpsForCSR()) {
2266+
for (const CalleeSavedInfo &CS : CSI) {
2267+
// Insert the spill to the stack frame.
2268+
unsigned Reg = CS.getReg();
2269+
2270+
if (CS.isSpilledToReg()) {
2271+
BuildMI(MBB, MI, DebugLoc(), TII->get(TargetOpcode::COPY),
2272+
CS.getDstReg())
2273+
.addReg(Reg, getKillRegState(true));
2274+
} else {
2275+
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(
2276+
Reg, Reg == SITRI->getReturnAddressReg(*MF) ? MVT::i64 : MVT::i32);
2277+
// If this value was already livein, we probably have a direct use of
2278+
// the incoming register value, so don't kill at the spill point. This
2279+
// happens since we pass some special inputs (workgroup IDs) in the
2280+
// callee saved range.
2281+
const bool IsLiveIn = isLiveIntoMBB(Reg, MBB, TRI);
2282+
TII->storeRegToStackSlotCFI(MBB, MI, Reg, !IsLiveIn, CS.getFrameIdx(),
2283+
RC, TRI);
2284+
}
2285+
}
2286+
return true;
2287+
}
22542288

22552289
MachineFrameInfo &FrameInfo = MF->getFrameInfo();
2256-
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
2257-
const SIInstrInfo *TII = ST.getInstrInfo();
22582290
SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
22592291

22602292
const TargetRegisterClass *BlockRegClass =
@@ -2278,10 +2310,10 @@ bool SIFrameLowering::spillCalleeSavedRegisters(
22782310
FrameInfo.getObjectAlign(FrameIndex));
22792311

22802312
BuildMI(MBB, MI, MI->getDebugLoc(),
2281-
TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_SAVE))
2313+
TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_CFI_SAVE))
22822314
.addReg(Reg, getKillRegState(false))
22832315
.addFrameIndex(FrameIndex)
2284-
.addReg(MFI->getStackPtrOffsetReg())
2316+
.addReg(FuncInfo->getStackPtrOffsetReg())
22852317
.addImm(0)
22862318
.addImm(Mask)
22872319
.addMemOperand(MMO);
@@ -2467,6 +2499,22 @@ MachineInstr *SIFrameLowering::buildCFI(MachineBasicBlock &MBB,
24672499
.setMIFlag(flag);
24682500
}
24692501

2502+
/// Build a CFI MachineInstr recording that \p Reg has been saved in
/// \p RegCopy. The rule is a vector-register-mask description whose mask
/// register is EXEC_LO for wave32 subtargets and EXEC otherwise.
MachineInstr *SIFrameLowering::buildCFIForVRegToVRegSpill(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, const Register Reg, const Register RegCopy) const {
  MachineFunction &MF = *MBB.getParent();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const MCRegisterInfo &MCRI = *MF.getContext().getRegisterInfo();

  // DWARF number of the execution-mask register matching the wave size.
  unsigned MaskReg = MCRI.getDwarfRegNum(
      ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC, false);

  // DWARF numbers of the saved register and of the register holding the copy.
  const auto DwarfReg = MCRI.getDwarfRegNum(Reg, false);
  const auto DwarfRegCopy = MCRI.getDwarfRegNum(RegCopy, false);

  return buildCFI(MBB, MBBI, DL,
                  MCCFIInstruction::createLLVMVectorRegisterMask(
                      nullptr, DwarfReg, DwarfRegCopy, VGPRLaneBitSize,
                      MaskReg, ST.getWavefrontSize()));
}
2517+
24702518
MachineInstr *SIFrameLowering::buildCFIForSGPRToVGPRSpill(
24712519
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
24722520
const DebugLoc &DL, const Register SGPR, const Register VGPR,
@@ -2515,6 +2563,34 @@ MachineInstr *SIFrameLowering::buildCFIForSGPRToVGPRSpill(
25152563
return buildCFI(MBB, MBBI, DL, std::move(CFIInst));
25162564
}
25172565

2566+
/// Build a CFI MachineInstr describing \p SGPR as saved at \p Offset, using a
/// plain offset rule keyed on the register's DWARF number.
MachineInstr *SIFrameLowering::buildCFIForSGPRToVMEMSpill(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, unsigned SGPR, int64_t Offset) const {
  const MachineFunction &MF = *MBB.getParent();
  const MCRegisterInfo &MCRI = *MF.getContext().getRegisterInfo();

  // Translate the physical register into its DWARF register number.
  const auto DwarfSGPR = MCRI.getDwarfRegNum(SGPR, false);

  auto CFIInst = MCCFIInstruction::createOffset(nullptr, DwarfSGPR, Offset);
  return buildCFI(MBB, MBBI, DL, std::move(CFIInst));
}
2575+
2576+
/// Build a CFI MachineInstr describing \p VGPR as saved at \p Offset. The rule
/// is a vector-offset description whose mask register is EXEC_LO for wave32
/// subtargets and EXEC otherwise.
MachineInstr *SIFrameLowering::buildCFIForVGPRToVMEMSpill(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, unsigned VGPR, int64_t Offset) const {
  const MachineFunction &MF = *MBB.getParent();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const MCRegisterInfo &MCRI = *MF.getContext().getRegisterInfo();

  // The register being described must have a DWARF number.
  const int DwarfVGPR = MCRI.getDwarfRegNum(VGPR, false);
  assert(DwarfVGPR != -1);

  // DWARF number of the execution-mask register matching the wave size.
  unsigned MaskReg = MCRI.getDwarfRegNum(
      ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC, false);

  return buildCFI(MBB, MBBI, DL,
                  MCCFIInstruction::createLLVMVectorOffset(
                      nullptr, DwarfVGPR, VGPRLaneBitSize, MaskReg,
                      ST.getWavefrontSize(), Offset));
}
2593+
25182594
MachineInstr *SIFrameLowering::buildCFIForRegToSGPRPairSpill(
25192595
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
25202596
const DebugLoc &DL, const Register Reg, const Register SGPRPair) const {
@@ -2535,3 +2611,14 @@ MachineInstr *SIFrameLowering::buildCFIForRegToSGPRPairSpill(
25352611
nullptr, DwarfReg, DwarfSGPR0, SGPRBitSize, DwarfSGPR1, SGPRBitSize);
25362612
return buildCFI(MBB, MBBI, DL, std::move(CFIInst));
25372613
}
2614+
2615+
/// Build a CFI MachineInstr that applies a same-value rule to \p Reg,
/// identified by its non-EH DWARF register number.
MachineInstr *
SIFrameLowering::buildCFIForSameValue(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MBBI,
                                      const DebugLoc &DL, Register Reg) const {
  const MCRegisterInfo &MCRI =
      *MBB.getParent()->getContext().getRegisterInfo();
  int DwarfReg = MCRI.getDwarfRegNum(Reg, /*isEH=*/false);
  return buildCFI(MBB, MBBI, DL,
                  MCCFIInstruction::createSameValue(nullptr, DwarfReg));
}

llvm/lib/Target/AMDGPU/SIFrameLowering.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,13 @@ class SIFrameLowering final : public AMDGPUFrameLowering {
120120
const DebugLoc &DL, const MCCFIInstruction &CFIInst,
121121
MachineInstr::MIFlag flag = MachineInstr::FrameSetup) const;
122122

123+
/// Create a CFI index describing a spill of the VGPR/AGPR \p Reg to another
124+
/// VGPR/AGPR \p RegCopy and build a MachineInstr around it.
125+
MachineInstr *buildCFIForVRegToVRegSpill(MachineBasicBlock &MBB,
126+
MachineBasicBlock::iterator MBBI,
127+
const DebugLoc &DL,
128+
const Register Reg,
129+
const Register RegCopy) const;
123130
/// Create a CFI index describing a spill of an SGPR to a single lane of
124131
/// a VGPR and build a MachineInstr around it.
125132
MachineInstr *buildCFIForSGPRToVGPRSpill(MachineBasicBlock &MBB,
@@ -134,10 +141,25 @@ class SIFrameLowering final : public AMDGPUFrameLowering {
134141
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
135142
const DebugLoc &DL, Register SGPR,
136143
ArrayRef<SIRegisterInfo::SpilledReg> VGPRSpills) const;
144+
/// Create a CFI index describing a spill of an SGPR to VMEM and
145+
/// build a MachineInstr around it.
146+
MachineInstr *buildCFIForSGPRToVMEMSpill(MachineBasicBlock &MBB,
147+
MachineBasicBlock::iterator MBBI,
148+
const DebugLoc &DL, unsigned SGPR,
149+
int64_t Offset) const;
150+
/// Create a CFI index describing a spill of a VGPR to VMEM and
151+
/// build a MachineInstr around it.
152+
MachineInstr *buildCFIForVGPRToVMEMSpill(MachineBasicBlock &MBB,
153+
MachineBasicBlock::iterator MBBI,
154+
const DebugLoc &DL, unsigned VGPR,
155+
int64_t Offset) const;
137156
MachineInstr *buildCFIForRegToSGPRPairSpill(MachineBasicBlock &MBB,
138157
MachineBasicBlock::iterator MBBI,
139158
const DebugLoc &DL, Register Reg,
140159
Register SGPRPair) const;
160+
/// Create a CFI index applying a same-value rule to \p Reg and build a
/// MachineInstr around it.
MachineInstr *buildCFIForSameValue(MachineBasicBlock &MBB,
161+
MachineBasicBlock::iterator MBBI,
162+
const DebugLoc &DL, Register Reg) const;
141163
// Returns true if the function may need to reserve space on the stack for the
142164
// CWSR trap handler.
143165
bool mayReserveScratchForCWSR(const MachineFunction &MF) const;

0 commit comments

Comments
 (0)