Skip to content

Commit 20b3150

Browse files
epilk, slinder1, RamNalamothu
committed
[AMDGPU] Implement CFI for CSR spills
Introduce new SPILL pseudos to allow CFI to be generated for only CSR spills, and to provide ISA-instruction-level accurate unwind information. Other targets either generate slightly incorrect information or rely on conventions for how spills are placed within the entry block. The approach in this change produces larger unwind tables, with the increased size being spent on the additional DW_CFA_advance_location instructions needed to describe the unwinding accurately.

Co-authored-by: Scott Linder <[email protected]>
Co-authored-by: Venkata Ramanaiah Nalamothu <[email protected]>
1 parent 89377a5 commit 20b3150

File tree

87 files changed

+7892
-3627
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

87 files changed

+7892
-3627
lines changed

llvm/lib/Target/AMDGPU/SIFrameLowering.cpp

Lines changed: 93 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2251,17 +2251,49 @@ bool SIFrameLowering::allocateScavengingFrameIndexesNearIncomingSP(
22512251
return true;
22522252
}
22532253

2254+
// Returns true if \p Reg, or any register aliasing it, is in \p MBB's
// live-in list. Used to decide whether a CSR spill may kill the register:
// if the value is live into the block it likely has a direct use there.
static bool isLiveIntoMBB(MCRegister Reg, MachineBasicBlock &MBB,
2255+
const TargetRegisterInfo *TRI) {
2256+
// MCRegAliasIterator with IncludeSelf=true visits Reg itself plus every
// overlapping register (sub- and super-registers).
for (MCRegAliasIterator R(Reg, TRI, true); R.isValid(); ++R) {
2257+
if (MBB.isLiveIn(*R)) {
2258+
return true;
2259+
}
2260+
}
2261+
return false;
2262+
}
2263+
22542264
bool SIFrameLowering::spillCalleeSavedRegisters(
22552265
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
22562266
ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
22572267
MachineFunction *MF = MBB.getParent();
22582268
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
2259-
if (!ST.useVGPRBlockOpsForCSR())
2260-
return false;
2269+
const SIInstrInfo *TII = ST.getInstrInfo();
2270+
const SIRegisterInfo *SITRI = static_cast<const SIRegisterInfo *>(TRI);
2271+
2272+
if (!ST.useVGPRBlockOpsForCSR()) {
2273+
for (const CalleeSavedInfo &CS : CSI) {
2274+
// Insert the spill to the stack frame.
2275+
unsigned Reg = CS.getReg();
2276+
2277+
if (CS.isSpilledToReg()) {
2278+
BuildMI(MBB, MI, DebugLoc(), TII->get(TargetOpcode::COPY),
2279+
CS.getDstReg())
2280+
.addReg(Reg, getKillRegState(true));
2281+
} else {
2282+
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(
2283+
Reg, Reg == SITRI->getReturnAddressReg(*MF) ? MVT::i64 : MVT::i32);
2284+
// If this value was already livein, we probably have a direct use of
2285+
// the incoming register value, so don't kill at the spill point. This
2286+
// happens since we pass some special inputs (workgroup IDs) in the
2287+
// callee saved range.
2288+
const bool IsLiveIn = isLiveIntoMBB(Reg, MBB, TRI);
2289+
TII->storeRegToStackSlotCFI(MBB, MI, Reg, !IsLiveIn, CS.getFrameIdx(),
2290+
RC);
2291+
}
2292+
}
2293+
return true;
2294+
}
22612295

22622296
MachineFrameInfo &FrameInfo = MF->getFrameInfo();
2263-
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
2264-
const SIInstrInfo *TII = ST.getInstrInfo();
22652297
SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
22662298

22672299
const TargetRegisterClass *BlockRegClass =
@@ -2285,10 +2317,10 @@ bool SIFrameLowering::spillCalleeSavedRegisters(
22852317
FrameInfo.getObjectAlign(FrameIndex));
22862318

22872319
BuildMI(MBB, MI, MI->getDebugLoc(),
2288-
TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_SAVE))
2320+
TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_CFI_SAVE))
22892321
.addReg(Reg, getKillRegState(false))
22902322
.addFrameIndex(FrameIndex)
2291-
.addReg(MFI->getStackPtrOffsetReg())
2323+
.addReg(FuncInfo->getStackPtrOffsetReg())
22922324
.addImm(0)
22932325
.addImm(Mask)
22942326
.addMemOperand(MMO);
@@ -2476,6 +2508,22 @@ MachineInstr *SIFrameLowering::buildCFI(MachineBasicBlock &MBB,
24762508
.setMIFlag(flag);
24772509
}
24782510

2511+
// Emit a CFI instruction describing a spill of the vector register \p Reg
// into another vector register \p RegCopy. The rule is a per-lane register
// mask (createLLVMVectorRegisterMask) qualified by the EXEC mask register,
// since only lanes active at the spill point hold the saved value.
// Returns the CFI_INSTRUCTION MachineInstr created by buildCFI.
MachineInstr *SIFrameLowering::buildCFIForVRegToVRegSpill(
2512+
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
2513+
const DebugLoc &DL, const Register Reg, const Register RegCopy) const {
2514+
MachineFunction &MF = *MBB.getParent();
2515+
const MCRegisterInfo &MCRI = *MF.getContext().getRegisterInfo();
2516+
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
2517+
2518+
// The lane mask is EXEC; wave32 subtargets only use the low 32 bits.
// getDwarfRegNum(..., /*isEH=*/false) maps to the .debug_frame numbering.
unsigned MaskReg = MCRI.getDwarfRegNum(
2519+
ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC, false);
2520+
// VGPRLaneBitSize is presumably the per-lane register width in bits
// (32 for VGPRs) — defined elsewhere in this file; TODO confirm.
auto CFIInst = MCCFIInstruction::createLLVMVectorRegisterMask(
2521+
nullptr, MCRI.getDwarfRegNum(Reg, false),
2522+
MCRI.getDwarfRegNum(RegCopy, false), VGPRLaneBitSize, MaskReg,
2523+
ST.getWavefrontSize());
2524+
return buildCFI(MBB, MBBI, DL, std::move(CFIInst));
2525+
}
2526+
24792527
MachineInstr *SIFrameLowering::buildCFIForSGPRToVGPRSpill(
24802528
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
24812529
const DebugLoc &DL, const Register SGPR, const Register VGPR,
@@ -2527,6 +2575,34 @@ MachineInstr *SIFrameLowering::buildCFIForSGPRToVGPRSpill(
25272575
return buildCFI(MBB, MBBI, DL, std::move(CFIInst));
25282576
}
25292577

2578+
// Emit a CFI instruction describing a spill of \p SGPR to scratch (VMEM)
// memory at CFA-relative byte offset \p Offset. A scalar register is
// uniform across the wave, so a plain DW_CFA_offset rule suffices — no
// EXEC-mask qualification is needed (contrast buildCFIForVGPRToVMEMSpill).
MachineInstr *SIFrameLowering::buildCFIForSGPRToVMEMSpill(
2579+
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
2580+
const DebugLoc &DL, unsigned SGPR, int64_t Offset) const {
2581+
MachineFunction &MF = *MBB.getParent();
2582+
const MCRegisterInfo &MCRI = *MF.getContext().getRegisterInfo();
2583+
// createOffset: "SGPR's previous value is saved at CFA + Offset".
return buildCFI(MBB, MBBI, DL,
2584+
llvm::MCCFIInstruction::createOffset(
2585+
nullptr, MCRI.getDwarfRegNum(SGPR, false), Offset));
2586+
}
2587+
2588+
// Emit a CFI instruction describing a spill of \p VGPR to scratch (VMEM)
// memory at byte offset \p Offset. Unlike the SGPR case, a VGPR spill is
// per-lane: the rule (createLLVMVectorOffset) is qualified by the EXEC
// mask so the unwinder knows which lanes' values were actually stored.
MachineInstr *SIFrameLowering::buildCFIForVGPRToVMEMSpill(
2589+
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
2590+
const DebugLoc &DL, unsigned VGPR, int64_t Offset) const {
2591+
const MachineFunction &MF = *MBB.getParent();
2592+
const MCRegisterInfo &MCRI = *MF.getContext().getRegisterInfo();
2593+
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
2594+
2595+
// Every VGPR must have a DWARF register number; -1 means the mapping is
// missing, which would produce an unencodable CFI rule.
int DwarfVGPR = MCRI.getDwarfRegNum(VGPR, false);
2596+
assert(DwarfVGPR != -1);
2597+
2598+
// Lane mask is EXEC (EXEC_LO on wave32 subtargets).
unsigned MaskReg = MCRI.getDwarfRegNum(
2599+
ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC, false);
2600+
auto CFIInst = MCCFIInstruction::createLLVMVectorOffset(
2601+
nullptr, DwarfVGPR, VGPRLaneBitSize, MaskReg, ST.getWavefrontSize(),
2602+
Offset);
2603+
return buildCFI(MBB, MBBI, DL, std::move(CFIInst));
2604+
}
2605+
25302606
MachineInstr *SIFrameLowering::buildCFIForRegToSGPRPairSpill(
25312607
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
25322608
const DebugLoc &DL, const Register Reg, const Register SGPRPair) const {
@@ -2547,3 +2623,14 @@ MachineInstr *SIFrameLowering::buildCFIForRegToSGPRPairSpill(
25472623
nullptr, DwarfReg, DwarfSGPR0, SGPRBitSize, DwarfSGPR1, SGPRBitSize);
25482624
return buildCFI(MBB, MBBI, DL, std::move(CFIInst));
25492625
}
2626+
2627+
// Emit a DW_CFA_same_value rule for \p Reg: the register still holds the
// value it had in the caller, so the unwinder restores it by doing
// nothing. Used when a register is known not to be clobbered (or has been
// restored) at this point.
MachineInstr *
2628+
SIFrameLowering::buildCFIForSameValue(MachineBasicBlock &MBB,
2629+
MachineBasicBlock::iterator MBBI,
2630+
const DebugLoc &DL, Register Reg) const {
2631+
const MachineFunction &MF = *MBB.getParent();
2632+
const MCRegisterInfo &MCRI = *MF.getContext().getRegisterInfo();
2633+
int DwarfReg = MCRI.getDwarfRegNum(Reg, /*isEH=*/false);
2634+
auto CFIInst = MCCFIInstruction::createSameValue(nullptr, DwarfReg);
2635+
return buildCFI(MBB, MBBI, DL, std::move(CFIInst));
2636+
}

llvm/lib/Target/AMDGPU/SIFrameLowering.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,13 @@ class SIFrameLowering final : public AMDGPUFrameLowering {
121121
const DebugLoc &DL, const MCCFIInstruction &CFIInst,
122122
MachineInstr::MIFlag flag = MachineInstr::FrameSetup) const;
123123

124+
/// Create a CFI index describing a spill of the VGPR/AGPR \p Reg to another
125+
/// VGPR/AGPR \p RegCopy and build a MachineInstr around it.
126+
MachineInstr *buildCFIForVRegToVRegSpill(MachineBasicBlock &MBB,
127+
MachineBasicBlock::iterator MBBI,
128+
const DebugLoc &DL,
129+
const Register Reg,
130+
const Register RegCopy) const;
124131
/// Create a CFI index describing a spill of an SGPR to a single lane of
125132
/// a VGPR and build a MachineInstr around it.
126133
MachineInstr *buildCFIForSGPRToVGPRSpill(MachineBasicBlock &MBB,
@@ -135,10 +142,25 @@ class SIFrameLowering final : public AMDGPUFrameLowering {
135142
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
136143
const DebugLoc &DL, Register SGPR,
137144
ArrayRef<SIRegisterInfo::SpilledReg> VGPRSpills) const;
145+
/// Create a CFI index describing a spill of a SGPR to VMEM and
146+
/// build a MachineInstr around it.
147+
MachineInstr *buildCFIForSGPRToVMEMSpill(MachineBasicBlock &MBB,
148+
MachineBasicBlock::iterator MBBI,
149+
const DebugLoc &DL, unsigned SGPR,
150+
int64_t Offset) const;
151+
/// Create a CFI index describing a spill of a VGPR to VMEM and
152+
/// build a MachineInstr around it.
153+
MachineInstr *buildCFIForVGPRToVMEMSpill(MachineBasicBlock &MBB,
154+
MachineBasicBlock::iterator MBBI,
155+
const DebugLoc &DL, unsigned VGPR,
156+
int64_t Offset) const;
138157
MachineInstr *buildCFIForRegToSGPRPairSpill(MachineBasicBlock &MBB,
139158
MachineBasicBlock::iterator MBBI,
140159
const DebugLoc &DL, Register Reg,
141160
Register SGPRPair) const;
161+
MachineInstr *buildCFIForSameValue(MachineBasicBlock &MBB,
162+
MachineBasicBlock::iterator MBBI,
163+
const DebugLoc &DL, Register Reg) const;
142164
// Returns true if the function may need to reserve space on the stack for the
143165
// CWSR trap handler.
144166
bool mayReserveScratchForCWSR(const MachineFunction &MF) const;

0 commit comments

Comments
 (0)