Skip to content

Commit b72549b

Browse files
epilk, slinder1 and RamNalamothu
committed
[AMDGPU] Implement CFI for CSR spills
Introduce new SPILL pseudos to allow CFI to be generated for only CSR spills, and to produce ISA-instruction-level accurate unwind information. Other targets either generate slightly incorrect information or rely on conventions for how spills are placed within the entry block. The approach in this change produces larger unwind tables, with the increased size being spent on the additional DW_CFA_advance_location instructions needed to describe the unwinding accurately.

Co-authored-by: Scott Linder <[email protected]>
Co-authored-by: Venkata Ramanaiah Nalamothu <[email protected]>
1 parent fc1ef9f commit b72549b

File tree

87 files changed

+7892
-3627
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

87 files changed

+7892
-3627
lines changed

llvm/lib/Target/AMDGPU/SIFrameLowering.cpp

Lines changed: 93 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2245,17 +2245,49 @@ bool SIFrameLowering::allocateScavengingFrameIndexesNearIncomingSP(
22452245
return true;
22462246
}
22472247

2248+
/// Return true if \p Reg, or any register aliasing it, is in the live-in set
/// of \p MBB.
static bool isLiveIntoMBB(MCRegister Reg, MachineBasicBlock &MBB,
                          const TargetRegisterInfo *TRI) {
  // Walk Reg itself plus all of its aliases (IncludeSelf = true); an
  // overlapping live-in of any aliasing register counts.
  MCRegAliasIterator Alias(Reg, TRI, /*IncludeSelf=*/true);
  for (; Alias.isValid(); ++Alias)
    if (MBB.isLiveIn(*Alias))
      return true;
  return false;
}
2257+
22482258
bool SIFrameLowering::spillCalleeSavedRegisters(
22492259
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
22502260
ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
22512261
MachineFunction *MF = MBB.getParent();
22522262
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
2253-
if (!ST.useVGPRBlockOpsForCSR())
2254-
return false;
2263+
const SIInstrInfo *TII = ST.getInstrInfo();
2264+
const SIRegisterInfo *SITRI = static_cast<const SIRegisterInfo *>(TRI);
2265+
2266+
if (!ST.useVGPRBlockOpsForCSR()) {
2267+
for (const CalleeSavedInfo &CS : CSI) {
2268+
// Insert the spill to the stack frame.
2269+
unsigned Reg = CS.getReg();
2270+
2271+
if (CS.isSpilledToReg()) {
2272+
BuildMI(MBB, MI, DebugLoc(), TII->get(TargetOpcode::COPY),
2273+
CS.getDstReg())
2274+
.addReg(Reg, getKillRegState(true));
2275+
} else {
2276+
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(
2277+
Reg, Reg == SITRI->getReturnAddressReg(*MF) ? MVT::i64 : MVT::i32);
2278+
// If this value was already livein, we probably have a direct use of
2279+
// the incoming register value, so don't kill at the spill point. This
2280+
// happens since we pass some special inputs (workgroup IDs) in the
2281+
// callee saved range.
2282+
const bool IsLiveIn = isLiveIntoMBB(Reg, MBB, TRI);
2283+
TII->storeRegToStackSlotCFI(MBB, MI, Reg, !IsLiveIn, CS.getFrameIdx(),
2284+
RC);
2285+
}
2286+
}
2287+
return true;
2288+
}
22552289

22562290
MachineFrameInfo &FrameInfo = MF->getFrameInfo();
2257-
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
2258-
const SIInstrInfo *TII = ST.getInstrInfo();
22592291
SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
22602292

22612293
const TargetRegisterClass *BlockRegClass =
@@ -2279,10 +2311,10 @@ bool SIFrameLowering::spillCalleeSavedRegisters(
22792311
FrameInfo.getObjectAlign(FrameIndex));
22802312

22812313
BuildMI(MBB, MI, MI->getDebugLoc(),
2282-
TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_SAVE))
2314+
TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_CFI_SAVE))
22832315
.addReg(Reg, getKillRegState(false))
22842316
.addFrameIndex(FrameIndex)
2285-
.addReg(MFI->getStackPtrOffsetReg())
2317+
.addReg(FuncInfo->getStackPtrOffsetReg())
22862318
.addImm(0)
22872319
.addImm(Mask)
22882320
.addMemOperand(MMO);
@@ -2470,6 +2502,22 @@ MachineInstr *SIFrameLowering::buildCFI(MachineBasicBlock &MBB,
24702502
.setMIFlag(flag);
24712503
}
24722504

2505+
/// Create a CFI index describing a spill of the VGPR/AGPR \p Reg to another
/// VGPR/AGPR \p RegCopy, and insert a CFI_INSTRUCTION referencing it at
/// \p MBBI.
///
/// \returns the inserted CFI_INSTRUCTION MachineInstr.
MachineInstr *SIFrameLowering::buildCFIForVRegToVRegSpill(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, const Register Reg, const Register RegCopy) const {
  MachineFunction &MF = *MBB.getParent();
  const MCRegisterInfo &MCRI = *MF.getContext().getRegisterInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  int DwarfReg = MCRI.getDwarfRegNum(Reg, /*isEH=*/false);
  int DwarfRegCopy = MCRI.getDwarfRegNum(RegCopy, /*isEH=*/false);
  // Match buildCFIForVGPRToVMEMSpill: getDwarfRegNum returns -1 when a
  // register has no DWARF number, which would make the emitted rule
  // meaningless; catch that in debug builds.
  assert(DwarfReg != -1 && DwarfRegCopy != -1 &&
         "expected DWARF register numbers for vector register spill CFI");

  // The rule only describes lanes that are currently enabled, so the exec
  // mask register is encoded alongside the lane size and wavefront width.
  unsigned MaskReg = MCRI.getDwarfRegNum(
      ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC, /*isEH=*/false);
  auto CFIInst = MCCFIInstruction::createLLVMVectorRegisterMask(
      nullptr, DwarfReg, DwarfRegCopy, VGPRLaneBitSize, MaskReg,
      ST.getWavefrontSize());
  return buildCFI(MBB, MBBI, DL, std::move(CFIInst));
}
2520+
24732521
MachineInstr *SIFrameLowering::buildCFIForSGPRToVGPRSpill(
24742522
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
24752523
const DebugLoc &DL, const Register SGPR, const Register VGPR,
@@ -2518,6 +2566,34 @@ MachineInstr *SIFrameLowering::buildCFIForSGPRToVGPRSpill(
25182566
return buildCFI(MBB, MBBI, DL, std::move(CFIInst));
25192567
}
25202568

2569+
/// Create a CFI index describing a spill of SGPR \p SGPR to memory at
/// \p Offset, and insert a CFI_INSTRUCTION referencing it at \p MBBI.
///
/// \returns the inserted CFI_INSTRUCTION MachineInstr.
MachineInstr *SIFrameLowering::buildCFIForSGPRToVMEMSpill(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, unsigned SGPR, int64_t Offset) const {
  const MCRegisterInfo &MCRI =
      *MBB.getParent()->getContext().getRegisterInfo();
  // A plain DW_CFA_offset rule suffices for a scalar register saved to
  // memory.
  auto CFIInst = llvm::MCCFIInstruction::createOffset(
      nullptr, MCRI.getDwarfRegNum(SGPR, /*isEH=*/false), Offset);
  return buildCFI(MBB, MBBI, DL, CFIInst);
}
2578+
2579+
/// Create a CFI index describing a spill of VGPR \p VGPR to memory at
/// \p Offset, and insert a CFI_INSTRUCTION referencing it at \p MBBI.
///
/// \returns the inserted CFI_INSTRUCTION MachineInstr.
MachineInstr *SIFrameLowering::buildCFIForVGPRToVMEMSpill(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, unsigned VGPR, int64_t Offset) const {
  const MachineFunction &MF = *MBB.getParent();
  const MCRegisterInfo &MCRI = *MF.getContext().getRegisterInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  // getDwarfRegNum returns -1 for registers without a DWARF number; the
  // rule below would be meaningless in that case.
  const int DwarfVGPR = MCRI.getDwarfRegNum(VGPR, /*isEH=*/false);
  assert(DwarfVGPR != -1);

  // Only lanes enabled in exec are described, so encode the exec mask
  // register together with the lane size and wavefront width.
  const unsigned ExecMaskReg = MCRI.getDwarfRegNum(
      ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC, /*isEH=*/false);
  return buildCFI(MBB, MBBI, DL,
                  MCCFIInstruction::createLLVMVectorOffset(
                      nullptr, DwarfVGPR, VGPRLaneBitSize, ExecMaskReg,
                      ST.getWavefrontSize(), Offset));
}
2596+
25212597
MachineInstr *SIFrameLowering::buildCFIForRegToSGPRPairSpill(
25222598
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
25232599
const DebugLoc &DL, const Register Reg, const Register SGPRPair) const {
@@ -2538,3 +2614,14 @@ MachineInstr *SIFrameLowering::buildCFIForRegToSGPRPairSpill(
25382614
nullptr, DwarfReg, DwarfSGPR0, SGPRBitSize, DwarfSGPR1, SGPRBitSize);
25392615
return buildCFI(MBB, MBBI, DL, std::move(CFIInst));
25402616
}
2617+
2618+
/// Create a CFI index with a DW_CFA_same_value rule for \p Reg, and insert a
/// CFI_INSTRUCTION referencing it at \p MBBI.
///
/// \returns the inserted CFI_INSTRUCTION MachineInstr.
MachineInstr *
SIFrameLowering::buildCFIForSameValue(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MBBI,
                                      const DebugLoc &DL, Register Reg) const {
  const MCRegisterInfo &MCRI =
      *MBB.getParent()->getContext().getRegisterInfo();
  const int DwarfReg = MCRI.getDwarfRegNum(Reg, /*isEH=*/false);
  return buildCFI(MBB, MBBI, DL,
                  MCCFIInstruction::createSameValue(nullptr, DwarfReg));
}

llvm/lib/Target/AMDGPU/SIFrameLowering.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,13 @@ class SIFrameLowering final : public AMDGPUFrameLowering {
120120
const DebugLoc &DL, const MCCFIInstruction &CFIInst,
121121
MachineInstr::MIFlag flag = MachineInstr::FrameSetup) const;
122122

123+
/// Create a CFI index describing a spill of the VGPR/AGPR \p Reg to another
124+
/// VGPR/AGPR \p RegCopy and build a MachineInstr around it.
125+
MachineInstr *buildCFIForVRegToVRegSpill(MachineBasicBlock &MBB,
126+
MachineBasicBlock::iterator MBBI,
127+
const DebugLoc &DL,
128+
const Register Reg,
129+
const Register RegCopy) const;
123130
/// Create a CFI index describing a spill of an SGPR to a single lane of
124131
/// a VGPR and build a MachineInstr around it.
125132
MachineInstr *buildCFIForSGPRToVGPRSpill(MachineBasicBlock &MBB,
@@ -134,10 +141,25 @@ class SIFrameLowering final : public AMDGPUFrameLowering {
134141
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
135142
const DebugLoc &DL, Register SGPR,
136143
ArrayRef<SIRegisterInfo::SpilledReg> VGPRSpills) const;
144+
/// Create a CFI index describing a spill of a SGPR to VMEM and
145+
/// build a MachineInstr around it.
146+
MachineInstr *buildCFIForSGPRToVMEMSpill(MachineBasicBlock &MBB,
147+
MachineBasicBlock::iterator MBBI,
148+
const DebugLoc &DL, unsigned SGPR,
149+
int64_t Offset) const;
150+
/// Create a CFI index describing a spill of a VGPR to VMEM and
151+
/// build a MachineInstr around it.
152+
MachineInstr *buildCFIForVGPRToVMEMSpill(MachineBasicBlock &MBB,
153+
MachineBasicBlock::iterator MBBI,
154+
const DebugLoc &DL, unsigned VGPR,
155+
int64_t Offset) const;
137156
MachineInstr *buildCFIForRegToSGPRPairSpill(MachineBasicBlock &MBB,
138157
MachineBasicBlock::iterator MBBI,
139158
const DebugLoc &DL, Register Reg,
140159
Register SGPRPair) const;
160+
MachineInstr *buildCFIForSameValue(MachineBasicBlock &MBB,
161+
MachineBasicBlock::iterator MBBI,
162+
const DebugLoc &DL, Register Reg) const;
141163
// Returns true if the function may need to reserve space on the stack for the
142164
// CWSR trap handler.
143165
bool mayReserveScratchForCWSR(const MachineFunction &MF) const;

0 commit comments

Comments
 (0)