
Commit 9624e4d

epilk, slinder1, and RamNalamothu committed
[AMDGPU] Implement CFI for CSR spills
Introduce new SPILL pseudos so that CFI can be generated for CSR spills only, and so that the unwind information is accurate at the level of individual ISA instructions. Other targets either generate slightly incorrect information or rely on conventions for how spills are placed within the entry block. The approach in this change produces larger unwind tables; the increased size is spent on the additional DW_CFA_advance_location instructions needed to describe the unwinding accurately.

Co-authored-by: Scott Linder <[email protected]>
Co-authored-by: Venkata Ramanaiah Nalamothu <[email protected]>
1 parent 17e3b5d commit 9624e4d
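
For readers unfamiliar with how per-spill CFI is represented in MachineIR, the sketch below shows the general pattern the commit's buildCFI helpers follow: create an MCCFIInstruction, register it with the MachineFunction, and insert a CFI_INSTRUCTION pseudo at the spill point. The helper name and insertion point here are illustrative only; they are not part of this commit.

// Minimal sketch (not from this commit): attach a DW_CFA_offset-style CFI
// entry at the point where a callee-saved register has just been stored.
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCDwarf.h"

using namespace llvm;

// Hypothetical helper: record that the register numbered DwarfReg is saved at
// CFA + Offset, immediately after the spill store that MBBI points past.
static void emitSpillCFI(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
                         unsigned DwarfReg, int64_t Offset) {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  // The MCCFIInstruction is stored on the MachineFunction and referenced by
  // index from the CFI_INSTRUCTION pseudo.
  unsigned CFIIndex = MF.addFrameInst(
      MCCFIInstruction::createOffset(/*L=*/nullptr, DwarfReg, Offset));
  // Because this pseudo sits at the exact spill point, the streamer later
  // emits a DW_CFA_advance_location before it, which is where the larger
  // unwind tables mentioned in the commit message come from.
  BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex)
      .setMIFlag(MachineInstr::FrameSetup);
}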

File tree

92 files changed: +16066 −5877 lines (large commit; only part of the diff is shown below)

llvm/lib/Target/AMDGPU/SIFrameLowering.cpp

Lines changed: 93 additions & 6 deletions
@@ -2234,17 +2234,49 @@ bool SIFrameLowering::allocateScavengingFrameIndexesNearIncomingSP(
   return true;
 }
 
+static bool isLiveIntoMBB(MCRegister Reg, MachineBasicBlock &MBB,
+                          const TargetRegisterInfo *TRI) {
+  for (MCRegAliasIterator R(Reg, TRI, true); R.isValid(); ++R) {
+    if (MBB.isLiveIn(*R)) {
+      return true;
+    }
+  }
+  return false;
+}
+
 bool SIFrameLowering::spillCalleeSavedRegisters(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
     ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
   MachineFunction *MF = MBB.getParent();
   const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
-  if (!ST.useVGPRBlockOpsForCSR())
-    return false;
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  const SIRegisterInfo *SITRI = static_cast<const SIRegisterInfo *>(TRI);
+
+  if (!ST.useVGPRBlockOpsForCSR()) {
+    for (const CalleeSavedInfo &CS : CSI) {
+      // Insert the spill to the stack frame.
+      unsigned Reg = CS.getReg();
+
+      if (CS.isSpilledToReg()) {
+        BuildMI(MBB, MI, DebugLoc(), TII->get(TargetOpcode::COPY),
+                CS.getDstReg())
+            .addReg(Reg, getKillRegState(true));
+      } else {
+        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(
+            Reg, Reg == SITRI->getReturnAddressReg(*MF) ? MVT::i64 : MVT::i32);
+        // If this value was already livein, we probably have a direct use of
+        // the incoming register value, so don't kill at the spill point. This
+        // happens since we pass some special inputs (workgroup IDs) in the
+        // callee saved range.
+        const bool IsLiveIn = isLiveIntoMBB(Reg, MBB, TRI);
+        TII->storeRegToStackSlotCFI(MBB, MI, Reg, !IsLiveIn, CS.getFrameIdx(),
+                                    RC, TRI);
+      }
+    }
+    return true;
+  }
 
   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
-  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
-  const SIInstrInfo *TII = ST.getInstrInfo();
   SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
 
   const TargetRegisterClass *BlockRegClass =
@@ -2268,10 +2300,10 @@ bool SIFrameLowering::spillCalleeSavedRegisters(
         FrameInfo.getObjectAlign(FrameIndex));
 
     BuildMI(MBB, MI, MI->getDebugLoc(),
-            TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_SAVE))
+            TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_CFI_SAVE))
         .addReg(Reg, getKillRegState(false))
         .addFrameIndex(FrameIndex)
-        .addReg(MFI->getStackPtrOffsetReg())
+        .addReg(FuncInfo->getStackPtrOffsetReg())
         .addImm(0)
         .addImm(Mask)
         .addMemOperand(MMO);
@@ -2459,6 +2491,22 @@ MachineInstr *SIFrameLowering::buildCFI(MachineBasicBlock &MBB,
       .setMIFlag(flag);
 }
 
+MachineInstr *SIFrameLowering::buildCFIForVRegToVRegSpill(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    const DebugLoc &DL, const Register Reg, const Register RegCopy) const {
+  MachineFunction &MF = *MBB.getParent();
+  const MCRegisterInfo &MCRI = *MF.getContext().getRegisterInfo();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+
+  unsigned MaskReg = MCRI.getDwarfRegNum(
+      ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC, false);
+  auto CFIInst = MCCFIInstruction::createLLVMVectorRegisterMask(
+      nullptr, MCRI.getDwarfRegNum(Reg, false),
+      MCRI.getDwarfRegNum(RegCopy, false), VGPRLaneBitSize, MaskReg,
+      ST.getWavefrontSize());
+  return buildCFI(MBB, MBBI, DL, std::move(CFIInst));
+}
+
 MachineInstr *SIFrameLowering::buildCFIForSGPRToVGPRSpill(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
     const DebugLoc &DL, const Register SGPR, const Register VGPR,
@@ -2507,6 +2555,34 @@ MachineInstr *SIFrameLowering::buildCFIForSGPRToVGPRSpill(
   return buildCFI(MBB, MBBI, DL, std::move(CFIInst));
 }
 
+MachineInstr *SIFrameLowering::buildCFIForSGPRToVMEMSpill(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    const DebugLoc &DL, unsigned SGPR, int64_t Offset) const {
+  MachineFunction &MF = *MBB.getParent();
+  const MCRegisterInfo &MCRI = *MF.getContext().getRegisterInfo();
+  return buildCFI(MBB, MBBI, DL,
+                  llvm::MCCFIInstruction::createOffset(
+                      nullptr, MCRI.getDwarfRegNum(SGPR, false), Offset));
+}
+
+MachineInstr *SIFrameLowering::buildCFIForVGPRToVMEMSpill(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    const DebugLoc &DL, unsigned VGPR, int64_t Offset) const {
+  const MachineFunction &MF = *MBB.getParent();
+  const MCRegisterInfo &MCRI = *MF.getContext().getRegisterInfo();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+
+  int DwarfVGPR = MCRI.getDwarfRegNum(VGPR, false);
+  assert(DwarfVGPR != -1);
+
+  unsigned MaskReg = MCRI.getDwarfRegNum(
+      ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC, false);
+  auto CFIInst = MCCFIInstruction::createLLVMVectorOffset(
+      nullptr, DwarfVGPR, VGPRLaneBitSize, MaskReg, ST.getWavefrontSize(),
+      Offset);
+  return buildCFI(MBB, MBBI, DL, std::move(CFIInst));
+}
+
 MachineInstr *SIFrameLowering::buildCFIForRegToSGPRPairSpill(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
     const DebugLoc &DL, const Register Reg, const Register SGPRPair) const {
@@ -2527,3 +2603,14 @@ MachineInstr *SIFrameLowering::buildCFIForRegToSGPRPairSpill(
       nullptr, DwarfReg, DwarfSGPR0, SGPRBitSize, DwarfSGPR1, SGPRBitSize);
   return buildCFI(MBB, MBBI, DL, std::move(CFIInst));
 }
+
+MachineInstr *
+SIFrameLowering::buildCFIForSameValue(MachineBasicBlock &MBB,
+                                      MachineBasicBlock::iterator MBBI,
+                                      const DebugLoc &DL, Register Reg) const {
+  const MachineFunction &MF = *MBB.getParent();
+  const MCRegisterInfo &MCRI = *MF.getContext().getRegisterInfo();
+  int DwarfReg = MCRI.getDwarfRegNum(Reg, /*isEH=*/false);
+  auto CFIInst = MCCFIInstruction::createSameValue(nullptr, DwarfReg);
+  return buildCFI(MBB, MBBI, DL, std::move(CFIInst));
+}
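
As a rough usage illustration of the helpers added above, a CSR spill lowering path could pick the CFI builder based on the register class. The dispatch function below is hypothetical (in the commit the CFI emission is actually driven through storeRegToStackSlotCFI and the new *_CFI_SAVE pseudos); only the two buildCFIFor*VMEMSpill helpers it calls come from this change.

// Hypothetical dispatch (assumed example, not the commit's code): emit the
// appropriate CFI for a callee-saved register that was just stored to scratch
// memory at the given CFA-relative offset.
#include "SIFrameLowering.h"
#include "SIRegisterInfo.h"

using namespace llvm;

static void emitCSRSpillCFI(const SIFrameLowering &FL,
                            const SIRegisterInfo &TRI, MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
                            const DebugLoc &DL, Register Reg, int64_t Offset) {
  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  if (TRI.isVGPR(MRI, Reg)) {
    // Per-lane store: the unwinder also needs the EXEC mask and lane size,
    // which buildCFIForVGPRToVMEMSpill folds into the vector-offset rule.
    FL.buildCFIForVGPRToVMEMSpill(MBB, MBBI, DL, Reg, Offset);
  } else {
    // Wave-uniform SGPR spilled through memory: a plain register-at-offset
    // rule is enough.
    FL.buildCFIForSGPRToVMEMSpill(MBB, MBBI, DL, Reg, Offset);
  }
}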

llvm/lib/Target/AMDGPU/SIFrameLowering.h

Lines changed: 22 additions & 0 deletions
@@ -120,6 +120,13 @@ class SIFrameLowering final : public AMDGPUFrameLowering {
                          const DebugLoc &DL, const MCCFIInstruction &CFIInst,
                          MachineInstr::MIFlag flag = MachineInstr::FrameSetup) const;
 
+  /// Create a CFI index describing a spill of the VGPR/AGPR \p Reg to another
+  /// VGPR/AGPR \p RegCopy and build a MachineInstr around it.
+  MachineInstr *buildCFIForVRegToVRegSpill(MachineBasicBlock &MBB,
+                                           MachineBasicBlock::iterator MBBI,
+                                           const DebugLoc &DL,
+                                           const Register Reg,
+                                           const Register RegCopy) const;
   /// Create a CFI index describing a spill of an SGPR to a single lane of
   /// a VGPR and build a MachineInstr around it.
   MachineInstr *buildCFIForSGPRToVGPRSpill(MachineBasicBlock &MBB,
@@ -134,10 +141,25 @@ class SIFrameLowering final : public AMDGPUFrameLowering {
       MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
       const DebugLoc &DL, Register SGPR,
       ArrayRef<SIRegisterInfo::SpilledReg> VGPRSpills) const;
+  /// Create a CFI index describing a spill of a SGPR to VMEM and
+  /// build a MachineInstr around it.
+  MachineInstr *buildCFIForSGPRToVMEMSpill(MachineBasicBlock &MBB,
+                                           MachineBasicBlock::iterator MBBI,
+                                           const DebugLoc &DL, unsigned SGPR,
+                                           int64_t Offset) const;
+  /// Create a CFI index describing a spill of a VGPR to VMEM and
+  /// build a MachineInstr around it.
+  MachineInstr *buildCFIForVGPRToVMEMSpill(MachineBasicBlock &MBB,
+                                           MachineBasicBlock::iterator MBBI,
+                                           const DebugLoc &DL, unsigned VGPR,
+                                           int64_t Offset) const;
   MachineInstr *buildCFIForRegToSGPRPairSpill(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const DebugLoc &DL, Register Reg,
                                               Register SGPRPair) const;
+  MachineInstr *buildCFIForSameValue(MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator MBBI,
+                                     const DebugLoc &DL, Register Reg) const;
   // Returns true if the function may need to reserve space on the stack for the
   // CWSR trap handler.
   bool mayReserveScratchForCWSR(const MachineFunction &MF) const;
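
A brief, hedged note on buildCFIForSameValue, which the header now also declares: DW_CFA_same_value tells the unwinder that a register again holds its value from function entry, so a plausible (assumed) use is right after a callee-saved register has been reloaded, for example:

// Assumed example only: after reloading a CSR in the epilog, mark it as
// holding its entry value again so the unwinder no longer looks at the stale
// spill slot. Everything except buildCFIForSameValue is illustrative.
static void markCSRRestored(const llvm::SIFrameLowering &FL,
                            llvm::MachineBasicBlock &MBB,
                            llvm::MachineBasicBlock::iterator MBBI,
                            const llvm::DebugLoc &DL, llvm::Register Reg) {
  // ... the reload of Reg from its spill slot would be inserted here ...
  FL.buildCFIForSameValue(MBB, MBBI, DL, Reg); // becomes DW_CFA_same_value
}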
