@@ -317,10 +317,28 @@ class PrologEpilogSGPRSpillBuilder {
317317 bool IsFramePtrPrologSpill;
318318 bool NeedsFrameMoves;
319319
320- bool isExec (Register Reg) const {
320+ static bool isExec (Register Reg) {
321321 return Reg == AMDGPU::EXEC_LO || Reg == AMDGPU::EXEC;
322322 }
323323
324+ /// If this builder requires SuperReg-based CFI, which is emitted after all
325+ /// SubRegs are actually spilled, return the Register which should be used
326+ /// as input to getDwarfRegNum. Otherwise, CFI should be generated per-SubReg.
327+ ///
328+ /// Note: Most spills handled by this builder generate CFI after each
329+ /// SubReg spill, as each SubReg maps directly to a CFI register via
330+ /// getDwarfRegNum(SubReg, false). All other cases currently
331+ /// correspond to the SuperReg directly.
332+ std::optional<Register> getCFISuperReg () const {
333+ if (IsFramePtrPrologSpill)
334+ return FuncInfo->getFrameOffsetReg ();
335+ // FIXME: CFI for EXEC needs a fix by accurately computing the spill
336+ // offset for both the low and high components.
337+ if (isExec (SuperReg))
338+ return AMDGPU::EXEC;
339+ return std::nullopt ;
340+ }
341+
324342 void saveToMemory (const int FI) const {
325343 MachineRegisterInfo &MRI = MF.getRegInfo ();
326344 const GCNSubtarget &ST = MF.getSubtarget <GCNSubtarget>();
@@ -333,6 +351,13 @@ class PrologEpilogSGPRSpillBuilder {
333351 if (!TmpVGPR)
334352 report_fatal_error (" failed to find free scratch register" );
335353
354+ auto BuildCFI = [&](Register Reg) {
355+ TFI->buildCFI (MBB, MI, DL,
356+ MCCFIInstruction::createOffset (
357+ nullptr , MCRI->getDwarfRegNum (Reg, false ),
358+ MFI.getObjectOffset (FI) * ST.getWavefrontSize ()));
359+ };
360+ std::optional<Register> CFISuperReg = getCFISuperReg ();
336361 for (unsigned I = 0 , DwordOff = 0 ; I < NumSubRegs; ++I) {
337362 Register SubReg = NumSubRegs == 1
338363 ? SuperReg
@@ -342,23 +367,12 @@ class PrologEpilogSGPRSpillBuilder {
342367
343368 buildPrologSpill (ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL, TmpVGPR,
344369 FI, FrameReg, DwordOff);
345- if (NeedsFrameMoves) {
346- if (isExec (SuperReg) && (I == NumSubRegs - 1 ))
347- SubReg = AMDGPU::EXEC;
348- else if (IsFramePtrPrologSpill)
349- SubReg = FuncInfo->getFrameOffsetReg ();
350-
351- // FIXME: CFI for EXEC needs a fix by accurately computing the spill
352- // offset for both the low and high components.
353- if (SubReg != AMDGPU::EXEC_LO) {
354- TFI->buildCFI (MBB, MI, DL,
355- MCCFIInstruction::createOffset (
356- nullptr , MCRI->getDwarfRegNum (SubReg, false ),
357- MFI.getObjectOffset (FI) * ST.getWavefrontSize ()));
358- }
359- }
370+ if (NeedsFrameMoves && !CFISuperReg)
371+ BuildCFI (SubReg);
360372 DwordOff += 4 ;
361373 }
374+ if (NeedsFrameMoves && CFISuperReg)
375+ BuildCFI (*CFISuperReg);
362376 }
363377
364378 void saveToVGPRLane (const int FI) const {
@@ -369,6 +383,7 @@ class PrologEpilogSGPRSpillBuilder {
369383 FuncInfo->getSGPRSpillToPhysicalVGPRLanes (FI);
370384 assert (Spill.size () == NumSubRegs);
371385
386+ std::optional<Register> CFISuperReg = getCFISuperReg ();
372387 for (unsigned I = 0 ; I < NumSubRegs; ++I) {
373388 Register SubReg = NumSubRegs == 1
374389 ? SuperReg
@@ -378,20 +393,12 @@ class PrologEpilogSGPRSpillBuilder {
378393 .addReg (SubReg)
379394 .addImm (Spill[I].Lane )
380395 .addReg (Spill[I].VGPR , RegState::Undef);
381- if (NeedsFrameMoves) {
382- if (isExec (SuperReg)) {
383- if (I == NumSubRegs - 1 )
384- TFI->buildCFIForSGPRToVGPRSpill (MBB, MI, DL, AMDGPU::EXEC, Spill);
385- } else if (IsFramePtrPrologSpill) {
386- TFI->buildCFIForSGPRToVGPRSpill (MBB, MI, DL,
387- FuncInfo->getFrameOffsetReg (),
388- Spill[I].VGPR , Spill[I].Lane );
389- } else {
396+ if (NeedsFrameMoves && !CFISuperReg)
390397 TFI->buildCFIForSGPRToVGPRSpill (MBB, MI, DL, SubReg, Spill[I].VGPR ,
391398 Spill[I].Lane );
392- }
393- }
394399 }
400+ if (NeedsFrameMoves && CFISuperReg)
401+ TFI->buildCFIForSGPRToVGPRSpill (MBB, MI, DL, *CFISuperReg, Spill);
395402 }
396403
397404 void copyToScratchSGPR (Register DstReg) const {
@@ -1194,13 +1201,11 @@ static Register buildScratchExecCopy(LiveRegUnits &LiveUnits,
11941201 return ScratchExecCopy;
11951202}
11961203
1197- void SIFrameLowering::emitCSRSpillStores (MachineFunction &MF,
1198- MachineBasicBlock &MBB,
1199- MachineBasicBlock::iterator MBBI,
1200- DebugLoc &DL, LiveRegUnits &LiveUnits,
1201- Register FrameReg,
1202- Register FramePtrRegScratchCopy,
1203- const bool NeedsFrameMoves) const {
1204+ void SIFrameLowering::emitCSRSpillStores (
1205+ MachineFunction &MF, MachineBasicBlock &MBB,
1206+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
1207+ LiveRegUnits &LiveUnits, Register FrameReg, Register FramePtrRegScratchCopy,
1208+ const bool NeedsFrameMoves) const {
12041209 SIMachineFunctionInfo *FuncInfo = MF.getInfo <SIMachineFunctionInfo>();
12051210 MachineFrameInfo &MFI = MF.getFrameInfo ();
12061211 const GCNSubtarget &ST = MF.getSubtarget <GCNSubtarget>();
@@ -1313,8 +1318,9 @@ void SIFrameLowering::emitCSRSpillStores(MachineFunction &MF,
13131318
13141319void SIFrameLowering::emitCSRSpillRestores (
13151320 MachineFunction &MF, MachineBasicBlock &MBB,
1316- MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits,
1317- Register FrameReg, Register FramePtrRegScratchCopy) const {
1321+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
1322+ LiveRegUnits &LiveUnits, Register FrameReg,
1323+ Register FramePtrRegScratchCopy) const {
13181324 const SIMachineFunctionInfo *FuncInfo = MF.getInfo <SIMachineFunctionInfo>();
13191325 const GCNSubtarget &ST = MF.getSubtarget <GCNSubtarget>();
13201326 const SIInstrInfo *TII = ST.getInstrInfo ();
@@ -2495,6 +2501,9 @@ MachineInstr *SIFrameLowering::buildCFIForSGPRToVGPRSpill(
24952501 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
24962502 const DebugLoc &DL, Register SGPR,
24972503 ArrayRef<SIRegisterInfo::SpilledReg> VGPRSpills) const {
2504+ if (VGPRSpills.size () == 1u )
2505+ return buildCFIForSGPRToVGPRSpill (MBB, MBBI, DL, SGPR, VGPRSpills[0 ].VGPR ,
2506+ VGPRSpills[0 ].Lane );
24982507 const MachineFunction &MF = *MBB.getParent ();
24992508 const MCRegisterInfo &MCRI = *MF.getContext ().getRegisterInfo ();
25002509
@@ -2526,8 +2535,8 @@ MachineInstr *SIFrameLowering::buildCFIForRegToSGPRPairSpill(
25262535 const GCNSubtarget &ST = MF.getSubtarget <GCNSubtarget>();
25272536 const SIRegisterInfo &TRI = ST.getInstrInfo ()->getRegisterInfo ();
25282537
2529- int SGPR0 = TRI.getSubReg (SGPRPair, AMDGPU::sub0);
2530- int SGPR1 = TRI.getSubReg (SGPRPair, AMDGPU::sub1);
2538+ MCRegister SGPR0 = TRI.getSubReg (SGPRPair, AMDGPU::sub0);
2539+ MCRegister SGPR1 = TRI.getSubReg (SGPRPair, AMDGPU::sub1);
25312540
25322541 int DwarfReg = MCRI.getDwarfRegNum (Reg, false );
25332542 int DwarfSGPR0 = MCRI.getDwarfRegNum (SGPR0, false );
0 commit comments