Skip to content

Commit 7175200

Browse files
committed
[AMDGPU] Convert flat scratch SS->SV in FI elimination
- Fix an O0 crash on gfx950 by remapping SS to SV and materializing the offset in a VGPR when FrameReg is unavailable and no SGPR can be scavenged. Resolves issue #155902.
- Reuse an existing VGPR temp if available; otherwise scavenge one.
- Add a regression test: llvm/test/CodeGen/AMDGPU/flat-scratch-ss-to-sv-scavenge.ll.
Co-authored by Matt Arsenault
1 parent a7c0e78 commit 7175200

File tree

2 files changed

+666
-2
lines changed

2 files changed

+666
-2
lines changed

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 36 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2981,8 +2981,42 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
29812981
: RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
29822982
MI, false, 0, !UseSGPR);
29832983

2984-
// TODO: for flat scratch another attempt can be made with a VGPR index
2985-
// if no SGPRs can be scavenged.
2984+
// Fallback: If we need an SGPR but cannot scavenge one and there is no
2985+
// frame register, try to convert the flat-scratch instruction to use a
2986+
// VGPR index (SS -> SV) and materialize the offset in a VGPR.
2987+
if (!TmpSReg && !FrameReg && TII->isFLATScratch(*MI)) {
2988+
// Reuse an existing VGPR temp if available, otherwise scavenge one.
2989+
Register VTmp = (!UseSGPR && TmpReg)
2990+
? TmpReg
2991+
: RS->scavengeRegisterBackwards(
2992+
AMDGPU::VGPR_32RegClass, MI, false, 0);
2993+
if (VTmp) {
2994+
// Put the large offset into a VGPR and zero the immediate offset.
2995+
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), VTmp)
2996+
.addImm(Offset);
2997+
2998+
unsigned Opc = MI->getOpcode();
2999+
int NewOpc = AMDGPU::getFlatScratchInstSVfromSS(Opc);
3000+
if (NewOpc != -1) {
3001+
int OldSAddrIdx =
3002+
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr);
3003+
int NewVAddrIdx =
3004+
AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vaddr);
3005+
if (OldSAddrIdx == NewVAddrIdx && OldSAddrIdx >= 0) {
3006+
MI->setDesc(TII->get(NewOpc));
3007+
// Replace former saddr (now vaddr) with the VGPR index.
3008+
MI->getOperand(NewVAddrIdx).ChangeToRegister(VTmp, false);
3009+
// Reset the immediate offset to 0 as it is now in vaddr.
3010+
MachineOperand *OffOp =
3011+
TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
3012+
assert(OffOp && "Flat scratch SV form must have offset operand");
3013+
OffOp->setImm(0);
3014+
return false;
3015+
}
3016+
}
3017+
}
3018+
}
3019+
29863020
if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR))
29873021
report_fatal_error("Cannot scavenge register in FI elimination!");
29883022

0 commit comments

Comments
 (0)