Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -267,11 +267,20 @@ bool SILowerSGPRSpills::spillCalleeSavedRegs(

std::vector<CalleeSavedInfo> CSI;
const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
MCRegister RetAddrReg = TRI->getReturnAddressReg(MF);
MCRegister RetAddrRegSub0 = TRI->getSubReg(RetAddrReg, AMDGPU::sub0);
MCRegister RetAddrRegSub1 = TRI->getSubReg(RetAddrReg, AMDGPU::sub1);
bool SpillRetAddrReg = false;

for (unsigned I = 0; CSRegs[I]; ++I) {
MCRegister Reg = CSRegs[I];

if (SavedRegs.test(Reg)) {
if (Reg == RetAddrRegSub0 || Reg == RetAddrRegSub1) {
SpillRetAddrReg = true;
continue;
}

const TargetRegisterClass *RC =
TRI->getMinimalPhysRegClass(Reg, MVT::i32);
int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC),
Expand All @@ -282,6 +291,18 @@ bool SILowerSGPRSpills::spillCalleeSavedRegs(
}
}

// Return address uses a register pair. Add the super register to the
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this really special case the return address? Could we compact the representation by emitting CFI for the largest possible tuple covering?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The return address is special-cased in the spec for CFI itself, and must be a single "register" (the CFI notion of a register, actually just a column in the table that the CFI instructions build). So at some point we collapse any representation down into a special case for return address.

Really we should be tracking the pieces independently, but it would require a late pass to materialize the final CFI instructions, or some special casing for new CFI_INSTRUCTION kinds.

// CSI list so that it's easier to identify the entire spill and CFI
// can be emitted appropriately.
if (SpillRetAddrReg) {
const TargetRegisterClass *RC =
TRI->getMinimalPhysRegClass(RetAddrReg, MVT::i64);
int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC),
TRI->getSpillAlign(*RC), true);
CSI.push_back(CalleeSavedInfo(RetAddrReg, JunkFI));
CalleeSavedFIs.push_back(JunkFI);
}

if (!CSI.empty()) {
for (MachineBasicBlock *SaveBlock : SaveBlocks)
insertCSRSaves(*SaveBlock, CSI, Indexes, LIS);
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/GlobalISel/assert-align.ll
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ define ptr addrspace(1) @call_assert_align() {
; CHECK-NEXT: v_writelane_b32 v40, s31, 1
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_mov_b32_e32 v2, 0
; CHECK-NEXT: v_readlane_b32 s30, v40, 0
; CHECK-NEXT: global_store_dword v[0:1], v2, off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: v_readlane_b32 s31, v40, 1
; CHECK-NEXT: v_readlane_b32 s30, v40, 0
; CHECK-NEXT: s_mov_b32 s32, s33
; CHECK-NEXT: v_readlane_b32 s4, v40, 2
; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -238,8 +238,8 @@ define void @func_caller_stack() #2 {
; MUBUF-NEXT: v_writelane_b32 v40, s31, 1
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
; MUBUF-NEXT: s_swappc_b64 s[30:31], s[4:5]
; MUBUF-NEXT: v_readlane_b32 s31, v40, 1
; MUBUF-NEXT: v_readlane_b32 s30, v40, 0
; MUBUF-NEXT: v_readlane_b32 s31, v40, 1
; MUBUF-NEXT: s_mov_b32 s32, s33
; MUBUF-NEXT: v_readlane_b32 s4, v40, 2
; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1
Expand Down Expand Up @@ -277,8 +277,8 @@ define void @func_caller_stack() #2 {
; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_v16i32_v16i32_v4i32@rel32@hi+12
; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1
; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0
; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1
; FLATSCR-NEXT: s_mov_b32 s32, s33
; FLATSCR-NEXT: v_readlane_b32 s0, v40, 2
; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1
Expand Down Expand Up @@ -363,8 +363,8 @@ define void @func_caller_byval(ptr addrspace(5) %argptr) #2 {
; MUBUF-NEXT: s_waitcnt vmcnt(1)
; MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:60
; MUBUF-NEXT: s_swappc_b64 s[30:31], s[4:5]
; MUBUF-NEXT: v_readlane_b32 s31, v40, 1
; MUBUF-NEXT: v_readlane_b32 s30, v40, 0
; MUBUF-NEXT: v_readlane_b32 s31, v40, 1
; MUBUF-NEXT: s_mov_b32 s32, s33
; MUBUF-NEXT: v_readlane_b32 s4, v40, 2
; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1
Expand Down Expand Up @@ -414,8 +414,8 @@ define void @func_caller_byval(ptr addrspace(5) %argptr) #2 {
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s32 offset:56
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1
; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0
; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1
; FLATSCR-NEXT: s_mov_b32 s32, s33
; FLATSCR-NEXT: v_readlane_b32 s0, v40, 2
; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll
Original file line number Diff line number Diff line change
Expand Up @@ -244,8 +244,8 @@ define void @sink_null_insert_pt(ptr addrspace(4) %arg0) #0 {
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_swappc_b64 s[30:31], 0
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_mov_b32 s32, s33
; GFX9-NEXT: v_readlane_b32 s4, v40, 2
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ define void @parent_func_missing_inputs() #0 {
; FIXEDABI-NEXT: s_addc_u32 s17, s17, requires_all_inputs@rel32@hi+12
; FIXEDABI-NEXT: v_writelane_b32 v40, s31, 1
; FIXEDABI-NEXT: s_swappc_b64 s[30:31], s[16:17]
; FIXEDABI-NEXT: v_readlane_b32 s31, v40, 1
; FIXEDABI-NEXT: v_readlane_b32 s30, v40, 0
; FIXEDABI-NEXT: v_readlane_b32 s31, v40, 1
; FIXEDABI-NEXT: s_mov_b32 s32, s33
; FIXEDABI-NEXT: v_readlane_b32 s4, v40, 2
; FIXEDABI-NEXT: s_or_saveexec_b64 s[6:7], -1
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/amdgcn-call-whole-wave.ll
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ define amdgpu_gfx void @basic_test(i32 %x, i32 inreg %c, ptr addrspace(1) %ptr)
; DAGISEL-NEXT: s_clause 0x1
; DAGISEL-NEXT: scratch_load_b32 v41, off, s33
; DAGISEL-NEXT: scratch_load_b32 v40, off, s33 offset:4
; DAGISEL-NEXT: v_readlane_b32 s31, v42, 1
; DAGISEL-NEXT: v_readlane_b32 s30, v42, 0
; DAGISEL-NEXT: v_readlane_b32 s31, v42, 1
; DAGISEL-NEXT: s_mov_b32 s32, s33
; DAGISEL-NEXT: v_readlane_b32 s0, v42, 2
; DAGISEL-NEXT: s_or_saveexec_b32 s1, -1
Expand Down Expand Up @@ -78,8 +78,8 @@ define amdgpu_gfx void @basic_test(i32 %x, i32 inreg %c, ptr addrspace(1) %ptr)
; GISEL-NEXT: s_clause 0x1
; GISEL-NEXT: scratch_load_b32 v41, off, s33
; GISEL-NEXT: scratch_load_b32 v40, off, s33 offset:4
; GISEL-NEXT: v_readlane_b32 s31, v42, 1
; GISEL-NEXT: v_readlane_b32 s30, v42, 0
; GISEL-NEXT: v_readlane_b32 s31, v42, 1
; GISEL-NEXT: s_mov_b32 s32, s33
; GISEL-NEXT: v_readlane_b32 s0, v42, 2
; GISEL-NEXT: s_or_saveexec_b32 s1, -1
Expand Down Expand Up @@ -787,8 +787,8 @@ define amdgpu_gfx void @ret_void(i32 %x) #0 {
; DAGISEL-NEXT: s_wait_alu 0xfffe
; DAGISEL-NEXT: s_swappc_b64 s[30:31], s[0:1]
; DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; DAGISEL-NEXT: v_readlane_b32 s31, v40, 1
; DAGISEL-NEXT: v_readlane_b32 s30, v40, 0
; DAGISEL-NEXT: v_readlane_b32 s31, v40, 1
; DAGISEL-NEXT: s_mov_b32 s32, s33
; DAGISEL-NEXT: v_readlane_b32 s0, v40, 2
; DAGISEL-NEXT: s_or_saveexec_b32 s1, -1
Expand Down Expand Up @@ -822,8 +822,8 @@ define amdgpu_gfx void @ret_void(i32 %x) #0 {
; GISEL-NEXT: s_wait_alu 0xfffe
; GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-NEXT: v_readlane_b32 s31, v40, 1
; GISEL-NEXT: v_readlane_b32 s30, v40, 0
; GISEL-NEXT: v_readlane_b32 s31, v40, 1
; GISEL-NEXT: s_mov_b32 s32, s33
; GISEL-NEXT: v_readlane_b32 s0, v40, 2
; GISEL-NEXT: s_or_saveexec_b32 s1, -1
Expand Down
Loading
Loading