Skip to content

Commit 17e3b5d

Browse files
committed
Use register pair for PC spill
1 parent e811d05 commit 17e3b5d

File tree

65 files changed

+7469
-7451
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

65 files changed

+7469
-7451
lines changed

llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,11 +268,19 @@ bool SILowerSGPRSpills::spillCalleeSavedRegs(
268268

269269
std::vector<CalleeSavedInfo> CSI;
270270
const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
271+
Register RetAddrReg = TRI->getReturnAddressReg(MF);
272+
bool SpillRetAddrReg = false;
271273

272274
for (unsigned I = 0; CSRegs[I]; ++I) {
273275
MCRegister Reg = CSRegs[I];
274276

275277
if (SavedRegs.test(Reg)) {
278+
if (Reg == TRI->getSubReg(RetAddrReg, AMDGPU::sub0) ||
279+
Reg == TRI->getSubReg(RetAddrReg, AMDGPU::sub1)) {
280+
SpillRetAddrReg = true;
281+
continue;
282+
}
283+
276284
const TargetRegisterClass *RC =
277285
TRI->getMinimalPhysRegClass(Reg, MVT::i32);
278286
int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC),
@@ -283,6 +291,18 @@ bool SILowerSGPRSpills::spillCalleeSavedRegs(
283291
}
284292
}
285293

294+
// Return address uses a register pair. Add the super register to the
295+
// CSI list so that it's easier to identify the entire spill and CFI
296+
// can be emitted appropriately.
297+
if (SpillRetAddrReg) {
298+
const TargetRegisterClass *RC =
299+
TRI->getMinimalPhysRegClass(RetAddrReg, MVT::i64);
300+
int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC),
301+
TRI->getSpillAlign(*RC), true);
302+
CSI.push_back(CalleeSavedInfo(RetAddrReg, JunkFI));
303+
CalleeSavedFIs.push_back(JunkFI);
304+
}
305+
286306
if (!CSI.empty()) {
287307
for (MachineBasicBlock *SaveBlock : SaveBlocks)
288308
insertCSRSaves(*SaveBlock, CSI, Indexes, LIS);

llvm/test/CodeGen/AMDGPU/GlobalISel/assert-align.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,10 @@ define ptr addrspace(1) @call_assert_align() {
2323
; CHECK-NEXT: v_writelane_b32 v40, s31, 1
2424
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
2525
; CHECK-NEXT: v_mov_b32_e32 v2, 0
26+
; CHECK-NEXT: v_readlane_b32 s30, v40, 0
2627
; CHECK-NEXT: global_store_dword v[0:1], v2, off
2728
; CHECK-NEXT: s_waitcnt vmcnt(0)
2829
; CHECK-NEXT: v_readlane_b32 s31, v40, 1
29-
; CHECK-NEXT: v_readlane_b32 s30, v40, 0
3030
; CHECK-NEXT: s_mov_b32 s32, s33
3131
; CHECK-NEXT: v_readlane_b32 s4, v40, 2
3232
; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1

llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -238,8 +238,8 @@ define void @func_caller_stack() #2 {
238238
; MUBUF-NEXT: v_writelane_b32 v40, s31, 1
239239
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
240240
; MUBUF-NEXT: s_swappc_b64 s[30:31], s[4:5]
241-
; MUBUF-NEXT: v_readlane_b32 s31, v40, 1
242241
; MUBUF-NEXT: v_readlane_b32 s30, v40, 0
242+
; MUBUF-NEXT: v_readlane_b32 s31, v40, 1
243243
; MUBUF-NEXT: s_mov_b32 s32, s33
244244
; MUBUF-NEXT: v_readlane_b32 s4, v40, 2
245245
; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1
@@ -277,8 +277,8 @@ define void @func_caller_stack() #2 {
277277
; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_v16i32_v16i32_v4i32@rel32@hi+12
278278
; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1
279279
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
280-
; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1
281280
; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0
281+
; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1
282282
; FLATSCR-NEXT: s_mov_b32 s32, s33
283283
; FLATSCR-NEXT: v_readlane_b32 s0, v40, 2
284284
; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1
@@ -363,8 +363,8 @@ define void @func_caller_byval(ptr addrspace(5) %argptr) #2 {
363363
; MUBUF-NEXT: s_waitcnt vmcnt(1)
364364
; MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:60
365365
; MUBUF-NEXT: s_swappc_b64 s[30:31], s[4:5]
366-
; MUBUF-NEXT: v_readlane_b32 s31, v40, 1
367366
; MUBUF-NEXT: v_readlane_b32 s30, v40, 0
367+
; MUBUF-NEXT: v_readlane_b32 s31, v40, 1
368368
; MUBUF-NEXT: s_mov_b32 s32, s33
369369
; MUBUF-NEXT: v_readlane_b32 s4, v40, 2
370370
; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1
@@ -414,8 +414,8 @@ define void @func_caller_byval(ptr addrspace(5) %argptr) #2 {
414414
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
415415
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s32 offset:56
416416
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
417-
; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1
418417
; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0
418+
; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1
419419
; FLATSCR-NEXT: s_mov_b32 s32, s33
420420
; FLATSCR-NEXT: v_readlane_b32 s0, v40, 2
421421
; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1

llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -244,8 +244,8 @@ define void @sink_null_insert_pt(ptr addrspace(4) %arg0) #0 {
244244
; GFX9-NEXT: s_addk_i32 s32, 0x400
245245
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
246246
; GFX9-NEXT: s_swappc_b64 s[30:31], 0
247-
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
248247
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
248+
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
249249
; GFX9-NEXT: s_mov_b32 s32, s33
250250
; GFX9-NEXT: v_readlane_b32 s4, v40, 2
251251
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1

llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@ define void @parent_func_missing_inputs() #0 {
3030
; FIXEDABI-NEXT: s_addc_u32 s17, s17, requires_all_inputs@rel32@hi+12
3131
; FIXEDABI-NEXT: v_writelane_b32 v40, s31, 1
3232
; FIXEDABI-NEXT: s_swappc_b64 s[30:31], s[16:17]
33-
; FIXEDABI-NEXT: v_readlane_b32 s31, v40, 1
3433
; FIXEDABI-NEXT: v_readlane_b32 s30, v40, 0
34+
; FIXEDABI-NEXT: v_readlane_b32 s31, v40, 1
3535
; FIXEDABI-NEXT: s_mov_b32 s32, s33
3636
; FIXEDABI-NEXT: v_readlane_b32 s4, v40, 2
3737
; FIXEDABI-NEXT: s_or_saveexec_b64 s[6:7], -1

llvm/test/CodeGen/AMDGPU/amdgcn-call-whole-wave.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ define amdgpu_gfx void @basic_test(i32 %x, i32 inreg %c, ptr addrspace(1) %ptr)
3535
; DAGISEL-NEXT: s_clause 0x1
3636
; DAGISEL-NEXT: scratch_load_b32 v41, off, s33
3737
; DAGISEL-NEXT: scratch_load_b32 v40, off, s33 offset:4
38-
; DAGISEL-NEXT: v_readlane_b32 s31, v42, 1
3938
; DAGISEL-NEXT: v_readlane_b32 s30, v42, 0
39+
; DAGISEL-NEXT: v_readlane_b32 s31, v42, 1
4040
; DAGISEL-NEXT: s_mov_b32 s32, s33
4141
; DAGISEL-NEXT: v_readlane_b32 s0, v42, 2
4242
; DAGISEL-NEXT: s_or_saveexec_b32 s1, -1
@@ -78,8 +78,8 @@ define amdgpu_gfx void @basic_test(i32 %x, i32 inreg %c, ptr addrspace(1) %ptr)
7878
; GISEL-NEXT: s_clause 0x1
7979
; GISEL-NEXT: scratch_load_b32 v41, off, s33
8080
; GISEL-NEXT: scratch_load_b32 v40, off, s33 offset:4
81-
; GISEL-NEXT: v_readlane_b32 s31, v42, 1
8281
; GISEL-NEXT: v_readlane_b32 s30, v42, 0
82+
; GISEL-NEXT: v_readlane_b32 s31, v42, 1
8383
; GISEL-NEXT: s_mov_b32 s32, s33
8484
; GISEL-NEXT: v_readlane_b32 s0, v42, 2
8585
; GISEL-NEXT: s_or_saveexec_b32 s1, -1
@@ -787,8 +787,8 @@ define amdgpu_gfx void @ret_void(i32 %x) #0 {
787787
; DAGISEL-NEXT: s_wait_alu 0xfffe
788788
; DAGISEL-NEXT: s_swappc_b64 s[30:31], s[0:1]
789789
; DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
790-
; DAGISEL-NEXT: v_readlane_b32 s31, v40, 1
791790
; DAGISEL-NEXT: v_readlane_b32 s30, v40, 0
791+
; DAGISEL-NEXT: v_readlane_b32 s31, v40, 1
792792
; DAGISEL-NEXT: s_mov_b32 s32, s33
793793
; DAGISEL-NEXT: v_readlane_b32 s0, v40, 2
794794
; DAGISEL-NEXT: s_or_saveexec_b32 s1, -1
@@ -822,8 +822,8 @@ define amdgpu_gfx void @ret_void(i32 %x) #0 {
822822
; GISEL-NEXT: s_wait_alu 0xfffe
823823
; GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1]
824824
; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
825-
; GISEL-NEXT: v_readlane_b32 s31, v40, 1
826825
; GISEL-NEXT: v_readlane_b32 s30, v40, 0
826+
; GISEL-NEXT: v_readlane_b32 s31, v40, 1
827827
; GISEL-NEXT: s_mov_b32 s32, s33
828828
; GISEL-NEXT: v_readlane_b32 s0, v40, 2
829829
; GISEL-NEXT: s_or_saveexec_b32 s1, -1

0 commit comments

Comments
 (0)