Skip to content

Commit b46525b

Browse files
committed
Use register pair for PC spill
1 parent 32bd3c3 commit b46525b

File tree

56 files changed

+3262
-3229
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

56 files changed

+3262
-3229
lines changed

llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,11 +267,19 @@ bool SILowerSGPRSpills::spillCalleeSavedRegs(
267267

268268
std::vector<CalleeSavedInfo> CSI;
269269
const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
270+
Register RetAddrReg = TRI->getReturnAddressReg(MF);
271+
bool SpillRetAddrReg = false;
270272

271273
for (unsigned I = 0; CSRegs[I]; ++I) {
272274
MCRegister Reg = CSRegs[I];
273275

274276
if (SavedRegs.test(Reg)) {
277+
if (Reg == TRI->getSubReg(RetAddrReg, AMDGPU::sub0) ||
278+
Reg == TRI->getSubReg(RetAddrReg, AMDGPU::sub1)) {
279+
SpillRetAddrReg = true;
280+
continue;
281+
}
282+
275283
const TargetRegisterClass *RC =
276284
TRI->getMinimalPhysRegClass(Reg, MVT::i32);
277285
int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC),
@@ -282,6 +290,18 @@ bool SILowerSGPRSpills::spillCalleeSavedRegs(
282290
}
283291
}
284292

293+
// Return address uses a register pair. Add the super register to the
294+
// CSI list so that it's easier to identify the entire spill and CFI
295+
// can be emitted appropriately.
296+
if (SpillRetAddrReg) {
297+
const TargetRegisterClass *RC =
298+
TRI->getMinimalPhysRegClass(RetAddrReg, MVT::i64);
299+
int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC),
300+
TRI->getSpillAlign(*RC), true);
301+
CSI.push_back(CalleeSavedInfo(RetAddrReg, JunkFI));
302+
CalleeSavedFIs.push_back(JunkFI);
303+
}
304+
285305
if (!CSI.empty()) {
286306
for (MachineBasicBlock *SaveBlock : SaveBlocks)
287307
insertCSRSaves(*SaveBlock, CSI, Indexes, LIS);

llvm/test/CodeGen/AMDGPU/GlobalISel/assert-align.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,10 @@ define ptr addrspace(1) @call_assert_align() {
2323
; CHECK-NEXT: v_writelane_b32 v40, s31, 1
2424
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
2525
; CHECK-NEXT: v_mov_b32_e32 v2, 0
26+
; CHECK-NEXT: v_readlane_b32 s30, v40, 0
2627
; CHECK-NEXT: global_store_dword v[0:1], v2, off
2728
; CHECK-NEXT: s_waitcnt vmcnt(0)
2829
; CHECK-NEXT: v_readlane_b32 s31, v40, 1
29-
; CHECK-NEXT: v_readlane_b32 s30, v40, 0
3030
; CHECK-NEXT: s_mov_b32 s32, s33
3131
; CHECK-NEXT: v_readlane_b32 s4, v40, 2
3232
; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1

llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -238,8 +238,8 @@ define void @func_caller_stack() #2 {
238238
; MUBUF-NEXT: v_writelane_b32 v40, s31, 1
239239
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
240240
; MUBUF-NEXT: s_swappc_b64 s[30:31], s[4:5]
241-
; MUBUF-NEXT: v_readlane_b32 s31, v40, 1
242241
; MUBUF-NEXT: v_readlane_b32 s30, v40, 0
242+
; MUBUF-NEXT: v_readlane_b32 s31, v40, 1
243243
; MUBUF-NEXT: s_mov_b32 s32, s33
244244
; MUBUF-NEXT: v_readlane_b32 s4, v40, 2
245245
; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1
@@ -277,8 +277,8 @@ define void @func_caller_stack() #2 {
277277
; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_v16i32_v16i32_v4i32@rel32@hi+12
278278
; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1
279279
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
280-
; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1
281280
; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0
281+
; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1
282282
; FLATSCR-NEXT: s_mov_b32 s32, s33
283283
; FLATSCR-NEXT: v_readlane_b32 s0, v40, 2
284284
; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1
@@ -363,8 +363,8 @@ define void @func_caller_byval(ptr addrspace(5) %argptr) #2 {
363363
; MUBUF-NEXT: s_waitcnt vmcnt(1)
364364
; MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:60
365365
; MUBUF-NEXT: s_swappc_b64 s[30:31], s[4:5]
366-
; MUBUF-NEXT: v_readlane_b32 s31, v40, 1
367366
; MUBUF-NEXT: v_readlane_b32 s30, v40, 0
367+
; MUBUF-NEXT: v_readlane_b32 s31, v40, 1
368368
; MUBUF-NEXT: s_mov_b32 s32, s33
369369
; MUBUF-NEXT: v_readlane_b32 s4, v40, 2
370370
; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1
@@ -414,8 +414,8 @@ define void @func_caller_byval(ptr addrspace(5) %argptr) #2 {
414414
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
415415
; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s32 offset:56
416416
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
417-
; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1
418417
; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0
418+
; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1
419419
; FLATSCR-NEXT: s_mov_b32 s32, s33
420420
; FLATSCR-NEXT: v_readlane_b32 s0, v40, 2
421421
; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1

llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -244,8 +244,8 @@ define void @sink_null_insert_pt(ptr addrspace(4) %arg0) #0 {
244244
; GFX9-NEXT: s_addk_i32 s32, 0x400
245245
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
246246
; GFX9-NEXT: s_swappc_b64 s[30:31], 0
247-
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
248247
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
248+
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
249249
; GFX9-NEXT: s_mov_b32 s32, s33
250250
; GFX9-NEXT: v_readlane_b32 s4, v40, 2
251251
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1

llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@ define void @parent_func_missing_inputs() #0 {
3030
; FIXEDABI-NEXT: s_addc_u32 s17, s17, requires_all_inputs@rel32@hi+12
3131
; FIXEDABI-NEXT: v_writelane_b32 v40, s31, 1
3232
; FIXEDABI-NEXT: s_swappc_b64 s[30:31], s[16:17]
33-
; FIXEDABI-NEXT: v_readlane_b32 s31, v40, 1
3433
; FIXEDABI-NEXT: v_readlane_b32 s30, v40, 0
34+
; FIXEDABI-NEXT: v_readlane_b32 s31, v40, 1
3535
; FIXEDABI-NEXT: s_mov_b32 s32, s33
3636
; FIXEDABI-NEXT: v_readlane_b32 s4, v40, 2
3737
; FIXEDABI-NEXT: s_or_saveexec_b64 s[6:7], -1

llvm/test/CodeGen/AMDGPU/amdgcn-call-whole-wave.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ define amdgpu_gfx void @basic_test(i32 %x, i32 inreg %c, ptr addrspace(1) %ptr)
3535
; DAGISEL-NEXT: s_clause 0x1
3636
; DAGISEL-NEXT: scratch_load_b32 v41, off, s33
3737
; DAGISEL-NEXT: scratch_load_b32 v40, off, s33 offset:4
38-
; DAGISEL-NEXT: v_readlane_b32 s31, v42, 1
3938
; DAGISEL-NEXT: v_readlane_b32 s30, v42, 0
39+
; DAGISEL-NEXT: v_readlane_b32 s31, v42, 1
4040
; DAGISEL-NEXT: s_mov_b32 s32, s33
4141
; DAGISEL-NEXT: v_readlane_b32 s0, v42, 2
4242
; DAGISEL-NEXT: s_or_saveexec_b32 s1, -1
@@ -78,8 +78,8 @@ define amdgpu_gfx void @basic_test(i32 %x, i32 inreg %c, ptr addrspace(1) %ptr)
7878
; GISEL-NEXT: s_clause 0x1
7979
; GISEL-NEXT: scratch_load_b32 v41, off, s33
8080
; GISEL-NEXT: scratch_load_b32 v40, off, s33 offset:4
81-
; GISEL-NEXT: v_readlane_b32 s31, v42, 1
8281
; GISEL-NEXT: v_readlane_b32 s30, v42, 0
82+
; GISEL-NEXT: v_readlane_b32 s31, v42, 1
8383
; GISEL-NEXT: s_mov_b32 s32, s33
8484
; GISEL-NEXT: v_readlane_b32 s0, v42, 2
8585
; GISEL-NEXT: s_or_saveexec_b32 s1, -1
@@ -787,8 +787,8 @@ define amdgpu_gfx void @ret_void(i32 %x) #0 {
787787
; DAGISEL-NEXT: s_wait_alu 0xfffe
788788
; DAGISEL-NEXT: s_swappc_b64 s[30:31], s[0:1]
789789
; DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
790-
; DAGISEL-NEXT: v_readlane_b32 s31, v40, 1
791790
; DAGISEL-NEXT: v_readlane_b32 s30, v40, 0
791+
; DAGISEL-NEXT: v_readlane_b32 s31, v40, 1
792792
; DAGISEL-NEXT: s_mov_b32 s32, s33
793793
; DAGISEL-NEXT: v_readlane_b32 s0, v40, 2
794794
; DAGISEL-NEXT: s_or_saveexec_b32 s1, -1
@@ -822,8 +822,8 @@ define amdgpu_gfx void @ret_void(i32 %x) #0 {
822822
; GISEL-NEXT: s_wait_alu 0xfffe
823823
; GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1]
824824
; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
825-
; GISEL-NEXT: v_readlane_b32 s31, v40, 1
826825
; GISEL-NEXT: v_readlane_b32 s30, v40, 0
826+
; GISEL-NEXT: v_readlane_b32 s31, v40, 1
827827
; GISEL-NEXT: s_mov_b32 s32, s33
828828
; GISEL-NEXT: v_readlane_b32 s0, v40, 2
829829
; GISEL-NEXT: s_or_saveexec_b32 s1, -1

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.320bit.ll

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -16525,25 +16525,25 @@ define inreg <40 x i8> @bitcast_v20i16_to_v40i8_scalar(<20 x i16> inreg %a, i32
1652516525
; SI-NEXT: buffer_store_dword v8, off, s[0:3], s32 ; 4-byte Folded Spill
1652616526
; SI-NEXT: s_mov_b64 exec, s[4:5]
1652716527
; SI-NEXT: s_waitcnt expcnt(0)
16528-
; SI-NEXT: v_writelane_b32 v8, s30, 0
16529-
; SI-NEXT: v_writelane_b32 v8, s31, 1
16530-
; SI-NEXT: v_writelane_b32 v8, s34, 2
16531-
; SI-NEXT: v_writelane_b32 v8, s35, 3
16532-
; SI-NEXT: v_writelane_b32 v8, s36, 4
16533-
; SI-NEXT: v_writelane_b32 v8, s37, 5
16534-
; SI-NEXT: v_writelane_b32 v8, s38, 6
16535-
; SI-NEXT: v_writelane_b32 v8, s39, 7
16536-
; SI-NEXT: v_writelane_b32 v8, s48, 8
16537-
; SI-NEXT: v_writelane_b32 v8, s49, 9
16528+
; SI-NEXT: v_writelane_b32 v8, s34, 0
16529+
; SI-NEXT: v_writelane_b32 v8, s35, 1
16530+
; SI-NEXT: v_writelane_b32 v8, s36, 2
16531+
; SI-NEXT: v_writelane_b32 v8, s37, 3
16532+
; SI-NEXT: v_writelane_b32 v8, s38, 4
16533+
; SI-NEXT: v_writelane_b32 v8, s39, 5
16534+
; SI-NEXT: v_writelane_b32 v8, s48, 6
16535+
; SI-NEXT: v_writelane_b32 v8, s49, 7
16536+
; SI-NEXT: v_writelane_b32 v8, s50, 8
1653816537
; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7
16539-
; SI-NEXT: v_writelane_b32 v8, s50, 10
16538+
; SI-NEXT: v_writelane_b32 v8, s30, 9
1654016539
; SI-NEXT: v_readfirstlane_b32 s39, v6
1654116540
; SI-NEXT: v_readfirstlane_b32 s48, v5
1654216541
; SI-NEXT: v_readfirstlane_b32 s49, v4
1654316542
; SI-NEXT: v_readfirstlane_b32 s50, v3
1654416543
; SI-NEXT: v_readfirstlane_b32 s35, v2
1654516544
; SI-NEXT: s_and_b64 s[4:5], vcc, exec
1654616545
; SI-NEXT: v_readfirstlane_b32 s38, v1
16546+
; SI-NEXT: v_writelane_b32 v8, s31, 10
1654716547
; SI-NEXT: s_cbranch_scc0 .LBB49_4
1654816548
; SI-NEXT: ; %bb.1: ; %cmp.false
1654916549
; SI-NEXT: s_and_b32 s4, s16, 0xffff
@@ -16815,18 +16815,18 @@ define inreg <40 x i8> @bitcast_v20i16_to_v40i8_scalar(<20 x i16> inreg %a, i32
1681516815
; SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen
1681616816
; SI-NEXT: v_add_i32_e32 v0, vcc, 36, v0
1681716817
; SI-NEXT: v_mov_b32_e32 v1, s4
16818+
; SI-NEXT: v_readlane_b32 s30, v8, 9
1681816819
; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
16819-
; SI-NEXT: v_readlane_b32 s50, v8, 10
16820-
; SI-NEXT: v_readlane_b32 s49, v8, 9
16821-
; SI-NEXT: v_readlane_b32 s48, v8, 8
16822-
; SI-NEXT: v_readlane_b32 s39, v8, 7
16823-
; SI-NEXT: v_readlane_b32 s38, v8, 6
16824-
; SI-NEXT: v_readlane_b32 s37, v8, 5
16825-
; SI-NEXT: v_readlane_b32 s36, v8, 4
16826-
; SI-NEXT: v_readlane_b32 s35, v8, 3
16827-
; SI-NEXT: v_readlane_b32 s34, v8, 2
16828-
; SI-NEXT: v_readlane_b32 s31, v8, 1
16829-
; SI-NEXT: v_readlane_b32 s30, v8, 0
16820+
; SI-NEXT: v_readlane_b32 s31, v8, 10
16821+
; SI-NEXT: v_readlane_b32 s50, v8, 8
16822+
; SI-NEXT: v_readlane_b32 s49, v8, 7
16823+
; SI-NEXT: v_readlane_b32 s48, v8, 6
16824+
; SI-NEXT: v_readlane_b32 s39, v8, 5
16825+
; SI-NEXT: v_readlane_b32 s38, v8, 4
16826+
; SI-NEXT: v_readlane_b32 s37, v8, 3
16827+
; SI-NEXT: v_readlane_b32 s36, v8, 2
16828+
; SI-NEXT: v_readlane_b32 s35, v8, 1
16829+
; SI-NEXT: v_readlane_b32 s34, v8, 0
1683016830
; SI-NEXT: s_xor_saveexec_b64 s[4:5], -1
1683116831
; SI-NEXT: buffer_load_dword v8, off, s[0:3], s32 ; 4-byte Folded Reload
1683216832
; SI-NEXT: s_mov_b64 exec, s[4:5]

0 commit comments

Comments
 (0)