Skip to content

Commit bb5598a

Browse files
author
Salinas, David
authored
[AMDGPU] Change SGPR layout to striped caller/callee saved (llvm#127353) (llvm#1371)
2 parents 4a86a97 + ec4fa6b commit bb5598a

File tree

67 files changed

+21875
-21202
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

67 files changed

+21875
-21202
lines changed

llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -91,7 +91,15 @@ def CSR_AMDGPU_AGPRs : CalleeSavedRegs<
9191
>;
9292

9393
def CSR_AMDGPU_SGPRs : CalleeSavedRegs<
94-
(sequence "SGPR%u", 30, 105)
94+
// Ensure that s30-s31 (return address), s32 (stack pointer), s33 (frame pointer),
95+
// and s34 (base pointer) are callee-saved. The striped layout starts from s40,
96+
// with a stripe width of 8. The last stripe is 10 wide instead of 8, to avoid
97+
// ending with a 2-wide stripe.
98+
(add (sequence "SGPR%u", 30, 39),
99+
(sequence "SGPR%u", 48, 55),
100+
(sequence "SGPR%u", 64, 71),
101+
(sequence "SGPR%u", 80, 87),
102+
(sequence "SGPR%u", 96, 105))
95103
>;
96104

97105
def CSR_AMDGPU_SI_Gfx_SGPRs : CalleeSavedRegs<

llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir

Lines changed: 776 additions & 8 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll

Lines changed: 115 additions & 115 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgpu-spill-cfi-saved-regs.ll

Lines changed: 16 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -446,17 +446,16 @@ define void @callee_need_to_spill_fp_exec_to_memory() #2 {
446446
; WAVE64-NEXT: .cfi_undefined 60
447447
; WAVE64-NEXT: .cfi_undefined 61
448448
; WAVE64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
449-
; WAVE64-NEXT: s_mov_b32 s4, s33
449+
; WAVE64-NEXT: s_mov_b32 s40, s33
450+
; WAVE64-NEXT: .cfi_register 65, 72
450451
; WAVE64-NEXT: s_mov_b32 s33, s32
451-
; WAVE64-NEXT: s_xor_saveexec_b64 s[6:7], -1
452+
; WAVE64-NEXT: s_xor_saveexec_b64 s[4:5], -1
452453
; WAVE64-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:192 ; 4-byte Folded Spill
453454
; WAVE64-NEXT: .cfi_offset 2599, 12288
454-
; WAVE64-NEXT: s_mov_b64 exec, s[6:7]
455+
; WAVE64-NEXT: s_mov_b64 exec, s[4:5]
455456
; WAVE64-NEXT: v_writelane_b32 v39, exec_lo, 32
456457
; WAVE64-NEXT: v_writelane_b32 v39, exec_hi, 33
457458
; WAVE64-NEXT: .cfi_llvm_vector_registers 17, 2599, 32, 32, 2599, 33, 32
458-
; WAVE64-NEXT: v_writelane_b32 v39, s4, 34
459-
; WAVE64-NEXT: .cfi_llvm_vector_registers 65, 2599, 34, 32
460459
; WAVE64-NEXT: .cfi_def_cfa_register 65
461460
; WAVE64-NEXT: s_addk_i32 s32, 0x3200
462461
; WAVE64-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:188 ; 4-byte Folded Spill
@@ -706,12 +705,11 @@ define void @callee_need_to_spill_fp_exec_to_memory() #2 {
706705
; WAVE64-NEXT: v_readlane_b32 s35, v39, 1
707706
; WAVE64-NEXT: v_readlane_b32 s34, v39, 0
708707
; WAVE64-NEXT: s_mov_b32 s32, s33
709-
; WAVE64-NEXT: v_readlane_b32 s4, v39, 34
710-
; WAVE64-NEXT: s_xor_saveexec_b64 s[6:7], -1
708+
; WAVE64-NEXT: s_xor_saveexec_b64 s[4:5], -1
711709
; WAVE64-NEXT: buffer_load_dword v39, off, s[0:3], s33 offset:192 ; 4-byte Folded Reload
712-
; WAVE64-NEXT: s_mov_b64 exec, s[6:7]
710+
; WAVE64-NEXT: s_mov_b64 exec, s[4:5]
713711
; WAVE64-NEXT: .cfi_def_cfa_register 64
714-
; WAVE64-NEXT: s_mov_b32 s33, s4
712+
; WAVE64-NEXT: s_mov_b32 s33, s40
715713
; WAVE64-NEXT: s_waitcnt vmcnt(0)
716714
; WAVE64-NEXT: s_setpc_b64 s[30:31]
717715
;
@@ -830,21 +828,19 @@ define void @callee_need_to_spill_fp_exec_to_memory() #2 {
830828
; WAVE32-NEXT: .cfi_undefined 60
831829
; WAVE32-NEXT: .cfi_undefined 61
832830
; WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
833-
; WAVE32-NEXT: s_mov_b32 s4, s33
831+
; WAVE32-NEXT: s_mov_b32 s40, s33
832+
; WAVE32-NEXT: .cfi_register 65, 72
834833
; WAVE32-NEXT: s_mov_b32 s33, s32
835-
; WAVE32-NEXT: s_xor_saveexec_b32 s5, -1
834+
; WAVE32-NEXT: s_xor_saveexec_b32 s4, -1
836835
; WAVE32-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:192 ; 4-byte Folded Spill
837836
; WAVE32-NEXT: .cfi_offset 1575, 6144
838837
; WAVE32-NEXT: s_waitcnt_depctr 0xffe3
839-
; WAVE32-NEXT: s_mov_b32 exec_lo, s5
838+
; WAVE32-NEXT: s_mov_b32 exec_lo, s4
840839
; WAVE32-NEXT: v_mov_b32_e32 v0, exec_lo
841840
; WAVE32-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:196 ; 4-byte Folded Spill
842841
; WAVE32-NEXT: .cfi_offset 1, 6272
843-
; WAVE32-NEXT: v_mov_b32_e32 v0, s4
844-
; WAVE32-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:200 ; 4-byte Folded Spill
845-
; WAVE32-NEXT: .cfi_offset 65, 6400
846842
; WAVE32-NEXT: .cfi_def_cfa_register 65
847-
; WAVE32-NEXT: s_addk_i32 s32, 0x1a00
843+
; WAVE32-NEXT: s_addk_i32 s32, 0x1980
848844
; WAVE32-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:188 ; 4-byte Folded Spill
849845
; WAVE32-NEXT: .cfi_llvm_vector_offset 1576, 32, 1, 32, 6016
850846
; WAVE32-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:184 ; 4-byte Folded Spill
@@ -1011,7 +1007,7 @@ define void @callee_need_to_spill_fp_exec_to_memory() #2 {
10111007
; WAVE32-NEXT: ;;#ASMSTART
10121008
; WAVE32-NEXT: ; clobber all VGPRs except v39
10131009
; WAVE32-NEXT: ;;#ASMEND
1014-
; WAVE32-NEXT: s_clause 0x30
1010+
; WAVE32-NEXT: s_clause 0x2f
10151011
; WAVE32-NEXT: buffer_load_dword v127, off, s[0:3], s33
10161012
; WAVE32-NEXT: buffer_load_dword v126, off, s[0:3], s33 offset:4
10171013
; WAVE32-NEXT: buffer_load_dword v125, off, s[0:3], s33 offset:8
@@ -1060,7 +1056,6 @@ define void @callee_need_to_spill_fp_exec_to_memory() #2 {
10601056
; WAVE32-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:180
10611057
; WAVE32-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:184
10621058
; WAVE32-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:188
1063-
; WAVE32-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:200
10641059
; WAVE32-NEXT: v_readlane_b32 s97, v39, 31
10651060
; WAVE32-NEXT: v_readlane_b32 s96, v39, 30
10661061
; WAVE32-NEXT: v_readlane_b32 s87, v39, 29
@@ -1094,14 +1089,12 @@ define void @callee_need_to_spill_fp_exec_to_memory() #2 {
10941089
; WAVE32-NEXT: v_readlane_b32 s35, v39, 1
10951090
; WAVE32-NEXT: v_readlane_b32 s34, v39, 0
10961091
; WAVE32-NEXT: s_mov_b32 s32, s33
1097-
; WAVE32-NEXT: s_waitcnt vmcnt(0)
1098-
; WAVE32-NEXT: v_readfirstlane_b32 s4, v0
1099-
; WAVE32-NEXT: s_xor_saveexec_b32 s5, -1
1092+
; WAVE32-NEXT: s_xor_saveexec_b32 s4, -1
11001093
; WAVE32-NEXT: buffer_load_dword v39, off, s[0:3], s33 offset:192 ; 4-byte Folded Reload
11011094
; WAVE32-NEXT: s_waitcnt_depctr 0xffe3
1102-
; WAVE32-NEXT: s_mov_b32 exec_lo, s5
1095+
; WAVE32-NEXT: s_mov_b32 exec_lo, s4
11031096
; WAVE32-NEXT: .cfi_def_cfa_register 64
1104-
; WAVE32-NEXT: s_mov_b32 s33, s4
1097+
; WAVE32-NEXT: s_mov_b32 s33, s40
11051098
; WAVE32-NEXT: s_waitcnt vmcnt(0)
11061099
; WAVE32-NEXT: s_setpc_b64 s[30:31]
11071100
call void asm sideeffect "; clobber nonpreserved and 32 CSR SGPRs",

llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir

Lines changed: 32 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -459,6 +459,38 @@ body: |
459459
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27
460460
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28
461461
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29
462+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40
463+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41
464+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42
465+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43
466+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44
467+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45
468+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46
469+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47
470+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56
471+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57
472+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58
473+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59
474+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60
475+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61
476+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62
477+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63
478+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72
479+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73
480+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74
481+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75
482+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76
483+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77
484+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78
485+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79
486+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88
487+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89
488+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90
489+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91
490+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92
491+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93
492+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94
493+
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95
462494
; GCN-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
463495
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
464496
; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr40, 4352

0 commit comments

Comments
 (0)