@@ -67,16 +67,24 @@ body: |
6767 liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9, $vgpr10
6868
6969 ; GCN-LABEL: name: preserve_all_lanes_wwm_above_args
70- ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9
70+ ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9, $vgpr10
7171 ; GCN-NEXT: {{ $}}
72- ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0
72+ ; GCN-NEXT: $sgpr1 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
73+ ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr10, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
74+ ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
75+ ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7
76+ ; GCN-NEXT: $vgpr10 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr10
7377 ; GCN-NEXT: $sgpr35 = S_MOV_B32 5
74- ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
75- ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 10, implicit $exec
76- ; GCN-NEXT: $vgpr8 = COPY killed $vgpr0
78+ ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr10, 0
79+ ; GCN-NEXT: $vgpr10 = V_MOV_B32_e32 10, implicit $exec
80+ ; GCN-NEXT: $vgpr8 = COPY killed $vgpr10
7781 ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
7882 ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
83+ ; GCN-NEXT: $sgpr1 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
84+ ; GCN-NEXT: $vgpr10 = SCRATCH_LOAD_DWORD_ST 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
85+ ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
7986 ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
87+ S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7
8088 $vgpr10 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr10
8189 $sgpr35 = S_MOV_B32 5
8290 $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr10, 0
@@ -104,10 +112,12 @@ body: |
104112 ; GCN-LABEL: name: dont_preserve_args
105113 ; GCN: liveins: $sgpr0, $vgpr8, $vgpr9
106114 ; GCN-NEXT: {{ $}}
115+ ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7
107116 ; GCN-NEXT: renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec
108117 ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
109118 ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
110119 ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
120+ S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7
111121 renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec
112122 renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
113123 renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
@@ -131,15 +141,23 @@ body: |
131141 liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9, $vgpr10
132142
133143 ; GCN-LABEL: name: preserve_inactive_lanes_wwm_args
134- ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr10
144+ ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9, $vgpr10
135145 ; GCN-NEXT: {{ $}}
146+ ; GCN-NEXT: $sgpr1 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
147+ ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
148+ ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
149+ ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7
136150 ; GCN-NEXT: $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
137151 ; GCN-NEXT: $sgpr35 = S_MOV_B32 5
138152 ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0
139153 ; GCN-NEXT: renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec
140154 ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
141155 ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
142- ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr0
156+ ; GCN-NEXT: $sgpr1 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
157+ ; GCN-NEXT: $vgpr9 = SCRATCH_LOAD_DWORD_ST 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr9(tied-def 0) :: (load (s32) from %stack.0, addrspace 5)
158+ ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
159+ ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
160+ S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7
143161 renamable $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
144162 $sgpr35 = S_MOV_B32 5
145163 $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0
@@ -168,13 +186,15 @@ body: |
168186 ; GCN-LABEL: name: dont_preserve_if_no_chain_calls
169187 ; GCN: liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr8, $vgpr9
170188 ; GCN-NEXT: {{ $}}
189+ ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7
171190 ; GCN-NEXT: $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
172191 ; GCN-NEXT: $sgpr35 = S_MOV_B32 5
173192 ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0
174193 ; GCN-NEXT: renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec
175194 ; GCN-NEXT: $vgpr9 = V_MOV_B32_e32 20, implicit $exec
176195 ; GCN-NEXT: $vgpr10 = V_MOV_B32_e32 30, implicit $exec
177196 ; GCN-NEXT: S_ENDPGM 0
197+ S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7
178198 renamable $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
179199 $sgpr35 = S_MOV_B32 5
180200 $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0
0 commit comments