@@ -92,59 +92,16 @@ define amdgpu_cs void @with_calls_no_inline_const() #0 {
9292 ret void
9393}
9494
95- ; We're going to limit this to 16 VGPRs, so we need to spill the rest.
96- define amdgpu_cs void @with_spills (ptr addrspace (1 ) %p1 , ptr addrspace (1 ) %p2 ) #1 {
95+ define amdgpu_cs void @with_spills () {
9796; CHECK-LABEL: with_spills:
9897; CHECK: ; %bb.0:
9998; CHECK-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
100- ; CHECK-NEXT: global_load_b128 v[4:7], v[0:1], off offset:96
99+ ; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
101100; CHECK-NEXT: s_cmp_lg_u32 0, s33
102101; CHECK-NEXT: s_cmovk_i32 s33, 0x1c0
103- ; CHECK-NEXT: s_wait_loadcnt 0x0
104- ; CHECK-NEXT: scratch_store_b128 off, v[4:7], s33 offset:80 ; 16-byte Folded Spill
105- ; CHECK-NEXT: s_clause 0x2
106- ; CHECK-NEXT: global_load_b128 v[8:11], v[0:1], off offset:112
107- ; CHECK-NEXT: global_load_b128 v[12:15], v[0:1], off offset:64
108- ; CHECK-NEXT: global_load_b128 v[4:7], v[0:1], off offset:80
109- ; CHECK-NEXT: s_wait_loadcnt 0x0
110- ; CHECK-NEXT: scratch_store_b128 off, v[4:7], s33 offset:64 ; 16-byte Folded Spill
111- ; CHECK-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32
112- ; CHECK-NEXT: s_wait_loadcnt 0x0
113- ; CHECK-NEXT: scratch_store_b128 off, v[4:7], s33 offset:48 ; 16-byte Folded Spill
114- ; CHECK-NEXT: global_load_b128 v[4:7], v[0:1], off offset:48
115- ; CHECK-NEXT: s_wait_loadcnt 0x0
116- ; CHECK-NEXT: scratch_store_b128 off, v[4:7], s33 offset:32 ; 16-byte Folded Spill
117- ; CHECK-NEXT: global_load_b128 v[4:7], v[0:1], off
118- ; CHECK-NEXT: s_wait_loadcnt 0x0
119- ; CHECK-NEXT: scratch_store_b128 off, v[4:7], s33 offset:16 ; 16-byte Folded Spill
120- ; CHECK-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16
121- ; CHECK-NEXT: s_wait_loadcnt 0x0
122- ; CHECK-NEXT: scratch_store_b128 off, v[4:7], s33 ; 16-byte Folded Spill
123- ; CHECK-NEXT: scratch_load_b128 v[4:7], off, s33 offset:80 th:TH_LOAD_LU ; 16-byte Folded Reload
124- ; CHECK-NEXT: s_wait_loadcnt 0x0
125- ; CHECK-NEXT: s_clause 0x2
126- ; CHECK-NEXT: global_store_b128 v[2:3], v[4:7], off offset:96
127- ; CHECK-NEXT: global_store_b128 v[2:3], v[8:11], off offset:112
128- ; CHECK-NEXT: global_store_b128 v[2:3], v[12:15], off offset:64
129- ; CHECK-NEXT: scratch_load_b128 v[4:7], off, s33 offset:64 th:TH_LOAD_LU ; 16-byte Folded Reload
130- ; CHECK-NEXT: s_wait_loadcnt 0x0
131- ; CHECK-NEXT: global_store_b128 v[2:3], v[4:7], off offset:80
132- ; CHECK-NEXT: scratch_load_b128 v[4:7], off, s33 offset:48 th:TH_LOAD_LU ; 16-byte Folded Reload
133- ; CHECK-NEXT: s_wait_loadcnt 0x0
134- ; CHECK-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32
135- ; CHECK-NEXT: scratch_load_b128 v[4:7], off, s33 offset:32 th:TH_LOAD_LU ; 16-byte Folded Reload
136- ; CHECK-NEXT: s_wait_loadcnt 0x0
137- ; CHECK-NEXT: global_store_b128 v[2:3], v[4:7], off offset:48
138- ; CHECK-NEXT: scratch_load_b128 v[4:7], off, s33 offset:16 th:TH_LOAD_LU ; 16-byte Folded Reload
139- ; CHECK-NEXT: s_wait_loadcnt 0x0
140- ; CHECK-NEXT: global_store_b128 v[2:3], v[4:7], off
141- ; CHECK-NEXT: scratch_load_b128 v[4:7], off, s33 th:TH_LOAD_LU ; 16-byte Folded Reload
142- ; CHECK-NEXT: s_wait_loadcnt 0x0
143- ; CHECK-NEXT: global_store_b128 v[2:3], v[4:7], off offset:16
144102; CHECK-NEXT: s_alloc_vgpr 0
145103; CHECK-NEXT: s_endpgm
146- %v = load <32 x i32 >, ptr addrspace (1 ) %p1
147- store <32 x i32 > %v , ptr addrspace (1 ) %p2
104+ call void asm "; spills" , "~{v40},~{v42}" ()
148105 ret void
149106}
150107
@@ -258,5 +215,3 @@ define void @default() #0 {
258215declare amdgpu_gfx void @callee (i32 ) #0
259216
260217attributes #0 = { nounwind }
261- attributes #1 = { nounwind "amdgpu-num-vgpr" ="16" }
262-
0 commit comments