@@ -154,26 +154,31 @@ define amdgpu_cs void @realign_stack(<32 x i32> %x) #0 {
154154; CHECK-LABEL: realign_stack:
155155; CHECK: ; %bb.0:
156156; CHECK-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
157- ; CHECK-NEXT: s_mov_b32 s1, callee@abs32@hi
157+ ; CHECK-NEXT: v_mov_b32_e32 v32, 0
158158; CHECK-NEXT: s_cmp_lg_u32 0, s33
159- ; CHECK-NEXT: s_mov_b32 s0 , callee@abs32@lo
159+ ; CHECK-NEXT: s_mov_b32 s1 , callee@abs32@hi
160160; CHECK-NEXT: s_cmovk_i32 s33, 0x200
161- ; CHECK-NEXT: s_movk_i32 s32, 0x100
161+ ; CHECK-NEXT: s_mov_b32 s0, callee@abs32@lo
162+ ; CHECK-NEXT: scratch_store_b32 off, v32, s33 scope:SCOPE_SYS
163+ ; CHECK-NEXT: s_wait_storecnt 0x0
162164; CHECK-NEXT: s_clause 0x7
163- ; CHECK-NEXT: scratch_store_b128 off, v[28:31], s33 offset:112
164165; CHECK-NEXT: scratch_store_b128 off, v[24:27], s33 offset:96
165- ; CHECK-NEXT: scratch_store_b128 off, v[20:23 ], s33 offset:80
166+ ; CHECK-NEXT: scratch_store_b128 off, v[28:31 ], s33 offset:112
166167; CHECK-NEXT: scratch_store_b128 off, v[16:19], s33 offset:64
167- ; CHECK-NEXT: scratch_store_b128 off, v[12:15 ], s33 offset:48
168+ ; CHECK-NEXT: scratch_store_b128 off, v[20:23 ], s33 offset:80
168169; CHECK-NEXT: scratch_store_b128 off, v[8:11], s33 offset:32
170+ ; CHECK-NEXT: scratch_store_b128 off, v[12:15], s33 offset:48
169171; CHECK-NEXT: scratch_store_b128 off, v[4:7], s33 offset:16
170172; CHECK-NEXT: scratch_store_b128 off, v[0:3], s33
171173; CHECK-NEXT: v_mov_b32_e32 v0, 0x47
174+ ; CHECK-NEXT: s_movk_i32 s32, 0x100
172175; CHECK-NEXT: s_cmovk_i32 s32, 0x300
173176; CHECK-NEXT: s_swappc_b64 s[30:31], s[0:1]
174177; CHECK-NEXT: s_alloc_vgpr 0
175178; CHECK-NEXT: s_endpgm
176179 %v = alloca <32 x i32 >, align 128 , addrspace (5 )
180+ ; use volatile store to avoid promotion of alloca to registers
181+ store volatile i32 0 , ptr addrspace (5 ) %v
177182 store <32 x i32 > %x , ptr addrspace (5 ) %v
178183 call amdgpu_gfx void @callee (i32 71 )
179184 ret void
0 commit comments