@@ -155,8 +155,6 @@ main_body:
155155 ret void
156156}
157157
158- ; There are 8 pseudo registers defined to track LDS DMA dependencies.
159-
160158define amdgpu_kernel void @buffer_load_lds_dword_10_arrays (<4 x i32 > %rsrc , i32 %i1 , i32 %i2 , i32 %i3 , i32 %i4 , i32 %i5 , i32 %i6 , i32 %i7 , i32 %i8 , i32 %i9 , ptr addrspace (1 ) %out ) {
161159; GFX9-LABEL: buffer_load_lds_dword_10_arrays:
162160; GFX9: ; %bb.0: ; %main_body
@@ -222,8 +220,10 @@ define amdgpu_kernel void @buffer_load_lds_dword_10_arrays(<4 x i32> %rsrc, i32
222220; GFX9-NEXT: s_waitcnt vmcnt(2)
223221; GFX9-NEXT: ds_read_b32 v7, v9 offset:1792
224222; GFX9-NEXT: ; wave barrier
223+ ; GFX9-NEXT: s_waitcnt vmcnt(1)
225224; GFX9-NEXT: ds_read_b32 v8, v9 offset:2048
226225; GFX9-NEXT: ; wave barrier
226+ ; GFX9-NEXT: s_waitcnt vmcnt(0)
227227; GFX9-NEXT: ds_read_b32 v9, v9 offset:2304
228228; GFX9-NEXT: s_waitcnt lgkmcnt(0)
229229; GFX9-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
@@ -287,8 +287,10 @@ define amdgpu_kernel void @buffer_load_lds_dword_10_arrays(<4 x i32> %rsrc, i32
287287; GFX10-NEXT: s_waitcnt vmcnt(2)
288288; GFX10-NEXT: ds_read_b32 v7, v9 offset:1792
289289; GFX10-NEXT: ; wave barrier
290+ ; GFX10-NEXT: s_waitcnt vmcnt(1)
290291; GFX10-NEXT: ds_read_b32 v8, v9 offset:2048
291292; GFX10-NEXT: ; wave barrier
293+ ; GFX10-NEXT: s_waitcnt vmcnt(0)
292294; GFX10-NEXT: ds_read_b32 v9, v9 offset:2304
293295; GFX10-NEXT: s_waitcnt lgkmcnt(0)
294296; GFX10-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
0 commit comments