@@ -22,6 +22,7 @@ define float @local_atomic_fmax_ret_f32(ptr addrspace(3) %ptr, float %val) {
2222; GFX12-NEXT: ds_max_num_rtn_f32 v0, v0, v1
2323; GFX12-NEXT: s_wait_dscnt 0x0
2424; GFX12-NEXT: global_inv scope:SCOPE_SE
25+ ; GFX12-NEXT: s_wait_loadcnt 0x0
2526; GFX12-NEXT: s_setpc_b64 s[30:31]
2627;
2728; GFX942-LABEL: local_atomic_fmax_ret_f32:
@@ -94,6 +95,7 @@ define void @local_atomic_fmax_noret_f32(ptr addrspace(3) %ptr, float %val) {
9495; GFX12-NEXT: ds_max_num_f32 v0, v1
9596; GFX12-NEXT: s_wait_dscnt 0x0
9697; GFX12-NEXT: global_inv scope:SCOPE_SE
98+ ; GFX12-NEXT: s_wait_loadcnt 0x0
9799; GFX12-NEXT: s_setpc_b64 s[30:31]
98100;
99101; GFX942-LABEL: local_atomic_fmax_noret_f32:
@@ -166,6 +168,7 @@ define double @local_atomic_fmax_ret_f64(ptr addrspace(3) %ptr, double %val) {
166168; GFX12-NEXT: ds_max_num_rtn_f64 v[0:1], v0, v[1:2]
167169; GFX12-NEXT: s_wait_dscnt 0x0
168170; GFX12-NEXT: global_inv scope:SCOPE_SE
171+ ; GFX12-NEXT: s_wait_loadcnt 0x0
169172; GFX12-NEXT: s_setpc_b64 s[30:31]
170173;
171174; GFX942-LABEL: local_atomic_fmax_ret_f64:
@@ -242,6 +245,7 @@ define void @local_atomic_fmax_noret_f64(ptr addrspace(3) %ptr, double %val) {
242245; GFX12-NEXT: ds_max_num_f64 v0, v[1:2]
243246; GFX12-NEXT: s_wait_dscnt 0x0
244247; GFX12-NEXT: global_inv scope:SCOPE_SE
248+ ; GFX12-NEXT: s_wait_loadcnt 0x0
245249; GFX12-NEXT: s_setpc_b64 s[30:31]
246250;
247251; GFX942-LABEL: local_atomic_fmax_noret_f64:
@@ -318,6 +322,7 @@ define float @global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory(pt
318322; GFX12-NEXT: global_atomic_max_num_f32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
319323; GFX12-NEXT: s_wait_loadcnt 0x0
320324; GFX12-NEXT: global_inv scope:SCOPE_DEV
325+ ; GFX12-NEXT: s_wait_loadcnt 0x0
321326; GFX12-NEXT: s_setpc_b64 s[30:31]
322327;
323328; GFX942-LABEL: global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
@@ -464,6 +469,7 @@ define void @global_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory(p
464469; GFX12-NEXT: global_atomic_max_num_f32 v[0:1], v2, off scope:SCOPE_DEV
465470; GFX12-NEXT: s_wait_storecnt 0x0
466471; GFX12-NEXT: global_inv scope:SCOPE_DEV
472+ ; GFX12-NEXT: s_wait_loadcnt 0x0
467473; GFX12-NEXT: s_setpc_b64 s[30:31]
468474;
469475; GFX942-LABEL: global_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
@@ -624,6 +630,7 @@ define double @global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory(p
624630; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
625631; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
626632; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
633+ ; GFX12-NEXT: s_wait_loadcnt 0x0
627634; GFX12-NEXT: s_setpc_b64 s[30:31]
628635;
629636; GFX942-LABEL: global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory:
@@ -779,6 +786,7 @@ define void @global_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory(p
779786; GFX12-NEXT: s_cbranch_execnz .LBB7_1
780787; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
781788; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
789+ ; GFX12-NEXT: s_wait_loadcnt 0x0
782790; GFX12-NEXT: s_setpc_b64 s[30:31]
783791;
784792; GFX942-LABEL: global_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory:
@@ -909,6 +917,7 @@ define float @flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory(ptr
909917; GFX12-NEXT: flat_atomic_max_num_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
910918; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
911919; GFX12-NEXT: global_inv scope:SCOPE_DEV
920+ ; GFX12-NEXT: s_wait_loadcnt 0x0
912921; GFX12-NEXT: s_setpc_b64 s[30:31]
913922;
914923; GFX942-LABEL: flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
@@ -1051,6 +1060,7 @@ define void @flat_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory(ptr
10511060; GFX12-NEXT: flat_atomic_max_num_f32 v[0:1], v2 scope:SCOPE_DEV
10521061; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
10531062; GFX12-NEXT: global_inv scope:SCOPE_DEV
1063+ ; GFX12-NEXT: s_wait_loadcnt 0x0
10541064; GFX12-NEXT: s_setpc_b64 s[30:31]
10551065;
10561066; GFX942-LABEL: flat_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
@@ -1210,6 +1220,7 @@ define double @flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory(ptr
12101220; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
12111221; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
12121222; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
1223+ ; GFX12-NEXT: s_wait_loadcnt 0x0
12131224; GFX12-NEXT: s_setpc_b64 s[30:31]
12141225;
12151226; GFX942-LABEL: flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory:
@@ -1363,6 +1374,7 @@ define void @flat_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory(ptr
13631374; GFX12-NEXT: s_cbranch_execnz .LBB11_1
13641375; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
13651376; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
1377+ ; GFX12-NEXT: s_wait_loadcnt 0x0
13661378; GFX12-NEXT: s_setpc_b64 s[30:31]
13671379;
13681380; GFX942-LABEL: flat_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory:
@@ -1495,6 +1507,7 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_m
14951507; GFX12-NEXT: buffer_atomic_max_num_f32 v0, v1, s[0:3], null offen th:TH_ATOMIC_RETURN
14961508; GFX12-NEXT: s_wait_loadcnt 0x0
14971509; GFX12-NEXT: global_inv scope:SCOPE_DEV
1510+ ; GFX12-NEXT: s_wait_loadcnt 0x0
14981511; GFX12-NEXT: s_setpc_b64 s[30:31]
14991512;
15001513; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory:
@@ -1651,6 +1664,7 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_
16511664; GFX12-NEXT: buffer_atomic_max_num_f32 v0, v1, s[0:3], null offen
16521665; GFX12-NEXT: s_wait_storecnt 0x0
16531666; GFX12-NEXT: global_inv scope:SCOPE_DEV
1667+ ; GFX12-NEXT: s_wait_loadcnt 0x0
16541668; GFX12-NEXT: s_setpc_b64 s[30:31]
16551669;
16561670; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory:
@@ -1824,6 +1838,7 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_
18241838; GFX12-NEXT: s_cbranch_execnz .LBB14_1
18251839; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
18261840; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4
1841+ ; GFX12-NEXT: s_wait_loadcnt 0x0
18271842; GFX12-NEXT: s_setpc_b64 s[30:31]
18281843;
18291844; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory:
@@ -1994,6 +2009,7 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_
19942009; GFX12-NEXT: s_cbranch_execnz .LBB15_1
19952010; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end
19962011; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4
2012+ ; GFX12-NEXT: s_wait_loadcnt 0x0
19972013; GFX12-NEXT: s_setpc_b64 s[30:31]
19982014;
19992015; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory:
0 commit comments