@@ -1501,6 +1501,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %pt
15011501; GFX1250-NEXT: v_mul_f64_e32 v[0:1], 4.0, v[0:1]
15021502; GFX1250-NEXT: global_wb scope:SCOPE_SYS
15031503; GFX1250-NEXT: s_wait_storecnt 0x0
1504+ ; GFX1250-NEXT: s_wait_xcnt 0x0
15041505; GFX1250-NEXT: s_wait_kmcnt 0x0
15051506; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
15061507; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -1571,6 +1572,9 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(ptr addrspace(
15711572; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
15721573; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
15731574; GFX1250-NEXT: v_mul_f64_e32 v[0:1], 4.0, v[0:1]
1575+ ; GFX1250-NEXT: global_wb scope:SCOPE_DEV
1576+ ; GFX1250-NEXT: s_wait_storecnt 0x0
1577+ ; GFX1250-NEXT: s_wait_xcnt 0x0
15741578; GFX1250-NEXT: s_wait_kmcnt 0x0
15751579; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
15761580; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -1645,6 +1649,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(ptr addrspace
16451649; GFX1250-NEXT: v_mul_f64_e32 v[0:1], 4.0, v[0:1]
16461650; GFX1250-NEXT: global_wb scope:SCOPE_SYS
16471651; GFX1250-NEXT: s_wait_storecnt 0x0
1652+ ; GFX1250-NEXT: s_wait_xcnt 0x0
16481653; GFX1250-NEXT: s_wait_kmcnt 0x0
16491654; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
16501655; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -1715,6 +1720,9 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace(
17151720; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
17161721; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
17171722; GFX1250-NEXT: v_mul_f64_e32 v[0:1], 4.0, v[0:1]
1723+ ; GFX1250-NEXT: global_wb scope:SCOPE_DEV
1724+ ; GFX1250-NEXT: s_wait_storecnt 0x0
1725+ ; GFX1250-NEXT: s_wait_xcnt 0x0
17181726; GFX1250-NEXT: s_wait_kmcnt 0x0
17191727; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
17201728; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -1792,6 +1800,7 @@ define double @global_atomic_fadd_f64_rtn_pat_agent(ptr addrspace(1) %ptr, doubl
17921800; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
17931801; GFX1250-NEXT: s_wait_kmcnt 0x0
17941802; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 4.0
1803+ ; GFX1250-NEXT: global_wb scope:SCOPE_DEV
17951804; GFX1250-NEXT: s_wait_storecnt 0x0
17961805; GFX1250-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
17971806; GFX1250-NEXT: s_wait_loadcnt 0x0
@@ -1902,6 +1911,9 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(ptr addrs
19021911; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
19031912; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
19041913; GFX1250-NEXT: v_mul_f64_e32 v[0:1], 4.0, v[0:1]
1914+ ; GFX1250-NEXT: global_wb scope:SCOPE_DEV
1915+ ; GFX1250-NEXT: s_wait_storecnt 0x0
1916+ ; GFX1250-NEXT: s_wait_xcnt 0x0
19051917; GFX1250-NEXT: s_wait_kmcnt 0x0
19061918; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
19071919; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -1947,6 +1959,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(ptr %ptr) #1 {
19471959; GFX1250-NEXT: v_mov_b32_e32 v2, 0
19481960; GFX1250-NEXT: global_wb scope:SCOPE_SYS
19491961; GFX1250-NEXT: s_wait_storecnt 0x0
1962+ ; GFX1250-NEXT: s_wait_xcnt 0x0
19501963; GFX1250-NEXT: s_wait_kmcnt 0x0
19511964; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
19521965; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -1987,6 +2000,9 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent(ptr %ptr) #1 {
19872000; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
19882001; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0
19892002; GFX1250-NEXT: v_mov_b32_e32 v2, 0
2003+ ; GFX1250-NEXT: global_wb scope:SCOPE_DEV
2004+ ; GFX1250-NEXT: s_wait_storecnt 0x0
2005+ ; GFX1250-NEXT: s_wait_xcnt 0x0
19902006; GFX1250-NEXT: s_wait_kmcnt 0x0
19912007; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
19922008; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -2031,6 +2047,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(ptr %ptr) #1 {
20312047; GFX1250-NEXT: v_mov_b32_e32 v2, 0
20322048; GFX1250-NEXT: global_wb scope:SCOPE_SYS
20332049; GFX1250-NEXT: s_wait_storecnt 0x0
2050+ ; GFX1250-NEXT: s_wait_xcnt 0x0
20342051; GFX1250-NEXT: s_wait_kmcnt 0x0
20352052; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
20362053; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -2107,6 +2124,7 @@ define double @flat_atomic_fadd_f64_rtn_pat_agent(ptr %ptr) #1 {
21072124; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
21082125; GFX1250-NEXT: s_wait_kmcnt 0x0
21092126; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 4.0
2127+ ; GFX1250-NEXT: global_wb scope:SCOPE_DEV
21102128; GFX1250-NEXT: s_wait_storecnt 0x0
21112129; GFX1250-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
21122130; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2190,6 +2208,9 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(ptr %ptr) {
21902208; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
21912209; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0
21922210; GFX1250-NEXT: v_mov_b32_e32 v2, 0
2211+ ; GFX1250-NEXT: global_wb scope:SCOPE_DEV
2212+ ; GFX1250-NEXT: s_wait_storecnt 0x0
2213+ ; GFX1250-NEXT: s_wait_xcnt 0x0
21932214; GFX1250-NEXT: s_wait_kmcnt 0x0
21942215; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
21952216; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -2418,6 +2439,7 @@ define double @local_atomic_fadd_f64_rtn_pat(ptr addrspace(3) %ptr, double %data
24182439; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
24192440; GFX1250-NEXT: s_wait_kmcnt 0x0
24202441; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 4.0
2442+ ; GFX1250-NEXT: s_wait_storecnt 0x0
24212443; GFX1250-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3]
24222444; GFX1250-NEXT: s_wait_dscnt 0x0
24232445; GFX1250-NEXT: s_set_pc_i64 s[30:31]
0 commit comments