@@ -18,6 +18,7 @@ define float @local_atomic_fmax_ret_f32(ptr addrspace(3) %ptr, float %val) {
1818; GFX12-NEXT: s_wait_samplecnt 0x0
1919; GFX12-NEXT: s_wait_bvhcnt 0x0
2020; GFX12-NEXT: s_wait_kmcnt 0x0
21+ ; GFX12-NEXT: global_wb scope:SCOPE_SE
2122; GFX12-NEXT: s_wait_storecnt 0x0
2223; GFX12-NEXT: ds_max_num_rtn_f32 v0, v0, v1
2324; GFX12-NEXT: s_wait_dscnt 0x0
@@ -90,6 +91,7 @@ define void @local_atomic_fmax_noret_f32(ptr addrspace(3) %ptr, float %val) {
9091; GFX12-NEXT: s_wait_samplecnt 0x0
9192; GFX12-NEXT: s_wait_bvhcnt 0x0
9293; GFX12-NEXT: s_wait_kmcnt 0x0
94+ ; GFX12-NEXT: global_wb scope:SCOPE_SE
9395; GFX12-NEXT: s_wait_storecnt 0x0
9496; GFX12-NEXT: ds_max_num_f32 v0, v1
9597; GFX12-NEXT: s_wait_dscnt 0x0
@@ -162,6 +164,7 @@ define double @local_atomic_fmax_ret_f64(ptr addrspace(3) %ptr, double %val) {
162164; GFX12-NEXT: s_wait_samplecnt 0x0
163165; GFX12-NEXT: s_wait_bvhcnt 0x0
164166; GFX12-NEXT: s_wait_kmcnt 0x0
167+ ; GFX12-NEXT: global_wb scope:SCOPE_SE
165168; GFX12-NEXT: s_wait_storecnt 0x0
166169; GFX12-NEXT: ds_max_num_rtn_f64 v[0:1], v0, v[1:2]
167170; GFX12-NEXT: s_wait_dscnt 0x0
@@ -238,6 +241,7 @@ define void @local_atomic_fmax_noret_f64(ptr addrspace(3) %ptr, double %val) {
238241; GFX12-NEXT: s_wait_samplecnt 0x0
239242; GFX12-NEXT: s_wait_bvhcnt 0x0
240243; GFX12-NEXT: s_wait_kmcnt 0x0
244+ ; GFX12-NEXT: global_wb scope:SCOPE_SE
241245; GFX12-NEXT: s_wait_storecnt 0x0
242246; GFX12-NEXT: ds_max_num_f64 v0, v[1:2]
243247; GFX12-NEXT: s_wait_dscnt 0x0
@@ -324,8 +328,9 @@ define float @global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory(pt
324328; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
325329; GFX12-NEXT: v_max_num_f32_e32 v3, v4, v4
326330; GFX12-NEXT: v_max_num_f32_e32 v3, v3, v2
331+ ; GFX12-NEXT: global_wb scope:SCOPE_DEV
327332; GFX12-NEXT: s_wait_storecnt 0x0
328- ; GFX12-NEXT: global_atomic_cmpswap_b32 v3, v[0:1], v[3:4], off th:TH_ATOMIC_RETURN
333+ ; GFX12-NEXT: global_atomic_cmpswap_b32 v3, v[0:1], v[3:4], off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
329334; GFX12-NEXT: s_wait_loadcnt 0x0
330335; GFX12-NEXT: global_inv scope:SCOPE_DEV
331336; GFX12-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v4
@@ -538,8 +543,9 @@ define void @global_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory(p
538543; GFX12-NEXT: v_max_num_f32_e32 v2, v3, v3
539544; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
540545; GFX12-NEXT: v_max_num_f32_e32 v2, v2, v4
546+ ; GFX12-NEXT: global_wb scope:SCOPE_DEV
541547; GFX12-NEXT: s_wait_storecnt 0x0
542- ; GFX12-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], v[2:3], off th:TH_ATOMIC_RETURN
548+ ; GFX12-NEXT: global_atomic_cmpswap_b32 v2, v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
543549; GFX12-NEXT: s_wait_loadcnt 0x0
544550; GFX12-NEXT: global_inv scope:SCOPE_DEV
545551; GFX12-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
@@ -746,8 +752,9 @@ define double @global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory(p
746752; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
747753; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[6:7], v[6:7]
748754; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[2:3]
755+ ; GFX12-NEXT: global_wb scope:SCOPE_DEV
749756; GFX12-NEXT: s_wait_storecnt 0x0
750- ; GFX12-NEXT: global_atomic_cmpswap_b64 v[4:5], v[0:1], v[4:7], off th:TH_ATOMIC_RETURN
757+ ; GFX12-NEXT: global_atomic_cmpswap_b64 v[4:5], v[0:1], v[4:7], off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
751758; GFX12-NEXT: s_wait_loadcnt 0x0
752759; GFX12-NEXT: global_inv scope:SCOPE_DEV
753760; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[4:5], v[6:7]
@@ -972,8 +979,9 @@ define void @global_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory(p
972979; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[4:5], v[4:5]
973980; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
974981; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[6:7]
982+ ; GFX12-NEXT: global_wb scope:SCOPE_DEV
975983; GFX12-NEXT: s_wait_storecnt 0x0
976- ; GFX12-NEXT: global_atomic_cmpswap_b64 v[2:3], v[0:1], v[2:5], off th:TH_ATOMIC_RETURN
984+ ; GFX12-NEXT: global_atomic_cmpswap_b64 v[2:3], v[0:1], v[2:5], off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
977985; GFX12-NEXT: s_wait_loadcnt 0x0
978986; GFX12-NEXT: global_inv scope:SCOPE_DEV
979987; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[2:3], v[4:5]
@@ -1186,8 +1194,9 @@ define float @flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory(ptr
11861194; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
11871195; GFX12-NEXT: v_max_num_f32_e32 v3, v4, v4
11881196; GFX12-NEXT: v_max_num_f32_e32 v3, v3, v2
1197+ ; GFX12-NEXT: global_wb scope:SCOPE_DEV
11891198; GFX12-NEXT: s_wait_storecnt 0x0
1190- ; GFX12-NEXT: flat_atomic_cmpswap_b32 v3, v[0:1], v[3:4] th:TH_ATOMIC_RETURN
1199+ ; GFX12-NEXT: flat_atomic_cmpswap_b32 v3, v[0:1], v[3:4] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
11911200; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
11921201; GFX12-NEXT: global_inv scope:SCOPE_DEV
11931202; GFX12-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v4
@@ -1395,8 +1404,9 @@ define void @flat_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory(ptr
13951404; GFX12-NEXT: v_max_num_f32_e32 v2, v3, v3
13961405; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
13971406; GFX12-NEXT: v_max_num_f32_e32 v2, v2, v4
1407+ ; GFX12-NEXT: global_wb scope:SCOPE_DEV
13981408; GFX12-NEXT: s_wait_storecnt 0x0
1399- ; GFX12-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
1409+ ; GFX12-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
14001410; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
14011411; GFX12-NEXT: global_inv scope:SCOPE_DEV
14021412; GFX12-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
@@ -1598,8 +1608,9 @@ define double @flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory(ptr
15981608; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
15991609; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[6:7], v[6:7]
16001610; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[2:3]
1611+ ; GFX12-NEXT: global_wb scope:SCOPE_DEV
16011612; GFX12-NEXT: s_wait_storecnt 0x0
1602- ; GFX12-NEXT: flat_atomic_cmpswap_b64 v[4:5], v[0:1], v[4:7] th:TH_ATOMIC_RETURN
1613+ ; GFX12-NEXT: flat_atomic_cmpswap_b64 v[4:5], v[0:1], v[4:7] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
16031614; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
16041615; GFX12-NEXT: global_inv scope:SCOPE_DEV
16051616; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[4:5], v[6:7]
@@ -1823,8 +1834,9 @@ define void @flat_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory(ptr
18231834; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[4:5], v[4:5]
18241835; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
18251836; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[6:7]
1837+ ; GFX12-NEXT: global_wb scope:SCOPE_DEV
18261838; GFX12-NEXT: s_wait_storecnt 0x0
1827- ; GFX12-NEXT: flat_atomic_cmpswap_b64 v[2:3], v[0:1], v[2:5] th:TH_ATOMIC_RETURN
1839+ ; GFX12-NEXT: flat_atomic_cmpswap_b64 v[2:3], v[0:1], v[2:5] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
18281840; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
18291841; GFX12-NEXT: global_inv scope:SCOPE_DEV
18301842; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[2:3], v[4:5]
@@ -2035,11 +2047,11 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_m
20352047; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1
20362048; GFX12-NEXT: s_wait_loadcnt 0x0
20372049; GFX12-NEXT: v_mov_b32_e32 v5, v0
2050+ ; GFX12-NEXT: global_wb scope:SCOPE_DEV
20382051; GFX12-NEXT: s_wait_storecnt 0x0
2039- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
20402052; GFX12-NEXT: v_max_num_f32_e32 v0, v5, v5
2053+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
20412054; GFX12-NEXT: v_max_num_f32_e32 v4, v0, v3
2042- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
20432055; GFX12-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
20442056; GFX12-NEXT: buffer_atomic_cmpswap_b32 v[0:1], v2, s[0:3], null offen th:TH_ATOMIC_RETURN
20452057; GFX12-NEXT: s_wait_loadcnt 0x0
@@ -2285,9 +2297,10 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_
22852297; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1
22862298; GFX12-NEXT: s_wait_loadcnt 0x0
22872299; GFX12-NEXT: v_max_num_f32_e32 v0, v1, v1
2300+ ; GFX12-NEXT: global_wb scope:SCOPE_DEV
22882301; GFX12-NEXT: s_wait_storecnt 0x0
2289- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
22902302; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v3
2303+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
22912304; GFX12-NEXT: v_dual_mov_b32 v5, v1 :: v_dual_mov_b32 v4, v0
22922305; GFX12-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v2, s[0:3], null offen th:TH_ATOMIC_RETURN
22932306; GFX12-NEXT: s_wait_loadcnt 0x0
@@ -2527,11 +2540,11 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_
25272540; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1
25282541; GFX12-NEXT: s_wait_loadcnt 0x0
25292542; GFX12-NEXT: v_dual_mov_b32 v10, v1 :: v_dual_mov_b32 v9, v0
2543+ ; GFX12-NEXT: global_wb scope:SCOPE_DEV
25302544; GFX12-NEXT: s_wait_storecnt 0x0
2531- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
25322545; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[9:10], v[9:10]
2546+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
25332547; GFX12-NEXT: v_max_num_f64_e32 v[7:8], v[0:1], v[4:5]
2534- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
25352548; GFX12-NEXT: v_dual_mov_b32 v0, v7 :: v_dual_mov_b32 v1, v8
25362549; GFX12-NEXT: v_dual_mov_b32 v2, v9 :: v_dual_mov_b32 v3, v10
25372550; GFX12-NEXT: buffer_atomic_cmpswap_b64 v[0:3], v6, s[0:3], null offen th:TH_ATOMIC_RETURN
@@ -2800,10 +2813,11 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_
28002813; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1
28012814; GFX12-NEXT: s_wait_loadcnt 0x0
28022815; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[2:3], v[2:3]
2816+ ; GFX12-NEXT: global_wb scope:SCOPE_DEV
28032817; GFX12-NEXT: s_wait_storecnt 0x0
2804- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
28052818; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[4:5]
28062819; GFX12-NEXT: v_dual_mov_b32 v10, v3 :: v_dual_mov_b32 v9, v2
2820+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
28072821; GFX12-NEXT: v_dual_mov_b32 v8, v1 :: v_dual_mov_b32 v7, v0
28082822; GFX12-NEXT: buffer_atomic_cmpswap_b64 v[7:10], v6, s[0:3], null offen th:TH_ATOMIC_RETURN
28092823; GFX12-NEXT: s_wait_loadcnt 0x0
0 commit comments