@@ -18,7 +18,6 @@ define float @local_atomic_fmax_ret_f32(ptr addrspace(3) %ptr, float %val) {
1818; GFX12-NEXT: s_wait_samplecnt 0x0
1919; GFX12-NEXT: s_wait_bvhcnt 0x0
2020; GFX12-NEXT: s_wait_kmcnt 0x0
21- ; GFX12-NEXT: global_wb scope:SCOPE_SE
2221; GFX12-NEXT: s_wait_storecnt 0x0
2322; GFX12-NEXT: ds_max_num_rtn_f32 v0, v0, v1
2423; GFX12-NEXT: s_wait_dscnt 0x0
@@ -91,7 +90,6 @@ define void @local_atomic_fmax_noret_f32(ptr addrspace(3) %ptr, float %val) {
9190; GFX12-NEXT: s_wait_samplecnt 0x0
9291; GFX12-NEXT: s_wait_bvhcnt 0x0
9392; GFX12-NEXT: s_wait_kmcnt 0x0
94- ; GFX12-NEXT: global_wb scope:SCOPE_SE
9593; GFX12-NEXT: s_wait_storecnt 0x0
9694; GFX12-NEXT: ds_max_num_f32 v0, v1
9795; GFX12-NEXT: s_wait_dscnt 0x0
@@ -164,7 +162,6 @@ define double @local_atomic_fmax_ret_f64(ptr addrspace(3) %ptr, double %val) {
164162; GFX12-NEXT: s_wait_samplecnt 0x0
165163; GFX12-NEXT: s_wait_bvhcnt 0x0
166164; GFX12-NEXT: s_wait_kmcnt 0x0
167- ; GFX12-NEXT: global_wb scope:SCOPE_SE
168165; GFX12-NEXT: s_wait_storecnt 0x0
169166; GFX12-NEXT: ds_max_num_rtn_f64 v[0:1], v0, v[1:2]
170167; GFX12-NEXT: s_wait_dscnt 0x0
@@ -241,7 +238,6 @@ define void @local_atomic_fmax_noret_f64(ptr addrspace(3) %ptr, double %val) {
241238; GFX12-NEXT: s_wait_samplecnt 0x0
242239; GFX12-NEXT: s_wait_bvhcnt 0x0
243240; GFX12-NEXT: s_wait_kmcnt 0x0
244- ; GFX12-NEXT: global_wb scope:SCOPE_SE
245241; GFX12-NEXT: s_wait_storecnt 0x0
246242; GFX12-NEXT: ds_max_num_f64 v0, v[1:2]
247243; GFX12-NEXT: s_wait_dscnt 0x0
@@ -318,7 +314,6 @@ define float @global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory(pt
318314; GFX12-NEXT: s_wait_samplecnt 0x0
319315; GFX12-NEXT: s_wait_bvhcnt 0x0
320316; GFX12-NEXT: s_wait_kmcnt 0x0
321- ; GFX12-NEXT: global_wb scope:SCOPE_DEV
322317; GFX12-NEXT: s_wait_storecnt 0x0
323318; GFX12-NEXT: global_atomic_max_num_f32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
324319; GFX12-NEXT: s_wait_loadcnt 0x0
@@ -465,7 +460,6 @@ define void @global_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory(p
465460; GFX12-NEXT: s_wait_samplecnt 0x0
466461; GFX12-NEXT: s_wait_bvhcnt 0x0
467462; GFX12-NEXT: s_wait_kmcnt 0x0
468- ; GFX12-NEXT: global_wb scope:SCOPE_DEV
469463; GFX12-NEXT: s_wait_storecnt 0x0
470464; GFX12-NEXT: global_atomic_max_num_f32 v[0:1], v2, off scope:SCOPE_DEV
471465; GFX12-NEXT: s_wait_storecnt 0x0
@@ -617,7 +611,6 @@ define double @global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory(p
617611; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
618612; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[6:7], v[6:7]
619613; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[2:3]
620- ; GFX12-NEXT: global_wb scope:SCOPE_DEV
621614; GFX12-NEXT: s_wait_storecnt 0x0
622615; GFX12-NEXT: global_atomic_cmpswap_b64 v[4:5], v[0:1], v[4:7], off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
623616; GFX12-NEXT: s_wait_loadcnt 0x0
@@ -774,7 +767,6 @@ define void @global_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory(p
774767; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[4:5], v[4:5]
775768; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
776769; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[6:7]
777- ; GFX12-NEXT: global_wb scope:SCOPE_DEV
778770; GFX12-NEXT: s_wait_storecnt 0x0
779771; GFX12-NEXT: global_atomic_cmpswap_b64 v[2:3], v[0:1], v[2:5], off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
780772; GFX12-NEXT: s_wait_loadcnt 0x0
@@ -915,7 +907,6 @@ define float @flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory(ptr
915907; GFX12-NEXT: s_wait_samplecnt 0x0
916908; GFX12-NEXT: s_wait_bvhcnt 0x0
917909; GFX12-NEXT: s_wait_kmcnt 0x0
918- ; GFX12-NEXT: global_wb scope:SCOPE_DEV
919910; GFX12-NEXT: s_wait_storecnt 0x0
920911; GFX12-NEXT: flat_atomic_max_num_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
921912; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1058,7 +1049,6 @@ define void @flat_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory(ptr
10581049; GFX12-NEXT: s_wait_samplecnt 0x0
10591050; GFX12-NEXT: s_wait_bvhcnt 0x0
10601051; GFX12-NEXT: s_wait_kmcnt 0x0
1061- ; GFX12-NEXT: global_wb scope:SCOPE_DEV
10621052; GFX12-NEXT: s_wait_storecnt 0x0
10631053; GFX12-NEXT: flat_atomic_max_num_f32 v[0:1], v2 scope:SCOPE_DEV
10641054; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
@@ -1209,7 +1199,6 @@ define double @flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory(ptr
12091199; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
12101200; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[6:7], v[6:7]
12111201; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[2:3]
1212- ; GFX12-NEXT: global_wb scope:SCOPE_DEV
12131202; GFX12-NEXT: s_wait_storecnt 0x0
12141203; GFX12-NEXT: flat_atomic_cmpswap_b64 v[4:5], v[0:1], v[4:7] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
12151204; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1364,7 +1353,6 @@ define void @flat_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory(ptr
13641353; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[4:5], v[4:5]
13651354; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
13661355; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[6:7]
1367- ; GFX12-NEXT: global_wb scope:SCOPE_DEV
13681356; GFX12-NEXT: s_wait_storecnt 0x0
13691357; GFX12-NEXT: flat_atomic_cmpswap_b64 v[2:3], v[0:1], v[2:5] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
13701358; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1507,7 +1495,6 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_m
15071495; GFX12-NEXT: s_wait_bvhcnt 0x0
15081496; GFX12-NEXT: s_wait_kmcnt 0x0
15091497; GFX12-NEXT: v_mov_b32_e32 v1, s6
1510- ; GFX12-NEXT: global_wb scope:SCOPE_DEV
15111498; GFX12-NEXT: s_wait_storecnt 0x0
15121499; GFX12-NEXT: buffer_atomic_max_num_f32 v0, v1, s[0:3], null offen th:TH_ATOMIC_RETURN
15131500; GFX12-NEXT: s_wait_loadcnt 0x0
@@ -1684,7 +1671,6 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_
16841671; GFX12-NEXT: s_wait_bvhcnt 0x0
16851672; GFX12-NEXT: s_wait_kmcnt 0x0
16861673; GFX12-NEXT: v_mov_b32_e32 v1, s6
1687- ; GFX12-NEXT: global_wb scope:SCOPE_DEV
16881674; GFX12-NEXT: s_wait_storecnt 0x0
16891675; GFX12-NEXT: buffer_atomic_max_num_f32 v0, v1, s[0:3], null offen
16901676; GFX12-NEXT: s_wait_storecnt 0x0
@@ -1865,11 +1851,11 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_
18651851; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1
18661852; GFX12-NEXT: s_wait_loadcnt 0x0
18671853; GFX12-NEXT: v_dual_mov_b32 v10, v1 :: v_dual_mov_b32 v9, v0
1868- ; GFX12-NEXT: global_wb scope:SCOPE_DEV
18691854; GFX12-NEXT: s_wait_storecnt 0x0
1870- ; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[9:10], v[9:10]
18711855; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1856+ ; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[9:10], v[9:10]
18721857; GFX12-NEXT: v_max_num_f64_e32 v[7:8], v[0:1], v[4:5]
1858+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
18731859; GFX12-NEXT: v_dual_mov_b32 v0, v7 :: v_dual_mov_b32 v1, v8
18741860; GFX12-NEXT: v_dual_mov_b32 v2, v9 :: v_dual_mov_b32 v3, v10
18751861; GFX12-NEXT: buffer_atomic_cmpswap_b64 v[0:3], v6, s[0:3], null offen th:TH_ATOMIC_RETURN
@@ -2058,11 +2044,10 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_
20582044; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1
20592045; GFX12-NEXT: s_wait_loadcnt 0x0
20602046; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[2:3], v[2:3]
2061- ; GFX12-NEXT: global_wb scope:SCOPE_DEV
20622047; GFX12-NEXT: s_wait_storecnt 0x0
2048+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
20632049; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[4:5]
20642050; GFX12-NEXT: v_dual_mov_b32 v10, v3 :: v_dual_mov_b32 v9, v2
2065- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
20662051; GFX12-NEXT: v_dual_mov_b32 v8, v1 :: v_dual_mov_b32 v7, v0
20672052; GFX12-NEXT: buffer_atomic_cmpswap_b64 v[7:10], v6, s[0:3], null offen th:TH_ATOMIC_RETURN
20682053; GFX12-NEXT: s_wait_loadcnt 0x0
0 commit comments