@@ -2824,16 +2824,16 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(ptr %out, ptr %
28242824; GFX11-NEXT: s_waitcnt lgkmcnt(0)
28252825; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
28262826; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
2827- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2 )
2828- ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo , 0, v1, vcc_lo
2827+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1 )
2828+ ; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null , 0, v1, vcc_lo
28292829; GFX11-NEXT: flat_atomic_inc_u32 v3, v[0:1], v3 offset:20 glc
28302830; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
28312831; GFX11-NEXT: buffer_gl1_inv
28322832; GFX11-NEXT: buffer_gl0_inv
28332833; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2834- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2 )
2834+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1 )
28352835; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
2836- ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo , 0, v1, vcc_lo
2836+ ; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null , 0, v1, vcc_lo
28372837; GFX11-NEXT: flat_store_b32 v[0:1], v3
28382838; GFX11-NEXT: s_endpgm
28392839;
@@ -2846,15 +2846,15 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(ptr %out, ptr %
28462846; GFX12-NEXT: s_wait_kmcnt 0x0
28472847; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
28482848; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
2849- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2 ) | instskip(SKIP_4) | instid1(VALU_DEP_1)
2850- ; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo , 0, v1, vcc_lo
2849+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1 ) | instskip(SKIP_4) | instid1(VALU_DEP_1)
2850+ ; GFX12-NEXT: v_add_co_ci_u32_e64 v1, null , 0, v1, vcc_lo
28512851; GFX12-NEXT: flat_atomic_inc_u32 v3, v[0:1], v3 offset:20 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
28522852; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
28532853; GFX12-NEXT: global_inv scope:SCOPE_DEV
28542854; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
28552855; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
28562856; GFX12-NEXT: s_wait_alu 0xfffd
2857- ; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo , 0, v1, vcc_lo
2857+ ; GFX12-NEXT: v_add_co_ci_u32_e64 v1, null , 0, v1, vcc_lo
28582858; GFX12-NEXT: flat_store_b32 v[0:1], v3
28592859; GFX12-NEXT: s_endpgm
28602860 %id = call i32 @llvm.amdgcn.workitem.id.x ()
@@ -2944,8 +2944,8 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(ptr %ptr) #1
29442944; GFX11-NEXT: s_waitcnt lgkmcnt(0)
29452945; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
29462946; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
2947- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2 )
2948- ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo , 0, v1, vcc_lo
2947+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1 )
2948+ ; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null , 0, v1, vcc_lo
29492949; GFX11-NEXT: v_mov_b32_e32 v2, 42
29502950; GFX11-NEXT: flat_atomic_inc_u32 v[0:1], v2 offset:20
29512951; GFX11-NEXT: s_waitcnt lgkmcnt(0)
@@ -2963,8 +2963,8 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(ptr %ptr) #1
29632963; GFX12-NEXT: s_wait_kmcnt 0x0
29642964; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
29652965; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
2966- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2 )
2967- ; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo , 0, v1, vcc_lo
2966+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1 )
2967+ ; GFX12-NEXT: v_add_co_ci_u32_e64 v1, null , 0, v1, vcc_lo
29682968; GFX12-NEXT: v_mov_b32_e32 v2, 42
29692969; GFX12-NEXT: flat_atomic_inc_u32 v[0:1], v2 offset:20 scope:SCOPE_DEV
29702970; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
@@ -3810,16 +3810,16 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(ptr %out, ptr %
38103810; GFX11-NEXT: s_waitcnt lgkmcnt(0)
38113811; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
38123812; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4
3813- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2 )
3814- ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo , 0, v1, vcc_lo
3813+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1 )
3814+ ; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null , 0, v1, vcc_lo
38153815; GFX11-NEXT: flat_atomic_inc_u64 v[0:1], v[0:1], v[2:3] offset:40 glc
38163816; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
38173817; GFX11-NEXT: buffer_gl1_inv
38183818; GFX11-NEXT: buffer_gl0_inv
38193819; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
3820- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2 )
3820+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1 )
38213821; GFX11-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
3822- ; GFX11-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo , 0, v3, vcc_lo
3822+ ; GFX11-NEXT: v_add_co_ci_u32_e64 v3, null , 0, v3, vcc_lo
38233823; GFX11-NEXT: flat_store_b64 v[2:3], v[0:1]
38243824; GFX11-NEXT: s_endpgm
38253825;
@@ -3833,15 +3833,15 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(ptr %out, ptr %
38333833; GFX12-NEXT: s_wait_kmcnt 0x0
38343834; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
38353835; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4
3836- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2 ) | instskip(SKIP_4) | instid1(VALU_DEP_1)
3837- ; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo , 0, v1, vcc_lo
3836+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1 ) | instskip(SKIP_4) | instid1(VALU_DEP_1)
3837+ ; GFX12-NEXT: v_add_co_ci_u32_e64 v1, null , 0, v1, vcc_lo
38383838; GFX12-NEXT: flat_atomic_inc_u64 v[0:1], v[0:1], v[2:3] offset:40 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
38393839; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
38403840; GFX12-NEXT: global_inv scope:SCOPE_DEV
38413841; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
38423842; GFX12-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
38433843; GFX12-NEXT: s_wait_alu 0xfffd
3844- ; GFX12-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo , 0, v3, vcc_lo
3844+ ; GFX12-NEXT: v_add_co_ci_u32_e64 v3, null , 0, v3, vcc_lo
38453845; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
38463846; GFX12-NEXT: s_endpgm
38473847 %id = call i32 @llvm.amdgcn.workitem.id.x ()
@@ -3936,8 +3936,8 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(ptr %ptr) #1
39363936; GFX11-NEXT: s_waitcnt lgkmcnt(0)
39373937; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
39383938; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4
3939- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2 )
3940- ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo , 0, v1, vcc_lo
3939+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1 )
3940+ ; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null , 0, v1, vcc_lo
39413941; GFX11-NEXT: flat_atomic_inc_u64 v[0:1], v[2:3] offset:40
39423942; GFX11-NEXT: s_waitcnt lgkmcnt(0)
39433943; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
@@ -3955,8 +3955,8 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(ptr %ptr) #1
39553955; GFX12-NEXT: s_wait_kmcnt 0x0
39563956; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
39573957; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4
3958- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2 )
3959- ; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo , 0, v1, vcc_lo
3958+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1 )
3959+ ; GFX12-NEXT: v_add_co_ci_u32_e64 v1, null , 0, v1, vcc_lo
39603960; GFX12-NEXT: flat_atomic_inc_u64 v[0:1], v[2:3] offset:40 scope:SCOPE_DEV
39613961; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
39623962; GFX12-NEXT: global_inv scope:SCOPE_DEV
0 commit comments