@@ -2824,16 +2824,16 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(ptr %out, ptr %
2824
2824
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
2825
2825
; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
2826
2826
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
2827
- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2 )
2828
- ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo , 0, v1, vcc_lo
2827
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1 )
2828
+ ; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null , 0, v1, vcc_lo
2829
2829
; GFX11-NEXT: flat_atomic_inc_u32 v3, v[0:1], v3 offset:20 glc
2830
2830
; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2831
2831
; GFX11-NEXT: buffer_gl1_inv
2832
2832
; GFX11-NEXT: buffer_gl0_inv
2833
2833
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2834
- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2 )
2834
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1 )
2835
2835
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
2836
- ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo , 0, v1, vcc_lo
2836
+ ; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null , 0, v1, vcc_lo
2837
2837
; GFX11-NEXT: flat_store_b32 v[0:1], v3
2838
2838
; GFX11-NEXT: s_endpgm
2839
2839
;
@@ -2846,15 +2846,15 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(ptr %out, ptr %
2846
2846
; GFX12-NEXT: s_wait_kmcnt 0x0
2847
2847
; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
2848
2848
; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
2849
- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2 ) | instskip(SKIP_4) | instid1(VALU_DEP_1)
2850
- ; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo , 0, v1, vcc_lo
2849
+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1 ) | instskip(SKIP_4) | instid1(VALU_DEP_1)
2850
+ ; GFX12-NEXT: v_add_co_ci_u32_e64 v1, null , 0, v1, vcc_lo
2851
2851
; GFX12-NEXT: flat_atomic_inc_u32 v3, v[0:1], v3 offset:20 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
2852
2852
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
2853
2853
; GFX12-NEXT: global_inv scope:SCOPE_DEV
2854
2854
; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2855
2855
; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
2856
2856
; GFX12-NEXT: s_wait_alu 0xfffd
2857
- ; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo , 0, v1, vcc_lo
2857
+ ; GFX12-NEXT: v_add_co_ci_u32_e64 v1, null , 0, v1, vcc_lo
2858
2858
; GFX12-NEXT: flat_store_b32 v[0:1], v3
2859
2859
; GFX12-NEXT: s_endpgm
2860
2860
%id = call i32 @llvm.amdgcn.workitem.id.x ()
@@ -2944,8 +2944,8 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(ptr %ptr) #1
2944
2944
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
2945
2945
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2946
2946
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
2947
- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2 )
2948
- ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo , 0, v1, vcc_lo
2947
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1 )
2948
+ ; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null , 0, v1, vcc_lo
2949
2949
; GFX11-NEXT: v_mov_b32_e32 v2, 42
2950
2950
; GFX11-NEXT: flat_atomic_inc_u32 v[0:1], v2 offset:20
2951
2951
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
@@ -2963,8 +2963,8 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(ptr %ptr) #1
2963
2963
; GFX12-NEXT: s_wait_kmcnt 0x0
2964
2964
; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2965
2965
; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
2966
- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2 )
2967
- ; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo , 0, v1, vcc_lo
2966
+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1 )
2967
+ ; GFX12-NEXT: v_add_co_ci_u32_e64 v1, null , 0, v1, vcc_lo
2968
2968
; GFX12-NEXT: v_mov_b32_e32 v2, 42
2969
2969
; GFX12-NEXT: flat_atomic_inc_u32 v[0:1], v2 offset:20 scope:SCOPE_DEV
2970
2970
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
@@ -3810,16 +3810,16 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(ptr %out, ptr %
3810
3810
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
3811
3811
; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
3812
3812
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4
3813
- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2 )
3814
- ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo , 0, v1, vcc_lo
3813
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1 )
3814
+ ; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null , 0, v1, vcc_lo
3815
3815
; GFX11-NEXT: flat_atomic_inc_u64 v[0:1], v[0:1], v[2:3] offset:40 glc
3816
3816
; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3817
3817
; GFX11-NEXT: buffer_gl1_inv
3818
3818
; GFX11-NEXT: buffer_gl0_inv
3819
3819
; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
3820
- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2 )
3820
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1 )
3821
3821
; GFX11-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
3822
- ; GFX11-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo , 0, v3, vcc_lo
3822
+ ; GFX11-NEXT: v_add_co_ci_u32_e64 v3, null , 0, v3, vcc_lo
3823
3823
; GFX11-NEXT: flat_store_b64 v[2:3], v[0:1]
3824
3824
; GFX11-NEXT: s_endpgm
3825
3825
;
@@ -3833,15 +3833,15 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(ptr %out, ptr %
3833
3833
; GFX12-NEXT: s_wait_kmcnt 0x0
3834
3834
; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
3835
3835
; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4
3836
- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2 ) | instskip(SKIP_4) | instid1(VALU_DEP_1)
3837
- ; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo , 0, v1, vcc_lo
3836
+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1 ) | instskip(SKIP_4) | instid1(VALU_DEP_1)
3837
+ ; GFX12-NEXT: v_add_co_ci_u32_e64 v1, null , 0, v1, vcc_lo
3838
3838
; GFX12-NEXT: flat_atomic_inc_u64 v[0:1], v[0:1], v[2:3] offset:40 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
3839
3839
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
3840
3840
; GFX12-NEXT: global_inv scope:SCOPE_DEV
3841
3841
; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
3842
3842
; GFX12-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
3843
3843
; GFX12-NEXT: s_wait_alu 0xfffd
3844
- ; GFX12-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo , 0, v3, vcc_lo
3844
+ ; GFX12-NEXT: v_add_co_ci_u32_e64 v3, null , 0, v3, vcc_lo
3845
3845
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
3846
3846
; GFX12-NEXT: s_endpgm
3847
3847
%id = call i32 @llvm.amdgcn.workitem.id.x ()
@@ -3936,8 +3936,8 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(ptr %ptr) #1
3936
3936
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
3937
3937
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
3938
3938
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4
3939
- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2 )
3940
- ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo , 0, v1, vcc_lo
3939
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1 )
3940
+ ; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null , 0, v1, vcc_lo
3941
3941
; GFX11-NEXT: flat_atomic_inc_u64 v[0:1], v[2:3] offset:40
3942
3942
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
3943
3943
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
@@ -3955,8 +3955,8 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(ptr %ptr) #1
3955
3955
; GFX12-NEXT: s_wait_kmcnt 0x0
3956
3956
; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
3957
3957
; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4
3958
- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2 )
3959
- ; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo , 0, v1, vcc_lo
3958
+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1 )
3959
+ ; GFX12-NEXT: v_add_co_ci_u32_e64 v1, null , 0, v1, vcc_lo
3960
3960
; GFX12-NEXT: flat_atomic_inc_u64 v[0:1], v[2:3] offset:40 scope:SCOPE_DEV
3961
3961
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
3962
3962
; GFX12-NEXT: global_inv scope:SCOPE_DEV
0 commit comments