@@ -1823,22 +1823,22 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out, ptr addrspace
18231823; GFX1264: ; %bb.0: ; %entry
18241824; GFX1264-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
18251825; GFX1264-NEXT: s_mov_b64 s[6:7], exec
1826- ; GFX1264-NEXT: s_mov_b32 s9, 0
1827- ; GFX1264-NEXT: v_mbcnt_lo_u32_b32 v0, s6, 0
18281826; GFX1264-NEXT: s_mov_b64 s[4:5], exec
1827+ ; GFX1264-NEXT: v_mbcnt_lo_u32_b32 v0, s6, 0
18291828; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
18301829; GFX1264-NEXT: v_mbcnt_hi_u32_b32 v2, s7, v0
18311830; GFX1264-NEXT: ; implicit-def: $vgpr0_vgpr1
18321831; GFX1264-NEXT: v_cmpx_eq_u32_e32 0, v2
18331832; GFX1264-NEXT: s_cbranch_execz .LBB3_2
18341833; GFX1264-NEXT: ; %bb.1:
1835- ; GFX1264-NEXT: s_bcnt1_i32_b64 s8, s[6:7]
1834+ ; GFX1264-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
1835+ ; GFX1264-NEXT: v_mov_b32_e32 v1, 0
1836+ ; GFX1264-NEXT: s_wait_alu 0xfffe
1837+ ; GFX1264-NEXT: s_mul_i32 s6, s6, 5
18361838; GFX1264-NEXT: s_mov_b32 s11, 0x31016000
1837- ; GFX1264-NEXT: s_mul_u64 s[6:7], s[8:9], 5
1838- ; GFX1264-NEXT: s_mov_b32 s10, -1
18391839; GFX1264-NEXT: s_wait_alu 0xfffe
18401840; GFX1264-NEXT: v_mov_b32_e32 v0, s6
1841- ; GFX1264-NEXT: v_mov_b32_e32 v1, s7
1841+ ; GFX1264-NEXT: s_mov_b32 s10, -1
18421842; GFX1264-NEXT: s_wait_kmcnt 0x0
18431843; GFX1264-NEXT: s_mov_b32 s8, s2
18441844; GFX1264-NEXT: s_mov_b32 s9, s3
@@ -1860,29 +1860,27 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out, ptr addrspace
18601860; GFX1232-LABEL: add_i64_constant:
18611861; GFX1232: ; %bb.0: ; %entry
18621862; GFX1232-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
1863- ; GFX1232-NEXT: s_mov_b32 s7, exec_lo
1864- ; GFX1232-NEXT: s_mov_b32 s5, 0
1865- ; GFX1232-NEXT: v_mbcnt_lo_u32_b32 v2, s7, 0
18661863; GFX1232-NEXT: s_mov_b32 s6, exec_lo
1864+ ; GFX1232-NEXT: s_mov_b32 s4, exec_lo
1865+ ; GFX1232-NEXT: v_mbcnt_lo_u32_b32 v2, s6, 0
18671866; GFX1232-NEXT: ; implicit-def: $vgpr0_vgpr1
18681867; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_1)
18691868; GFX1232-NEXT: v_cmpx_eq_u32_e32 0, v2
18701869; GFX1232-NEXT: s_cbranch_execz .LBB3_2
18711870; GFX1232-NEXT: ; %bb.1:
1872- ; GFX1232-NEXT: s_bcnt1_i32_b32 s4, s7
1871+ ; GFX1232-NEXT: s_bcnt1_i32_b32 s5, s6
18731872; GFX1232-NEXT: s_mov_b32 s11, 0x31016000
1874- ; GFX1232-NEXT: s_mul_u64 s[4:5], s[4:5] , 5
1873+ ; GFX1232-NEXT: s_mul_i32 s5, s5 , 5
18751874; GFX1232-NEXT: s_mov_b32 s10, -1
1876- ; GFX1232-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
1875+ ; GFX1232-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, 0
18771876; GFX1232-NEXT: s_wait_kmcnt 0x0
18781877; GFX1232-NEXT: s_mov_b32 s8, s2
18791878; GFX1232-NEXT: s_mov_b32 s9, s3
18801879; GFX1232-NEXT: buffer_atomic_add_u64 v[0:1], off, s[8:11], null th:TH_ATOMIC_RETURN scope:SCOPE_DEV
18811880; GFX1232-NEXT: s_wait_loadcnt 0x0
18821881; GFX1232-NEXT: global_inv scope:SCOPE_DEV
18831882; GFX1232-NEXT: .LBB3_2:
1884- ; GFX1232-NEXT: s_wait_alu 0xfffe
1885- ; GFX1232-NEXT: s_or_b32 exec_lo, exec_lo, s6
1883+ ; GFX1232-NEXT: s_or_b32 exec_lo, exec_lo, s4
18861884; GFX1232-NEXT: s_wait_kmcnt 0x0
18871885; GFX1232-NEXT: v_readfirstlane_b32 s3, v1
18881886; GFX1232-NEXT: v_readfirstlane_b32 s2, v0
@@ -5372,22 +5370,22 @@ define amdgpu_kernel void @sub_i64_constant(ptr addrspace(1) %out, ptr addrspace
53725370; GFX1264: ; %bb.0: ; %entry
53735371; GFX1264-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
53745372; GFX1264-NEXT: s_mov_b64 s[6:7], exec
5375- ; GFX1264-NEXT: s_mov_b32 s9, 0
5376- ; GFX1264-NEXT: v_mbcnt_lo_u32_b32 v0, s6, 0
53775373; GFX1264-NEXT: s_mov_b64 s[4:5], exec
5374+ ; GFX1264-NEXT: v_mbcnt_lo_u32_b32 v0, s6, 0
53785375; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
53795376; GFX1264-NEXT: v_mbcnt_hi_u32_b32 v2, s7, v0
53805377; GFX1264-NEXT: ; implicit-def: $vgpr0_vgpr1
53815378; GFX1264-NEXT: v_cmpx_eq_u32_e32 0, v2
53825379; GFX1264-NEXT: s_cbranch_execz .LBB9_2
53835380; GFX1264-NEXT: ; %bb.1:
5384- ; GFX1264-NEXT: s_bcnt1_i32_b64 s8, s[6:7]
5381+ ; GFX1264-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
5382+ ; GFX1264-NEXT: v_mov_b32_e32 v1, 0
5383+ ; GFX1264-NEXT: s_wait_alu 0xfffe
5384+ ; GFX1264-NEXT: s_mul_i32 s6, s6, 5
53855385; GFX1264-NEXT: s_mov_b32 s11, 0x31016000
5386- ; GFX1264-NEXT: s_mul_u64 s[6:7], s[8:9], 5
5387- ; GFX1264-NEXT: s_mov_b32 s10, -1
53885386; GFX1264-NEXT: s_wait_alu 0xfffe
53895387; GFX1264-NEXT: v_mov_b32_e32 v0, s6
5390- ; GFX1264-NEXT: v_mov_b32_e32 v1, s7
5388+ ; GFX1264-NEXT: s_mov_b32 s10, -1
53915389; GFX1264-NEXT: s_wait_kmcnt 0x0
53925390; GFX1264-NEXT: s_mov_b32 s8, s2
53935391; GFX1264-NEXT: s_mov_b32 s9, s3
@@ -5412,29 +5410,27 @@ define amdgpu_kernel void @sub_i64_constant(ptr addrspace(1) %out, ptr addrspace
54125410; GFX1232-LABEL: sub_i64_constant:
54135411; GFX1232: ; %bb.0: ; %entry
54145412; GFX1232-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
5415- ; GFX1232-NEXT: s_mov_b32 s7, exec_lo
5416- ; GFX1232-NEXT: s_mov_b32 s5, 0
5417- ; GFX1232-NEXT: v_mbcnt_lo_u32_b32 v2, s7, 0
54185413; GFX1232-NEXT: s_mov_b32 s6, exec_lo
5414+ ; GFX1232-NEXT: s_mov_b32 s4, exec_lo
5415+ ; GFX1232-NEXT: v_mbcnt_lo_u32_b32 v2, s6, 0
54195416; GFX1232-NEXT: ; implicit-def: $vgpr0_vgpr1
54205417; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_1)
54215418; GFX1232-NEXT: v_cmpx_eq_u32_e32 0, v2
54225419; GFX1232-NEXT: s_cbranch_execz .LBB9_2
54235420; GFX1232-NEXT: ; %bb.1:
5424- ; GFX1232-NEXT: s_bcnt1_i32_b32 s4, s7
5421+ ; GFX1232-NEXT: s_bcnt1_i32_b32 s5, s6
54255422; GFX1232-NEXT: s_mov_b32 s11, 0x31016000
5426- ; GFX1232-NEXT: s_mul_u64 s[4:5], s[4:5] , 5
5423+ ; GFX1232-NEXT: s_mul_i32 s5, s5 , 5
54275424; GFX1232-NEXT: s_mov_b32 s10, -1
5428- ; GFX1232-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
5425+ ; GFX1232-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, 0
54295426; GFX1232-NEXT: s_wait_kmcnt 0x0
54305427; GFX1232-NEXT: s_mov_b32 s8, s2
54315428; GFX1232-NEXT: s_mov_b32 s9, s3
54325429; GFX1232-NEXT: buffer_atomic_sub_u64 v[0:1], off, s[8:11], null th:TH_ATOMIC_RETURN scope:SCOPE_DEV
54335430; GFX1232-NEXT: s_wait_loadcnt 0x0
54345431; GFX1232-NEXT: global_inv scope:SCOPE_DEV
54355432; GFX1232-NEXT: .LBB9_2:
5436- ; GFX1232-NEXT: s_wait_alu 0xfffe
5437- ; GFX1232-NEXT: s_or_b32 exec_lo, exec_lo, s6
5433+ ; GFX1232-NEXT: s_or_b32 exec_lo, exec_lo, s4
54385434; GFX1232-NEXT: s_wait_kmcnt 0x0
54395435; GFX1232-NEXT: v_readfirstlane_b32 s2, v0
54405436; GFX1232-NEXT: v_mul_u32_u24_e32 v0, 5, v2
0 commit comments