@@ -1823,22 +1823,22 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out, ptr addrspace
18231823; GFX1264: ; %bb.0: ; %entry
18241824; GFX1264-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
18251825; GFX1264-NEXT: s_mov_b64 s[6:7], exec
1826- ; GFX1264-NEXT: s_mov_b64 s[4:5], exec
1826+ ; GFX1264-NEXT: s_mov_b32 s9, 0
18271827; GFX1264-NEXT: v_mbcnt_lo_u32_b32 v0, s6, 0
1828+ ; GFX1264-NEXT: s_mov_b64 s[4:5], exec
18281829; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
18291830; GFX1264-NEXT: v_mbcnt_hi_u32_b32 v2, s7, v0
18301831; GFX1264-NEXT: ; implicit-def: $vgpr0_vgpr1
18311832; GFX1264-NEXT: v_cmpx_eq_u32_e32 0, v2
18321833; GFX1264-NEXT: s_cbranch_execz .LBB3_2
18331834; GFX1264-NEXT: ; %bb.1:
1834- ; GFX1264-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
1835- ; GFX1264-NEXT: v_mov_b32_e32 v1, 0
1836- ; GFX1264-NEXT: s_wait_alu 0xfffe
1837- ; GFX1264-NEXT: s_mul_i32 s6, s6, 5
1835+ ; GFX1264-NEXT: s_bcnt1_i32_b64 s8, s[6:7]
18381836; GFX1264-NEXT: s_mov_b32 s11, 0x31016000
1837+ ; GFX1264-NEXT: s_mul_u64 s[6:7], s[8:9], 5
1838+ ; GFX1264-NEXT: s_mov_b32 s10, -1
18391839; GFX1264-NEXT: s_wait_alu 0xfffe
18401840; GFX1264-NEXT: v_mov_b32_e32 v0, s6
1841- ; GFX1264-NEXT: s_mov_b32 s10, -1
1841+ ; GFX1264-NEXT: v_mov_b32_e32 v1, s7
18421842; GFX1264-NEXT: s_wait_kmcnt 0x0
18431843; GFX1264-NEXT: s_mov_b32 s8, s2
18441844; GFX1264-NEXT: s_mov_b32 s9, s3
@@ -1860,27 +1860,29 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out, ptr addrspace
18601860; GFX1232-LABEL: add_i64_constant:
18611861; GFX1232: ; %bb.0: ; %entry
18621862; GFX1232-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
1863+ ; GFX1232-NEXT: s_mov_b32 s7, exec_lo
1864+ ; GFX1232-NEXT: s_mov_b32 s5, 0
1865+ ; GFX1232-NEXT: v_mbcnt_lo_u32_b32 v2, s7, 0
18631866; GFX1232-NEXT: s_mov_b32 s6, exec_lo
1864- ; GFX1232-NEXT: s_mov_b32 s4, exec_lo
1865- ; GFX1232-NEXT: v_mbcnt_lo_u32_b32 v2, s6, 0
18661867; GFX1232-NEXT: ; implicit-def: $vgpr0_vgpr1
18671868; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_1)
18681869; GFX1232-NEXT: v_cmpx_eq_u32_e32 0, v2
18691870; GFX1232-NEXT: s_cbranch_execz .LBB3_2
18701871; GFX1232-NEXT: ; %bb.1:
1871- ; GFX1232-NEXT: s_bcnt1_i32_b32 s5, s6
1872+ ; GFX1232-NEXT: s_bcnt1_i32_b32 s4, s7
18721873; GFX1232-NEXT: s_mov_b32 s11, 0x31016000
1873- ; GFX1232-NEXT: s_mul_i32 s5, s5 , 5
1874+ ; GFX1232-NEXT: s_mul_u64 s[4:5], s[4:5] , 5
18741875; GFX1232-NEXT: s_mov_b32 s10, -1
1875- ; GFX1232-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, 0
1876+ ; GFX1232-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
18761877; GFX1232-NEXT: s_wait_kmcnt 0x0
18771878; GFX1232-NEXT: s_mov_b32 s8, s2
18781879; GFX1232-NEXT: s_mov_b32 s9, s3
18791880; GFX1232-NEXT: buffer_atomic_add_u64 v[0:1], off, s[8:11], null th:TH_ATOMIC_RETURN scope:SCOPE_DEV
18801881; GFX1232-NEXT: s_wait_loadcnt 0x0
18811882; GFX1232-NEXT: global_inv scope:SCOPE_DEV
18821883; GFX1232-NEXT: .LBB3_2:
1883- ; GFX1232-NEXT: s_or_b32 exec_lo, exec_lo, s4
1884+ ; GFX1232-NEXT: s_wait_alu 0xfffe
1885+ ; GFX1232-NEXT: s_or_b32 exec_lo, exec_lo, s6
18841886; GFX1232-NEXT: s_wait_kmcnt 0x0
18851887; GFX1232-NEXT: v_readfirstlane_b32 s3, v1
18861888; GFX1232-NEXT: v_readfirstlane_b32 s2, v0
@@ -5370,22 +5372,22 @@ define amdgpu_kernel void @sub_i64_constant(ptr addrspace(1) %out, ptr addrspace
53705372; GFX1264: ; %bb.0: ; %entry
53715373; GFX1264-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
53725374; GFX1264-NEXT: s_mov_b64 s[6:7], exec
5373- ; GFX1264-NEXT: s_mov_b64 s[4:5], exec
5375+ ; GFX1264-NEXT: s_mov_b32 s9, 0
53745376; GFX1264-NEXT: v_mbcnt_lo_u32_b32 v0, s6, 0
5377+ ; GFX1264-NEXT: s_mov_b64 s[4:5], exec
53755378; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
53765379; GFX1264-NEXT: v_mbcnt_hi_u32_b32 v2, s7, v0
53775380; GFX1264-NEXT: ; implicit-def: $vgpr0_vgpr1
53785381; GFX1264-NEXT: v_cmpx_eq_u32_e32 0, v2
53795382; GFX1264-NEXT: s_cbranch_execz .LBB9_2
53805383; GFX1264-NEXT: ; %bb.1:
5381- ; GFX1264-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
5382- ; GFX1264-NEXT: v_mov_b32_e32 v1, 0
5383- ; GFX1264-NEXT: s_wait_alu 0xfffe
5384- ; GFX1264-NEXT: s_mul_i32 s6, s6, 5
5384+ ; GFX1264-NEXT: s_bcnt1_i32_b64 s8, s[6:7]
53855385; GFX1264-NEXT: s_mov_b32 s11, 0x31016000
5386+ ; GFX1264-NEXT: s_mul_u64 s[6:7], s[8:9], 5
5387+ ; GFX1264-NEXT: s_mov_b32 s10, -1
53865388; GFX1264-NEXT: s_wait_alu 0xfffe
53875389; GFX1264-NEXT: v_mov_b32_e32 v0, s6
5388- ; GFX1264-NEXT: s_mov_b32 s10, -1
5390+ ; GFX1264-NEXT: v_mov_b32_e32 v1, s7
53895391; GFX1264-NEXT: s_wait_kmcnt 0x0
53905392; GFX1264-NEXT: s_mov_b32 s8, s2
53915393; GFX1264-NEXT: s_mov_b32 s9, s3
@@ -5410,27 +5412,29 @@ define amdgpu_kernel void @sub_i64_constant(ptr addrspace(1) %out, ptr addrspace
54105412; GFX1232-LABEL: sub_i64_constant:
54115413; GFX1232: ; %bb.0: ; %entry
54125414; GFX1232-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
5415+ ; GFX1232-NEXT: s_mov_b32 s7, exec_lo
5416+ ; GFX1232-NEXT: s_mov_b32 s5, 0
5417+ ; GFX1232-NEXT: v_mbcnt_lo_u32_b32 v2, s7, 0
54135418; GFX1232-NEXT: s_mov_b32 s6, exec_lo
5414- ; GFX1232-NEXT: s_mov_b32 s4, exec_lo
5415- ; GFX1232-NEXT: v_mbcnt_lo_u32_b32 v2, s6, 0
54165419; GFX1232-NEXT: ; implicit-def: $vgpr0_vgpr1
54175420; GFX1232-NEXT: s_delay_alu instid0(VALU_DEP_1)
54185421; GFX1232-NEXT: v_cmpx_eq_u32_e32 0, v2
54195422; GFX1232-NEXT: s_cbranch_execz .LBB9_2
54205423; GFX1232-NEXT: ; %bb.1:
5421- ; GFX1232-NEXT: s_bcnt1_i32_b32 s5, s6
5424+ ; GFX1232-NEXT: s_bcnt1_i32_b32 s4, s7
54225425; GFX1232-NEXT: s_mov_b32 s11, 0x31016000
5423- ; GFX1232-NEXT: s_mul_i32 s5, s5 , 5
5426+ ; GFX1232-NEXT: s_mul_u64 s[4:5], s[4:5] , 5
54245427; GFX1232-NEXT: s_mov_b32 s10, -1
5425- ; GFX1232-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, 0
5428+ ; GFX1232-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
54265429; GFX1232-NEXT: s_wait_kmcnt 0x0
54275430; GFX1232-NEXT: s_mov_b32 s8, s2
54285431; GFX1232-NEXT: s_mov_b32 s9, s3
54295432; GFX1232-NEXT: buffer_atomic_sub_u64 v[0:1], off, s[8:11], null th:TH_ATOMIC_RETURN scope:SCOPE_DEV
54305433; GFX1232-NEXT: s_wait_loadcnt 0x0
54315434; GFX1232-NEXT: global_inv scope:SCOPE_DEV
54325435; GFX1232-NEXT: .LBB9_2:
5433- ; GFX1232-NEXT: s_or_b32 exec_lo, exec_lo, s4
5436+ ; GFX1232-NEXT: s_wait_alu 0xfffe
5437+ ; GFX1232-NEXT: s_or_b32 exec_lo, exec_lo, s6
54345438; GFX1232-NEXT: s_wait_kmcnt 0x0
54355439; GFX1232-NEXT: v_readfirstlane_b32 s2, v0
54365440; GFX1232-NEXT: v_mul_u32_u24_e32 v0, 5, v2
0 commit comments