@@ -1635,7 +1635,6 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
16351635; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v0
16361636; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v4, v1, vcc
16371637; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s16, v3, 0
1638- ; GFX9-NEXT: v_mov_b32_e32 v7, s11
16391638; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s16, v4, v[1:2]
16401639; GFX9-NEXT: v_mul_hi_u32 v6, v3, v0
16411640; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s17, v3, v[1:2]
@@ -1683,149 +1682,150 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
16831682; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v5, 0
16841683; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
16851684; GFX9-NEXT: v_add_u32_e32 v3, v4, v3
1686- ; GFX9-NEXT: v_add3_u32 v6, v3, v2, v6
1687- ; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s8, v6, v[1:2]
1688- ; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, s10, v0
1689- ; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s9, v5, v[1:2]
1685+ ; GFX9-NEXT: v_add3_u32 v3, v3, v2, v6
1686+ ; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s8, v3, v[1:2]
1687+ ; GFX9-NEXT: v_mov_b32_e32 v6, s11
1688+ ; GFX9-NEXT: v_sub_co_u32_e32 v7, vcc, s10, v0
1689+ ; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s9, v5, v[1:2]
16901690; GFX9-NEXT: v_mov_b32_e32 v4, s9
16911691; GFX9-NEXT: s_ashr_i32 s10, s3, 31
1692- ; GFX9-NEXT: v_subb_co_u32_e64 v1 , s[0:1], v7, v2 , vcc
1693- ; GFX9-NEXT: v_sub_u32_e32 v2 , s11, v2
1694- ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v1
1695- ; GFX9-NEXT: v_cndmask_b32_e64 v3 , 0, -1, s[0:1]
1696- ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v0
1697- ; GFX9-NEXT: v_subb_co_u32_e32 v2 , vcc, v2 , v4, vcc
1698- ; GFX9-NEXT: v_cndmask_b32_e64 v7 , 0, -1, s[0:1]
1699- ; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v1
1700- ; GFX9-NEXT: v_subrev_co_u32_e32 v8 , vcc, s8, v0
1701- ; GFX9-NEXT: v_cndmask_b32_e64 v7, v3, v7 , s[0:1]
1702- ; GFX9-NEXT: v_subbrev_co_u32_e64 v9 , s[0:1], 0, v2 , vcc
1703- ; GFX9-NEXT: v_add_co_u32_e64 v10 , s[0:1], 1, v5
1704- ; GFX9-NEXT: v_addc_co_u32_e64 v11, s[0:1], 0, v6 , s[0:1]
1705- ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v9
1706- ; GFX9-NEXT: v_cndmask_b32_e64 v3 , 0, -1, s[0:1]
1707- ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v8
1692+ ; GFX9-NEXT: v_subb_co_u32_e64 v6 , s[0:1], v6, v1 , vcc
1693+ ; GFX9-NEXT: v_sub_u32_e32 v0 , s11, v1
1694+ ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v6
1695+ ; GFX9-NEXT: v_cndmask_b32_e64 v1 , 0, -1, s[0:1]
1696+ ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v7
1697+ ; GFX9-NEXT: v_subb_co_u32_e32 v0 , vcc, v0 , v4, vcc
1698+ ; GFX9-NEXT: v_cndmask_b32_e64 v2 , 0, -1, s[0:1]
1699+ ; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v6
1700+ ; GFX9-NEXT: v_subrev_co_u32_e32 v9 , vcc, s8, v7
1701+ ; GFX9-NEXT: v_cndmask_b32_e64 v8, v1, v2 , s[0:1]
1702+ ; GFX9-NEXT: v_subbrev_co_u32_e64 v10 , s[0:1], 0, v0 , vcc
1703+ ; GFX9-NEXT: v_add_co_u32_e64 v2 , s[0:1], 1, v5
1704+ ; GFX9-NEXT: v_addc_co_u32_e64 v11, s[0:1], 0, v3 , s[0:1]
1705+ ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v10
1706+ ; GFX9-NEXT: v_cndmask_b32_e64 v1 , 0, -1, s[0:1]
1707+ ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v9
17081708; GFX9-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[0:1]
1709- ; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v9
1710- ; GFX9-NEXT: v_cndmask_b32_e64 v12, v3 , v12, s[0:1]
1711- ; GFX9-NEXT: v_add_co_u32_e64 v13, s[0:1], 1, v10
1709+ ; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v10
1710+ ; GFX9-NEXT: v_cndmask_b32_e64 v12, v1 , v12, s[0:1]
1711+ ; GFX9-NEXT: v_add_co_u32_e64 v13, s[0:1], 1, v2
17121712; GFX9-NEXT: v_addc_co_u32_e64 v14, s[0:1], 0, v11, s[0:1]
17131713; GFX9-NEXT: s_add_u32 s0, s18, s6
17141714; GFX9-NEXT: s_addc_u32 s1, s19, s6
17151715; GFX9-NEXT: s_add_u32 s2, s2, s10
17161716; GFX9-NEXT: s_mov_b32 s11, s10
17171717; GFX9-NEXT: s_addc_u32 s3, s3, s10
17181718; GFX9-NEXT: s_xor_b64 s[2:3], s[2:3], s[10:11]
1719- ; GFX9-NEXT: v_cvt_f32_u32_e32 v3 , s3
1719+ ; GFX9-NEXT: v_cvt_f32_u32_e32 v1 , s3
17201720; GFX9-NEXT: v_cvt_f32_u32_e32 v15, s2
1721- ; GFX9-NEXT: v_subb_co_u32_e32 v2 , vcc, v2 , v4, vcc
1722- ; GFX9-NEXT: v_mul_f32_e32 v3 , 0x4f800000, v3
1723- ; GFX9-NEXT: v_add_f32_e32 v3, v3 , v15
1724- ; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v3
1725- ; GFX9-NEXT: v_subrev_co_u32_e32 v15 , vcc, s8, v8
1726- ; GFX9-NEXT: v_subbrev_co_u32_e32 v16 , vcc, 0, v2 , vcc
1727- ; GFX9-NEXT: v_mul_f32_e32 v2 , 0x5f7ffffc, v3
1728- ; GFX9-NEXT: v_mul_f32_e32 v3 , 0x2f800000, v2
1729- ; GFX9-NEXT: v_trunc_f32_e32 v4, v3
1730- ; GFX9-NEXT: v_mul_f32_e32 v3 , 0xcf800000, v4
1731- ; GFX9-NEXT: v_add_f32_e32 v2, v3, v2
1732- ; GFX9-NEXT: v_cvt_u32_f32_e32 v17, v2
1721+ ; GFX9-NEXT: v_subb_co_u32_e32 v0 , vcc, v0 , v4, vcc
1722+ ; GFX9-NEXT: v_mul_f32_e32 v1 , 0x4f800000, v1
1723+ ; GFX9-NEXT: v_add_f32_e32 v1, v1 , v15
1724+ ; GFX9-NEXT: v_rcp_iflag_f32_e32 v1, v1
1725+ ; GFX9-NEXT: v_subrev_co_u32_e32 v4 , vcc, s8, v9
1726+ ; GFX9-NEXT: v_subbrev_co_u32_e32 v15 , vcc, 0, v0 , vcc
1727+ ; GFX9-NEXT: v_mul_f32_e32 v0 , 0x5f7ffffc, v1
1728+ ; GFX9-NEXT: v_mul_f32_e32 v1 , 0x2f800000, v0
1729+ ; GFX9-NEXT: v_trunc_f32_e32 v16, v1
1730+ ; GFX9-NEXT: v_mul_f32_e32 v1 , 0xcf800000, v16
1731+ ; GFX9-NEXT: v_add_f32_e32 v0, v1, v0
1732+ ; GFX9-NEXT: v_cvt_u32_f32_e32 v17, v0
17331733; GFX9-NEXT: s_xor_b64 s[8:9], s[0:1], s[6:7]
17341734; GFX9-NEXT: s_sub_u32 s5, 0, s2
17351735; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12
1736- ; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s5, v17, 0
1737- ; GFX9-NEXT: v_cvt_u32_f32_e32 v12, v4
1736+ ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s5, v17, 0
1737+ ; GFX9-NEXT: v_cndmask_b32_e32 v12, v2, v13, vcc
1738+ ; GFX9-NEXT: v_cvt_u32_f32_e32 v13, v16
17381739; GFX9-NEXT: s_subb_u32 s20, 0, s3
17391740; GFX9-NEXT: v_cndmask_b32_e32 v11, v11, v14, vcc
1740- ; GFX9-NEXT: v_cndmask_b32_e32 v10, v10, v13, vcc
1741- ; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s5, v12, v[3:4]
1742- ; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v7
1743- ; GFX9-NEXT: v_mul_lo_u32 v7, v12, v2
1744- ; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[18:19], s20, v17, v[3:4]
1745- ; GFX9-NEXT: v_cndmask_b32_e64 v4, v6, v11, s[0:1]
1746- ; GFX9-NEXT: v_cndmask_b32_e32 v6, v8, v15, vcc
1747- ; GFX9-NEXT: v_mul_lo_u32 v8, v17, v3
1748- ; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v10, s[0:1]
1749- ; GFX9-NEXT: v_mul_hi_u32 v10, v17, v2
1750- ; GFX9-NEXT: v_cndmask_b32_e32 v9, v9, v16, vcc
1751- ; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v7, v8
1752- ; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
1753- ; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v7, v10
1754- ; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
1755- ; GFX9-NEXT: v_mul_lo_u32 v10, v12, v3
1756- ; GFX9-NEXT: v_mul_hi_u32 v2, v12, v2
1757- ; GFX9-NEXT: v_add_u32_e32 v7, v8, v7
1758- ; GFX9-NEXT: v_mul_hi_u32 v8, v17, v3
1759- ; GFX9-NEXT: v_mul_hi_u32 v3, v12, v3
1760- ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v10, v2
1741+ ; GFX9-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
1742+ ; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s5, v13, v[1:2]
1743+ ; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v8
1744+ ; GFX9-NEXT: v_cndmask_b32_e64 v8, v3, v11, s[0:1]
1745+ ; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[18:19], s20, v17, v[1:2]
1746+ ; GFX9-NEXT: v_mul_lo_u32 v2, v13, v0
1747+ ; GFX9-NEXT: v_cndmask_b32_e32 v9, v10, v15, vcc
1748+ ; GFX9-NEXT: v_mul_lo_u32 v3, v17, v1
1749+ ; GFX9-NEXT: v_mul_hi_u32 v10, v17, v0
1750+ ; GFX9-NEXT: v_mul_hi_u32 v0, v13, v0
1751+ ; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v12, s[0:1]
1752+ ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3
1753+ ; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
1754+ ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v10
1755+ ; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
1756+ ; GFX9-NEXT: v_mul_lo_u32 v10, v13, v1
1757+ ; GFX9-NEXT: v_add_u32_e32 v2, v3, v2
1758+ ; GFX9-NEXT: v_mul_hi_u32 v3, v17, v1
1759+ ; GFX9-NEXT: v_mul_hi_u32 v1, v13, v1
1760+ ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v10, v0
17611761; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1762- ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v8
1763- ; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
1764- ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v7
1765- ; GFX9-NEXT: v_add_u32_e32 v8, v10, v8
1766- ; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
1767- ; GFX9-NEXT: v_add3_u32 v3, v8, v7, v3
1768- ; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v17, v2
1769- ; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, v12, v3, vcc
1770- ; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[18:19], s5, v7, 0
1771- ; GFX9-NEXT: v_cndmask_b32_e64 v6, v0, v6, s[0:1]
1772- ; GFX9-NEXT: v_cndmask_b32_e64 v9, v1, v9, s[0:1]
1762+ ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v3
1763+ ; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
1764+ ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
1765+ ; GFX9-NEXT: v_add_u32_e32 v3, v10, v3
1766+ ; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
1767+ ; GFX9-NEXT: v_add_co_u32_e32 v10, vcc, v17, v0
1768+ ; GFX9-NEXT: v_add3_u32 v1, v3, v2, v1
1769+ ; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[18:19], s5, v10, 0
1770+ ; GFX9-NEXT: v_addc_co_u32_e32 v11, vcc, v13, v1, vcc
17731771; GFX9-NEXT: v_mov_b32_e32 v0, v3
1774- ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s5, v8, v[0:1]
1775- ; GFX9-NEXT: v_xor_b32_e32 v10, s17, v4
1772+ ; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v4, s[0:1]
1773+ ; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[0:1]
1774+ ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s5, v11, v[0:1]
17761775; GFX9-NEXT: v_xor_b32_e32 v5, s16, v5
1777- ; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s20, v7, v[0:1]
1778- ; GFX9-NEXT: v_mov_b32_e32 v11, s17
1776+ ; GFX9-NEXT: v_xor_b32_e32 v8, s17, v8
1777+ ; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s20, v10, v[0:1]
1778+ ; GFX9-NEXT: v_mov_b32_e32 v9, s17
17791779; GFX9-NEXT: v_subrev_co_u32_e32 v0, vcc, s16, v5
1780- ; GFX9-NEXT: v_xor_b32_e32 v4, s4, v6
1781- ; GFX9-NEXT: v_mul_lo_u32 v5, v8 , v2
1782- ; GFX9-NEXT: v_mul_lo_u32 v6, v7 , v3
1783- ; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v10, v11 , vcc
1784- ; GFX9-NEXT: v_mul_hi_u32 v10, v7 , v2
1785- ; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v5, v6
1786- ; GFX9-NEXT: v_cndmask_b32_e64 v6 , 0, 1, vcc
1787- ; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v5, v10
1780+ ; GFX9-NEXT: v_xor_b32_e32 v4, s4, v7
1781+ ; GFX9-NEXT: v_mul_lo_u32 v5, v11 , v2
1782+ ; GFX9-NEXT: v_mul_lo_u32 v7, v10 , v3
1783+ ; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v8, v9 , vcc
1784+ ; GFX9-NEXT: v_mul_hi_u32 v8, v10 , v2
1785+ ; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v5, v7
1786+ ; GFX9-NEXT: v_cndmask_b32_e64 v7 , 0, 1, vcc
1787+ ; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v5, v8
17881788; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
1789- ; GFX9-NEXT: v_mul_lo_u32 v10, v8 , v3
1790- ; GFX9-NEXT: v_mul_hi_u32 v2, v8 , v2
1791- ; GFX9-NEXT: v_add_u32_e32 v5, v6 , v5
1792- ; GFX9-NEXT: v_mul_hi_u32 v6, v7 , v3
1793- ; GFX9-NEXT: v_mul_hi_u32 v3, v8 , v3
1794- ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v10 , v2
1795- ; GFX9-NEXT: v_cndmask_b32_e64 v10 , 0, 1, vcc
1796- ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6
1797- ; GFX9-NEXT: v_cndmask_b32_e64 v6 , 0, 1, vcc
1789+ ; GFX9-NEXT: v_mul_lo_u32 v8, v11 , v3
1790+ ; GFX9-NEXT: v_mul_hi_u32 v2, v11 , v2
1791+ ; GFX9-NEXT: v_add_u32_e32 v5, v7 , v5
1792+ ; GFX9-NEXT: v_mul_hi_u32 v7, v10 , v3
1793+ ; GFX9-NEXT: v_mul_hi_u32 v3, v11 , v3
1794+ ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v8 , v2
1795+ ; GFX9-NEXT: v_cndmask_b32_e64 v8 , 0, 1, vcc
1796+ ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v7
1797+ ; GFX9-NEXT: v_cndmask_b32_e64 v7 , 0, 1, vcc
17981798; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5
1799- ; GFX9-NEXT: v_add_u32_e32 v6, v10, v6
1799+ ; GFX9-NEXT: v_add_u32_e32 v7, v8, v7
18001800; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
1801- ; GFX9-NEXT: v_add3_u32 v3, v6 , v5, v3
1802- ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v7 , v2
1803- ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v8 , v3, vcc
1801+ ; GFX9-NEXT: v_add3_u32 v3, v7 , v5, v3
1802+ ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v10 , v2
1803+ ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v11 , v3, vcc
18041804; GFX9-NEXT: v_mul_lo_u32 v5, s9, v2
1805- ; GFX9-NEXT: v_mul_lo_u32 v6 , s8, v3
1806- ; GFX9-NEXT: v_mul_hi_u32 v8 , s8, v2
1805+ ; GFX9-NEXT: v_mul_lo_u32 v7 , s8, v3
1806+ ; GFX9-NEXT: v_mul_hi_u32 v9 , s8, v2
18071807; GFX9-NEXT: v_mul_hi_u32 v2, s9, v2
18081808; GFX9-NEXT: v_mul_hi_u32 v12, s9, v3
1809- ; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v5, v6
1810- ; GFX9-NEXT: v_cndmask_b32_e64 v6 , 0, 1, vcc
1811- ; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v5, v8
1809+ ; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v5, v7
1810+ ; GFX9-NEXT: v_cndmask_b32_e64 v7 , 0, 1, vcc
1811+ ; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v5, v9
18121812; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
1813- ; GFX9-NEXT: v_mul_lo_u32 v8 , s9, v3
1814- ; GFX9-NEXT: v_add_u32_e32 v5, v6 , v5
1815- ; GFX9-NEXT: v_mul_hi_u32 v6 , s8, v3
1816- ; GFX9-NEXT: v_xor_b32_e32 v9 , s4, v9
1817- ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v8 , v2
1818- ; GFX9-NEXT: v_cndmask_b32_e64 v8 , 0, 1, vcc
1819- ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6
1820- ; GFX9-NEXT: v_cndmask_b32_e64 v6 , 0, 1, vcc
1813+ ; GFX9-NEXT: v_mul_lo_u32 v9 , s9, v3
1814+ ; GFX9-NEXT: v_add_u32_e32 v5, v7 , v5
1815+ ; GFX9-NEXT: v_mul_hi_u32 v7 , s8, v3
1816+ ; GFX9-NEXT: v_xor_b32_e32 v6 , s4, v6
1817+ ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v9 , v2
1818+ ; GFX9-NEXT: v_cndmask_b32_e64 v9 , 0, 1, vcc
1819+ ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v7
1820+ ; GFX9-NEXT: v_cndmask_b32_e64 v7 , 0, 1, vcc
18211821; GFX9-NEXT: v_add_co_u32_e32 v10, vcc, v2, v5
18221822; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s2, v10, 0
1823+ ; GFX9-NEXT: v_mov_b32_e32 v8, s4
18231824; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
1824- ; GFX9-NEXT: v_add_u32_e32 v6, v8, v6
1825- ; GFX9-NEXT: v_mov_b32_e32 v7, s4
18261825; GFX9-NEXT: v_subrev_co_u32_e32 v4, vcc, s4, v4
1826+ ; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v6, v8, vcc
1827+ ; GFX9-NEXT: v_add_u32_e32 v6, v9, v7
18271828; GFX9-NEXT: v_add3_u32 v8, v6, v11, v12
1828- ; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v9, v7, vcc
18291829; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[0:1], s2, v8, v[3:4]
18301830; GFX9-NEXT: v_mov_b32_e32 v9, s9
18311831; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, s8, v2
0 commit comments