@@ -1667,7 +1667,7 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
16671667; GFX9-NEXT: v_mul_lo_u32 v3, s10, v1
16681668; GFX9-NEXT: v_mul_hi_u32 v4, s10, v0
16691669; GFX9-NEXT: v_mul_hi_u32 v0, s11, v0
1670- ; GFX9-NEXT: v_mul_hi_u32 v6 , s11, v1
1670+ ; GFX9-NEXT: v_mul_hi_u32 v5 , s11, v1
16711671; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3
16721672; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
16731673; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
@@ -1679,155 +1679,155 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
16791679; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
16801680; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v3
16811681; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
1682- ; GFX9-NEXT: v_add_co_u32_e32 v5 , vcc, v0, v2
1683- ; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s8, v5 , 0
1682+ ; GFX9-NEXT: v_add_co_u32_e32 v6 , vcc, v0, v2
1683+ ; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s8, v6 , 0
16841684; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
16851685; GFX9-NEXT: v_add_u32_e32 v3, v4, v3
1686- ; GFX9-NEXT: v_add3_u32 v4 , v3, v0, v6
1686+ ; GFX9-NEXT: v_add3_u32 v8 , v3, v0, v5
16871687; GFX9-NEXT: v_mov_b32_e32 v0, v2
1688- ; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s8, v4 , v[0:1]
1689- ; GFX9-NEXT: v_mov_b32_e32 v6 , s11
1690- ; GFX9-NEXT: v_sub_co_u32_e32 v8 , vcc, s10, v1
1691- ; GFX9-NEXT: v_mad_u64_u32 v[2:3 ], s[0:1], s9, v5 , v[2:3]
1688+ ; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s8, v8 , v[0:1]
1689+ ; GFX9-NEXT: v_mov_b32_e32 v5 , s11
1690+ ; GFX9-NEXT: v_sub_co_u32_e32 v1 , vcc, s10, v1
1691+ ; GFX9-NEXT: v_mad_u64_u32 v[3:4 ], s[0:1], s9, v6 , v[2:3]
16921692; GFX9-NEXT: s_ashr_i32 s10, s3, 31
16931693; GFX9-NEXT: v_mov_b32_e32 v0, 0
1694- ; GFX9-NEXT: v_subb_co_u32_e64 v6 , s[0:1], v6, v2 , vcc
1695- ; GFX9-NEXT: v_sub_u32_e32 v1 , s11, v2
1696- ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v6
1697- ; GFX9-NEXT: v_cndmask_b32_e64 v2 , 0, -1, s[0:1]
1698- ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v8
1699- ; GFX9-NEXT: v_subb_co_u32_e32 v1 , vcc, v1 , v7, vcc
1700- ; GFX9-NEXT: v_cndmask_b32_e64 v3 , 0, -1, s[0:1]
1701- ; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v6
1702- ; GFX9-NEXT: v_subrev_co_u32_e32 v10, vcc, s8, v8
1703- ; GFX9-NEXT: v_cndmask_b32_e64 v9, v2, v3 , s[0:1]
1704- ; GFX9-NEXT: v_subbrev_co_u32_e64 v11, s[0:1], 0, v1 , vcc
1705- ; GFX9-NEXT: v_add_co_u32_e64 v3 , s[0:1], 1, v5
1706- ; GFX9-NEXT: v_addc_co_u32_e64 v12, s[0:1], 0, v4 , s[0:1]
1694+ ; GFX9-NEXT: v_subb_co_u32_e64 v2 , s[0:1], v5, v3 , vcc
1695+ ; GFX9-NEXT: v_sub_u32_e32 v3 , s11, v3
1696+ ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v2
1697+ ; GFX9-NEXT: v_cndmask_b32_e64 v4 , 0, -1, s[0:1]
1698+ ; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v1
1699+ ; GFX9-NEXT: v_subb_co_u32_e32 v3 , vcc, v3 , v7, vcc
1700+ ; GFX9-NEXT: v_cndmask_b32_e64 v5 , 0, -1, s[0:1]
1701+ ; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v2
1702+ ; GFX9-NEXT: v_subrev_co_u32_e32 v10, vcc, s8, v1
1703+ ; GFX9-NEXT: v_cndmask_b32_e64 v9, v4, v5 , s[0:1]
1704+ ; GFX9-NEXT: v_subbrev_co_u32_e64 v11, s[0:1], 0, v3 , vcc
1705+ ; GFX9-NEXT: v_add_co_u32_e64 v5 , s[0:1], 1, v6
1706+ ; GFX9-NEXT: v_addc_co_u32_e64 v12, s[0:1], 0, v8 , s[0:1]
17071707; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v11
1708- ; GFX9-NEXT: v_cndmask_b32_e64 v2 , 0, -1, s[0:1]
1708+ ; GFX9-NEXT: v_cndmask_b32_e64 v4 , 0, -1, s[0:1]
17091709; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v10
17101710; GFX9-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[0:1]
17111711; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v11
1712- ; GFX9-NEXT: v_cndmask_b32_e64 v13, v2 , v13, s[0:1]
1713- ; GFX9-NEXT: v_add_co_u32_e64 v14, s[0:1], 1, v3
1712+ ; GFX9-NEXT: v_cndmask_b32_e64 v13, v4 , v13, s[0:1]
1713+ ; GFX9-NEXT: v_add_co_u32_e64 v14, s[0:1], 1, v5
17141714; GFX9-NEXT: v_addc_co_u32_e64 v15, s[0:1], 0, v12, s[0:1]
17151715; GFX9-NEXT: s_add_u32 s0, s18, s6
17161716; GFX9-NEXT: s_addc_u32 s1, s19, s6
17171717; GFX9-NEXT: s_add_u32 s2, s2, s10
17181718; GFX9-NEXT: s_mov_b32 s11, s10
17191719; GFX9-NEXT: s_addc_u32 s3, s3, s10
17201720; GFX9-NEXT: s_xor_b64 s[2:3], s[2:3], s[10:11]
1721- ; GFX9-NEXT: v_cvt_f32_u32_e32 v2 , s3
1721+ ; GFX9-NEXT: v_cvt_f32_u32_e32 v4 , s3
17221722; GFX9-NEXT: v_cvt_f32_u32_e32 v16, s2
1723- ; GFX9-NEXT: v_subb_co_u32_e32 v1 , vcc, v1 , v7, vcc
1724- ; GFX9-NEXT: v_mul_f32_e32 v2 , 0x4f800000, v2
1725- ; GFX9-NEXT: v_add_f32_e32 v2, v2 , v16
1726- ; GFX9-NEXT: v_rcp_iflag_f32_e32 v2, v2
1723+ ; GFX9-NEXT: v_subb_co_u32_e32 v3 , vcc, v3 , v7, vcc
1724+ ; GFX9-NEXT: v_mul_f32_e32 v4 , 0x4f800000, v4
1725+ ; GFX9-NEXT: v_add_f32_e32 v4, v4 , v16
1726+ ; GFX9-NEXT: v_rcp_iflag_f32_e32 v4, v4
17271727; GFX9-NEXT: v_subrev_co_u32_e32 v7, vcc, s8, v10
1728- ; GFX9-NEXT: v_subbrev_co_u32_e32 v16, vcc, 0, v1 , vcc
1729- ; GFX9-NEXT: v_mul_f32_e32 v1 , 0x5f7ffffc, v2
1730- ; GFX9-NEXT: v_mul_f32_e32 v2 , 0x2f800000, v1
1731- ; GFX9-NEXT: v_trunc_f32_e32 v17, v2
1732- ; GFX9-NEXT: v_mul_f32_e32 v2 , 0xcf800000, v17
1733- ; GFX9-NEXT: v_add_f32_e32 v1, v2, v1
1734- ; GFX9-NEXT: v_cvt_u32_f32_e32 v18, v1
1728+ ; GFX9-NEXT: v_subbrev_co_u32_e32 v16, vcc, 0, v3 , vcc
1729+ ; GFX9-NEXT: v_mul_f32_e32 v3 , 0x5f7ffffc, v4
1730+ ; GFX9-NEXT: v_mul_f32_e32 v4 , 0x2f800000, v3
1731+ ; GFX9-NEXT: v_trunc_f32_e32 v17, v4
1732+ ; GFX9-NEXT: v_mul_f32_e32 v4 , 0xcf800000, v17
1733+ ; GFX9-NEXT: v_add_f32_e32 v3, v4, v3
1734+ ; GFX9-NEXT: v_cvt_u32_f32_e32 v18, v3
17351735; GFX9-NEXT: s_xor_b64 s[8:9], s[0:1], s[6:7]
17361736; GFX9-NEXT: s_sub_u32 s5, 0, s2
17371737; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13
1738- ; GFX9-NEXT: v_mad_u64_u32 v[1:2 ], s[0:1], s5, v18, 0
1739- ; GFX9-NEXT: v_cndmask_b32_e32 v13, v3 , v14, vcc
1738+ ; GFX9-NEXT: v_mad_u64_u32 v[3:4 ], s[0:1], s5, v18, 0
1739+ ; GFX9-NEXT: v_cndmask_b32_e32 v13, v5 , v14, vcc
17401740; GFX9-NEXT: v_cvt_u32_f32_e32 v14, v17
17411741; GFX9-NEXT: s_subb_u32 s20, 0, s3
17421742; GFX9-NEXT: v_cndmask_b32_e32 v12, v12, v15, vcc
17431743; GFX9-NEXT: v_cndmask_b32_e32 v7, v10, v7, vcc
1744- ; GFX9-NEXT: v_mad_u64_u32 v[2:3 ], s[0:1], s5, v14, v[2:3 ]
1744+ ; GFX9-NEXT: v_mad_u64_u32 v[4:5 ], s[0:1], s5, v14, v[4:5 ]
17451745; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v9
1746- ; GFX9-NEXT: v_cndmask_b32_e64 v9, v4, v12, s[0:1]
1747- ; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[18:19], s20, v18, v[2:3]
1748- ; GFX9-NEXT: v_mul_lo_u32 v3, v14, v1
17491746; GFX9-NEXT: v_cndmask_b32_e32 v10, v11, v16, vcc
1750- ; GFX9-NEXT: v_mul_lo_u32 v4, v18, v2
1751- ; GFX9-NEXT: v_mul_hi_u32 v11, v18, v1
1752- ; GFX9-NEXT: v_mul_hi_u32 v1, v14, v1
1753- ; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[0:1]
1754- ; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v4
1755- ; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
1756- ; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v11
1757- ; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
1758- ; GFX9-NEXT: v_mul_lo_u32 v11, v14, v2
1759- ; GFX9-NEXT: v_add_u32_e32 v3, v4, v3
1760- ; GFX9-NEXT: v_mul_hi_u32 v4, v18, v2
1761- ; GFX9-NEXT: v_mul_hi_u32 v2, v14, v2
1762- ; GFX9-NEXT: v_add_co_u32_e32 v1, vcc, v11, v1
1763- ; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
1764- ; GFX9-NEXT: v_add_co_u32_e32 v1, vcc, v1, v4
1765- ; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
1766- ; GFX9-NEXT: v_add_co_u32_e32 v1, vcc, v1, v3
1767- ; GFX9-NEXT: v_add_u32_e32 v4, v11, v4
1768- ; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
1769- ; GFX9-NEXT: v_add_co_u32_e32 v11, vcc, v18, v1
1770- ; GFX9-NEXT: v_add3_u32 v2, v4, v3, v2
1771- ; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[18:19], s5, v11, 0
1772- ; GFX9-NEXT: v_addc_co_u32_e32 v12, vcc, v14, v2, vcc
1773- ; GFX9-NEXT: v_mov_b32_e32 v1, v4
1774- ; GFX9-NEXT: v_cndmask_b32_e64 v7, v8, v7, s[0:1]
1775- ; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v10, s[0:1]
1776- ; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s5, v12, v[1:2]
1777- ; GFX9-NEXT: v_xor_b32_e32 v8, s16, v5
1778- ; GFX9-NEXT: v_xor_b32_e32 v9, s17, v9
1779- ; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[0:1], s20, v11, v[1:2]
1780- ; GFX9-NEXT: v_mov_b32_e32 v10, s17
1781- ; GFX9-NEXT: v_subrev_co_u32_e32 v1, vcc, s16, v8
1782- ; GFX9-NEXT: v_xor_b32_e32 v5, s4, v7
1783- ; GFX9-NEXT: v_mul_lo_u32 v7, v12, v3
1784- ; GFX9-NEXT: v_mul_lo_u32 v8, v11, v4
1785- ; GFX9-NEXT: v_subb_co_u32_e32 v2, vcc, v9, v10, vcc
1786- ; GFX9-NEXT: v_mul_hi_u32 v9, v11, v3
1787- ; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v7, v8
1747+ ; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[18:19], s20, v18, v[4:5]
1748+ ; GFX9-NEXT: v_cndmask_b32_e64 v5, v8, v12, s[0:1]
1749+ ; GFX9-NEXT: v_mul_lo_u32 v8, v14, v3
1750+ ; GFX9-NEXT: v_mul_lo_u32 v9, v18, v4
1751+ ; GFX9-NEXT: v_mul_hi_u32 v11, v18, v3
1752+ ; GFX9-NEXT: v_mul_hi_u32 v3, v14, v3
1753+ ; GFX9-NEXT: v_cndmask_b32_e64 v7, v1, v7, s[0:1]
1754+ ; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v8, v9
1755+ ; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
1756+ ; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v8, v11
17881757; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
1789- ; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v7, v9
1790- ; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
1791- ; GFX9-NEXT: v_mul_lo_u32 v9, v12, v4
1792- ; GFX9-NEXT: v_mul_hi_u32 v3, v12, v3
1793- ; GFX9-NEXT: v_add_u32_e32 v7, v8, v7
1794- ; GFX9-NEXT: v_mul_hi_u32 v8, v11, v4
1795- ; GFX9-NEXT: v_mul_hi_u32 v4, v12, v4
1796- ; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v9, v3
1758+ ; GFX9-NEXT: v_mul_lo_u32 v11, v14, v4
1759+ ; GFX9-NEXT: v_add_u32_e32 v8, v9, v8
1760+ ; GFX9-NEXT: v_mul_hi_u32 v9, v18, v4
1761+ ; GFX9-NEXT: v_mul_hi_u32 v4, v14, v4
1762+ ; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v11, v3
1763+ ; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
1764+ ; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v9
17971765; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
17981766; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v8
1767+ ; GFX9-NEXT: v_add_u32_e32 v9, v11, v9
17991768; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
1800- ; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v7
1801- ; GFX9-NEXT: v_add_u32_e32 v8, v9, v8
1769+ ; GFX9-NEXT: v_add3_u32 v4, v9, v8, v4
1770+ ; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v18, v3
1771+ ; GFX9-NEXT: v_addc_co_u32_e32 v9, vcc, v14, v4, vcc
1772+ ; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[18:19], s5, v8, 0
1773+ ; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v13, s[0:1]
1774+ ; GFX9-NEXT: v_cndmask_b32_e64 v10, v2, v10, s[0:1]
1775+ ; GFX9-NEXT: v_mov_b32_e32 v1, v4
1776+ ; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s5, v9, v[1:2]
1777+ ; GFX9-NEXT: v_xor_b32_e32 v11, s17, v5
1778+ ; GFX9-NEXT: v_xor_b32_e32 v6, s16, v6
1779+ ; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[0:1], s20, v8, v[1:2]
1780+ ; GFX9-NEXT: v_mov_b32_e32 v12, s17
1781+ ; GFX9-NEXT: v_subrev_co_u32_e32 v1, vcc, s16, v6
1782+ ; GFX9-NEXT: v_xor_b32_e32 v5, s4, v7
1783+ ; GFX9-NEXT: v_mul_lo_u32 v6, v9, v3
1784+ ; GFX9-NEXT: v_mul_lo_u32 v7, v8, v4
1785+ ; GFX9-NEXT: v_subb_co_u32_e32 v2, vcc, v11, v12, vcc
1786+ ; GFX9-NEXT: v_mul_hi_u32 v11, v8, v3
1787+ ; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, v6, v7
18021788; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
1803- ; GFX9-NEXT: v_add3_u32 v4, v8, v7, v4
1789+ ; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, v6, v11
1790+ ; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
1791+ ; GFX9-NEXT: v_mul_lo_u32 v11, v9, v4
1792+ ; GFX9-NEXT: v_mul_hi_u32 v3, v9, v3
1793+ ; GFX9-NEXT: v_add_u32_e32 v6, v7, v6
1794+ ; GFX9-NEXT: v_mul_hi_u32 v7, v8, v4
1795+ ; GFX9-NEXT: v_mul_hi_u32 v4, v9, v4
18041796; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v11, v3
1805- ; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v12, v4, vcc
1806- ; GFX9-NEXT: v_mul_lo_u32 v7, s9, v3
1807- ; GFX9-NEXT: v_mul_lo_u32 v8, s8, v4
1808- ; GFX9-NEXT: v_mul_hi_u32 v10, s8, v3
1797+ ; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
1798+ ; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v7
1799+ ; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
1800+ ; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v6
1801+ ; GFX9-NEXT: v_add_u32_e32 v7, v11, v7
1802+ ; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
1803+ ; GFX9-NEXT: v_add3_u32 v4, v7, v6, v4
1804+ ; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v8, v3
1805+ ; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v9, v4, vcc
1806+ ; GFX9-NEXT: v_mul_lo_u32 v6, s9, v3
1807+ ; GFX9-NEXT: v_mul_lo_u32 v7, s8, v4
1808+ ; GFX9-NEXT: v_mul_hi_u32 v9, s8, v3
18091809; GFX9-NEXT: v_mul_hi_u32 v3, s9, v3
1810- ; GFX9-NEXT: v_mul_hi_u32 v12, s9, v4
1811- ; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v7, v8
1812- ; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
1813- ; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v7, v10
1810+ ; GFX9-NEXT: v_mul_hi_u32 v13, s9, v4
1811+ ; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, v6, v7
18141812; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
1815- ; GFX9-NEXT: v_mul_lo_u32 v10, s9, v4
1816- ; GFX9-NEXT: v_add_u32_e32 v7, v8, v7
1817- ; GFX9-NEXT: v_mul_hi_u32 v8, s8, v4
1818- ; GFX9-NEXT: v_xor_b32_e32 v6, s4, v6
1819- ; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v10, v3
1820- ; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1821- ; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v8
1822- ; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
1823- ; GFX9-NEXT: v_add_co_u32_e32 v11, vcc, v3, v7
1824- ; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s2, v11, 0
1825- ; GFX9-NEXT: v_mov_b32_e32 v9, s4
1813+ ; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, v6, v9
1814+ ; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
1815+ ; GFX9-NEXT: v_mul_lo_u32 v9, s9, v4
1816+ ; GFX9-NEXT: v_add_u32_e32 v6, v7, v6
1817+ ; GFX9-NEXT: v_mul_hi_u32 v7, s8, v4
1818+ ; GFX9-NEXT: v_xor_b32_e32 v10, s4, v10
1819+ ; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v9, v3
1820+ ; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
1821+ ; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v7
18261822; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
1823+ ; GFX9-NEXT: v_add_co_u32_e32 v11, vcc, v3, v6
1824+ ; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s2, v11, 0
1825+ ; GFX9-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
1826+ ; GFX9-NEXT: v_add_u32_e32 v7, v9, v7
1827+ ; GFX9-NEXT: v_mov_b32_e32 v8, s4
18271828; GFX9-NEXT: v_subrev_co_u32_e32 v5, vcc, s4, v5
1828- ; GFX9-NEXT: v_add_u32_e32 v8, v10, v8
1829- ; GFX9-NEXT: v_subb_co_u32_e32 v6, vcc, v6, v9, vcc
1830- ; GFX9-NEXT: v_add3_u32 v9, v8, v7, v12
1829+ ; GFX9-NEXT: v_add3_u32 v9, v7, v12, v13
1830+ ; GFX9-NEXT: v_subb_co_u32_e32 v6, vcc, v10, v8, vcc
18311831; GFX9-NEXT: v_mad_u64_u32 v[7:8], s[0:1], s2, v9, v[4:5]
18321832; GFX9-NEXT: v_mov_b32_e32 v10, s9
18331833; GFX9-NEXT: v_sub_co_u32_e32 v3, vcc, s8, v3
0 commit comments