@@ -1791,25 +1791,21 @@ define hidden void @sitofp_store_div(ptr addrspace(1) %in0, ptr addrspace(1) %in
17911791; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17921792; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31
17931793; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4
1794- ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
1795- ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
17961794; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4
17971795; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
1798- ; GFX10-NEXT: global_load_dword v4, v[2:3], off
1799- ; GFX10-NEXT: global_load_dword v9, v[0:1], off
1796+ ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
1797+ ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
1798+ ; GFX10-NEXT: global_load_dword v4, v[0:1], off
1799+ ; GFX10-NEXT: global_load_dword v9, v[2:3], off
18001800; GFX10-NEXT: s_waitcnt vmcnt(1)
1801- ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v4
1802- ; GFX10-NEXT: s_waitcnt vmcnt(0)
1803- ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 16, v9
1804- ; GFX10-NEXT: v_ashrrev_i16 v2, 8, v9
1805- ; GFX10-NEXT: v_ashrrev_i16 v3, 8, v4
1806- ; GFX10-NEXT: v_perm_b32 v4, v4, v9, 0x6010205
1807- ; GFX10-NEXT: v_bfe_i32 v10, v0, 0, 8
1808- ; GFX10-NEXT: v_bfe_i32 v1, v1, 0, 8
1809- ; GFX10-NEXT: v_cvt_f32_i32_sdwa v2, sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
1810- ; GFX10-NEXT: v_cvt_f32_i32_sdwa v0, sext(v3) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
1811- ; GFX10-NEXT: v_cvt_f32_i32_sdwa v3, sext(v10) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
1812- ; GFX10-NEXT: v_cvt_f32_i32_sdwa v1, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
1801+ ; GFX10-NEXT: v_ashrrev_i16 v0, 8, v4
1802+ ; GFX10-NEXT: s_waitcnt vmcnt(0)
1803+ ; GFX10-NEXT: v_ashrrev_i16 v10, 8, v9
1804+ ; GFX10-NEXT: v_cvt_f32_i32_sdwa v3, sext(v9) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2
1805+ ; GFX10-NEXT: v_cvt_f32_i32_sdwa v1, sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2
1806+ ; GFX10-NEXT: v_perm_b32 v4, v9, v4, 0x6010205
1807+ ; GFX10-NEXT: v_cvt_f32_i32_sdwa v2, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
1808+ ; GFX10-NEXT: v_cvt_f32_i32_sdwa v0, sext(v10) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
18131809; GFX10-NEXT: global_store_dwordx4 v[7:8], v[0:3], off
18141810; GFX10-NEXT: global_store_dword v[5:6], v4, off
18151811; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -1821,24 +1817,20 @@ define hidden void @sitofp_store_div(ptr addrspace(1) %in0, ptr addrspace(1) %in
18211817; GFX9-NEXT: v_lshlrev_b32_e32 v4, 2, v4
18221818; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v4
18231819; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
1824- ; GFX9-NEXT: global_load_dword v9, v[0:1], off
1825- ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v2, v4
1826- ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
1820+ ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
1821+ ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
18271822; GFX9-NEXT: global_load_dword v4, v[0:1], off
1823+ ; GFX9-NEXT: global_load_dword v9, v[2:3], off
18281824; GFX9-NEXT: s_mov_b32 s4, 0x6010205
18291825; GFX9-NEXT: s_waitcnt vmcnt(1)
1830- ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v9
1831- ; GFX9-NEXT: v_ashrrev_i16_e32 v1, 8, v9
1832- ; GFX9-NEXT: v_bfe_i32 v10, v0, 0, 8
1833- ; GFX9-NEXT: s_waitcnt vmcnt(0)
1834- ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v4
1835- ; GFX9-NEXT: v_ashrrev_i16_e32 v3, 8, v4
1836- ; GFX9-NEXT: v_bfe_i32 v11, v2, 0, 8
1837- ; GFX9-NEXT: v_cvt_f32_i32_sdwa v2, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
1838- ; GFX9-NEXT: v_cvt_f32_i32_sdwa v0, sext(v3) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
1839- ; GFX9-NEXT: v_cvt_f32_i32_sdwa v3, sext(v11) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
1840- ; GFX9-NEXT: v_cvt_f32_i32_sdwa v1, sext(v10) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
1841- ; GFX9-NEXT: v_perm_b32 v4, v4, v9, s4
1826+ ; GFX9-NEXT: v_ashrrev_i16_e32 v0, 8, v4
1827+ ; GFX9-NEXT: s_waitcnt vmcnt(0)
1828+ ; GFX9-NEXT: v_ashrrev_i16_e32 v10, 8, v9
1829+ ; GFX9-NEXT: v_cvt_f32_i32_sdwa v3, sext(v9) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2
1830+ ; GFX9-NEXT: v_cvt_f32_i32_sdwa v1, sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2
1831+ ; GFX9-NEXT: v_cvt_f32_i32_sdwa v2, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
1832+ ; GFX9-NEXT: v_cvt_f32_i32_sdwa v0, sext(v10) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
1833+ ; GFX9-NEXT: v_perm_b32 v4, v9, v4, s4
18421834; GFX9-NEXT: global_store_dwordx4 v[7:8], v[0:3], off
18431835; GFX9-NEXT: global_store_dword v[5:6], v4, off
18441836; GFX9-NEXT: s_waitcnt vmcnt(0)
0 commit comments