@@ -1947,16 +1947,14 @@ define <2 x i64> @lshr_mad_i64_vec(<2 x i64> %arg0) #0 {
19471947; CI-LABEL: lshr_mad_i64_vec:
19481948; CI: ; %bb.0:
19491949; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1950- ; CI-NEXT: v_mov_b32_e32 v6, v3
1951- ; CI-NEXT: v_mov_b32_e32 v3, v1
1952- ; CI-NEXT: v_mov_b32_e32 v1, 0
19531950; CI-NEXT: s_mov_b32 s4, 0xffff1c18
1954- ; CI-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v3, s4, v[0:1]
1955- ; CI-NEXT: v_mov_b32_e32 v3, v1
1951+ ; CI-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, s4, v[0:1]
19561952; CI-NEXT: s_mov_b32 s4, 0xffff1118
1957- ; CI-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, s4, v[2:3]
1953+ ; CI-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v3, s4, v[2:3]
1954+ ; CI-NEXT: v_sub_i32_e32 v1, vcc, v5, v1
1955+ ; CI-NEXT: v_sub_i32_e32 v3, vcc, v7, v3
19581956; CI-NEXT: v_mov_b32_e32 v0, v4
1959- ; CI-NEXT: v_mov_b32_e32 v1, v5
1957+ ; CI-NEXT: v_mov_b32_e32 v2, v6
19601958; CI-NEXT: s_setpc_b64 s[30:31]
19611959;
19621960; SI-LABEL: lshr_mad_i64_vec:
@@ -1979,44 +1977,28 @@ define <2 x i64> @lshr_mad_i64_vec(<2 x i64> %arg0) #0 {
19791977; GFX9-LABEL: lshr_mad_i64_vec:
19801978; GFX9: ; %bb.0:
19811979; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1982- ; GFX9-NEXT: v_mov_b32_e32 v6, v3
1983- ; GFX9-NEXT: v_mov_b32_e32 v3, v1
1984- ; GFX9-NEXT: v_mov_b32_e32 v1, 0
19851980; GFX9-NEXT: s_mov_b32 s4, 0xffff1c18
1986- ; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v3, s4, v[0:1]
1987- ; GFX9-NEXT: v_mov_b32_e32 v3, v1
1981+ ; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, s4, v[0:1]
19881982; GFX9-NEXT: s_mov_b32 s4, 0xffff1118
1989- ; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, s4, v[2:3]
1983+ ; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v3, s4, v[2:3]
1984+ ; GFX9-NEXT: v_sub_u32_e32 v1, v5, v1
1985+ ; GFX9-NEXT: v_sub_u32_e32 v3, v7, v3
19901986; GFX9-NEXT: v_mov_b32_e32 v0, v4
1991- ; GFX9-NEXT: v_mov_b32_e32 v1, v5
1987+ ; GFX9-NEXT: v_mov_b32_e32 v2, v6
19921988; GFX9-NEXT: s_setpc_b64 s[30:31]
19931989;
1994- ; GFX1100-LABEL: lshr_mad_i64_vec:
1995- ; GFX1100: ; %bb.0:
1996- ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1997- ; GFX1100-NEXT: v_mov_b32_e32 v8, v3
1998- ; GFX1100-NEXT: v_dual_mov_b32 v6, v1 :: v_dual_mov_b32 v1, 0
1999- ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2000- ; GFX1100-NEXT: v_mad_u64_u32 v[4:5], null, 0xffff1c18, v6, v[0:1]
2001- ; GFX1100-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v0, v4
2002- ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2003- ; GFX1100-NEXT: v_mad_u64_u32 v[6:7], null, 0xffff1118, v8, v[2:3]
2004- ; GFX1100-NEXT: v_dual_mov_b32 v1, v5 :: v_dual_mov_b32 v2, v6
2005- ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2)
2006- ; GFX1100-NEXT: v_mov_b32_e32 v3, v7
2007- ; GFX1100-NEXT: s_setpc_b64 s[30:31]
2008- ;
2009- ; GFX1150-LABEL: lshr_mad_i64_vec:
2010- ; GFX1150: ; %bb.0:
2011- ; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2012- ; GFX1150-NEXT: v_dual_mov_b32 v4, v3 :: v_dual_mov_b32 v5, v1
2013- ; GFX1150-NEXT: v_mov_b32_e32 v1, 0
2014- ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
2015- ; GFX1150-NEXT: v_mov_b32_e32 v3, v1
2016- ; GFX1150-NEXT: v_mad_u64_u32 v[0:1], null, 0xffff1c18, v5, v[0:1]
2017- ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2)
2018- ; GFX1150-NEXT: v_mad_u64_u32 v[2:3], null, 0xffff1118, v4, v[2:3]
2019- ; GFX1150-NEXT: s_setpc_b64 s[30:31]
1990+ ; GFX11-LABEL: lshr_mad_i64_vec:
1991+ ; GFX11: ; %bb.0:
1992+ ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1993+ ; GFX11-NEXT: v_mad_u64_u32 v[4:5], null, 0xffff1c18, v1, v[0:1]
1994+ ; GFX11-NEXT: v_mad_u64_u32 v[6:7], null, 0xffff1118, v3, v[2:3]
1995+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
1996+ ; GFX11-NEXT: v_sub_nc_u32_e32 v1, v5, v1
1997+ ; GFX11-NEXT: v_mov_b32_e32 v0, v4
1998+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
1999+ ; GFX11-NEXT: v_sub_nc_u32_e32 v3, v7, v3
2000+ ; GFX11-NEXT: v_mov_b32_e32 v2, v6
2001+ ; GFX11-NEXT: s_setpc_b64 s[30:31]
20202002;
20212003; GFX12-LABEL: lshr_mad_i64_vec:
20222004; GFX12: ; %bb.0:
@@ -2025,13 +2007,14 @@ define <2 x i64> @lshr_mad_i64_vec(<2 x i64> %arg0) #0 {
20252007; GFX12-NEXT: s_wait_samplecnt 0x0
20262008; GFX12-NEXT: s_wait_bvhcnt 0x0
20272009; GFX12-NEXT: s_wait_kmcnt 0x0
2028- ; GFX12-NEXT: v_dual_mov_b32 v4, v3 :: v_dual_mov_b32 v5, v1
2029- ; GFX12-NEXT: v_mov_b32_e32 v1, 0
2030- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
2031- ; GFX12-NEXT: v_mov_b32_e32 v3, v1
2032- ; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, 0xffff1c18, v5, v[0:1]
2033- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
2034- ; GFX12-NEXT: v_mad_co_u64_u32 v[2:3], null, 0xffff1118, v4, v[2:3]
2010+ ; GFX12-NEXT: v_mad_co_u64_u32 v[4:5], null, 0xffff1c18, v1, v[0:1]
2011+ ; GFX12-NEXT: v_mad_co_u64_u32 v[6:7], null, 0xffff1118, v3, v[2:3]
2012+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
2013+ ; GFX12-NEXT: v_sub_nc_u32_e32 v1, v5, v1
2014+ ; GFX12-NEXT: v_mov_b32_e32 v0, v4
2015+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
2016+ ; GFX12-NEXT: v_sub_nc_u32_e32 v3, v7, v3
2017+ ; GFX12-NEXT: v_mov_b32_e32 v2, v6
20352018; GFX12-NEXT: s_setpc_b64 s[30:31]
20362019 %lsh = lshr <2 x i64 > %arg0 , <i64 32 , i64 32 >
20372020 %mul = mul <2 x i64 > %lsh , <i64 s0xffffffffffff1c18, i64 s0xffffffffffff1118>
0 commit comments