@@ -1669,40 +1669,38 @@ define amdgpu_kernel void @notdot4_mixedtypes(ptr addrspace(1) %src1,
16691669; GFX11-DL-TRUE16-LABEL: notdot4_mixedtypes:
16701670; GFX11-DL-TRUE16: ; %bb.0: ; %entry
16711671; GFX11-DL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
1672- ; GFX11-DL-TRUE16-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_and_b32 v0, 0x3ff, v0
1672+ ; GFX11-DL-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
16731673; GFX11-DL-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
1674- ; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1674+ ; GFX11-DL-TRUE16-NEXT: v_mov_b32_e32 v6, 0
1675+ ; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
16751676; GFX11-DL-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0
16761677; GFX11-DL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
16771678; GFX11-DL-TRUE16-NEXT: s_clause 0x1
1678- ; GFX11-DL-TRUE16-NEXT: global_load_b32 v3 , v0, s[0:1]
1679- ; GFX11-DL-TRUE16-NEXT: global_load_b32 v4 , v0, s[2:3]
1680- ; GFX11-DL-TRUE16-NEXT: global_load_d16_b16 v0, v5 , s[4:5]
1679+ ; GFX11-DL-TRUE16-NEXT: global_load_b32 v4 , v0, s[0:1]
1680+ ; GFX11-DL-TRUE16-NEXT: global_load_b32 v5 , v0, s[2:3]
1681+ ; GFX11-DL-TRUE16-NEXT: global_load_d16_b16 v0, v6 , s[4:5]
16811682; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(2)
1682- ; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v3
1683+ ; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v4
16831684; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(1)
1684- ; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 8, v4
1685- ; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v6.l, v3.l
1686- ; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v7.l, v4.l
1685+ ; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 8, v5
1686+ ; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
1687+ ; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v7, v5, 0, 8
16871688; GFX11-DL-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v1.l
16881689; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
16891690; GFX11-DL-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v2.l
1690- ; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v2, v6, 0, 8
1691+ ; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l
16911692; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
1692- ; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v6 , v7, 0, 8
1693+ ; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v3.l , v7.l
16931694; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(0)
16941695; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v0.h, v1.l, v0.l
1695- ; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
1696- ; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
1697- ; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v6.l
1698- ; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
1699- ; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v1.l, v2.l, v0.l
1700- ; GFX11-DL-TRUE16-NEXT: v_perm_b32 v1, v4, v4, 0xc0c0302
1701- ; GFX11-DL-TRUE16-NEXT: v_perm_b32 v2, v3, v3, 0xc0c0302
1696+ ; GFX11-DL-TRUE16-NEXT: v_perm_b32 v1, v5, v5, 0xc0c0302
1697+ ; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1698+ ; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v2.l, v3.l, v0.l
1699+ ; GFX11-DL-TRUE16-NEXT: v_perm_b32 v2, v4, v4, 0xc0c0302
17021700; GFX11-DL-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
17031701; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
17041702; GFX11-DL-TRUE16-NEXT: v_dot4_u32_u8 v0, v2, v1, v0
1705- ; GFX11-DL-TRUE16-NEXT: global_store_b16 v5 , v0, s[4:5]
1703+ ; GFX11-DL-TRUE16-NEXT: global_store_b16 v6 , v0, s[4:5]
17061704; GFX11-DL-TRUE16-NEXT: s_endpgm
17071705;
17081706; GFX11-DL-FAKE16-LABEL: notdot4_mixedtypes:
@@ -1964,44 +1962,41 @@ define amdgpu_kernel void @notdot4_mixedtypes2(ptr addrspace(1) %src1,
19641962; GFX11-DL-TRUE16-LABEL: notdot4_mixedtypes2:
19651963; GFX11-DL-TRUE16: ; %bb.0: ; %entry
19661964; GFX11-DL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
1967- ; GFX11-DL-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1965+ ; GFX11-DL-TRUE16-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_and_b32 v0, 0x3ff, v0
19681966; GFX11-DL-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
1969- ; GFX11-DL-TRUE16-NEXT: v_mov_b32_e32 v4, 0
1970- ; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
1967+ ; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
19711968; GFX11-DL-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0
19721969; GFX11-DL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
19731970; GFX11-DL-TRUE16-NEXT: s_clause 0x1
1974- ; GFX11-DL-TRUE16-NEXT: global_load_b32 v2 , v0, s[2:3]
1975- ; GFX11-DL-TRUE16-NEXT: global_load_b32 v3 , v0, s[0:1]
1976- ; GFX11-DL-TRUE16-NEXT: global_load_d16_b16 v0, v4 , s[4:5]
1971+ ; GFX11-DL-TRUE16-NEXT: global_load_b32 v3 , v0, s[2:3]
1972+ ; GFX11-DL-TRUE16-NEXT: global_load_b32 v4 , v0, s[0:1]
1973+ ; GFX11-DL-TRUE16-NEXT: global_load_d16_b16 v0, v5 , s[4:5]
19771974; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(2)
1978- ; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v2
1975+ ; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v3
19791976; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(1)
1980- ; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v5.l, v3.l
1981- ; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 8, v3
1982- ; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v7.l, v3.h
1983- ; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 24, v3
1977+ ; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 8, v4
1978+ ; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v6, v4, 0, 8
1979+ ; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v7.l, v4.h
19841980; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
1985- ; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8
1986- ; GFX11-DL-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v6.l
1987- ; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 24, v2
1988- ; GFX11-DL-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v2.l
1981+ ; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
1982+ ; GFX11-DL-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v2.l
1983+ ; GFX11-DL-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v3.l
19891984; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8
1990- ; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v5.l
1985+ ; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v6.l
1986+ ; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 24, v3
19911987; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(0)
19921988; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v0.h, v1.l, v0.l
1993- ; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v5.l, v6.l
1994- ; GFX11-DL-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v2.h
1989+ ; GFX11-DL-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v3.h
19951990; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v7.l
1996- ; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1991+ ; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 24, v4
1992+ ; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
19971993; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v2.l, v1.h, v0.l
1998- ; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v2, v5, 0, 8
1999- ; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1994+ ; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v2, v6, 0, 8
20001995; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v1.l, v0.h, v0.l
1996+ ; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
20011997; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
2002- ; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
20031998; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v3.l, v1.l, v0.l
2004- ; GFX11-DL-TRUE16-NEXT: global_store_b16 v4 , v0, s[4:5]
1999+ ; GFX11-DL-TRUE16-NEXT: global_store_b16 v5 , v0, s[4:5]
20052000; GFX11-DL-TRUE16-NEXT: s_endpgm
20062001;
20072002; GFX11-DL-FAKE16-LABEL: notdot4_mixedtypes2:
0 commit comments