@@ -480,11 +480,9 @@ define <2 x half> @test_ldexp_v2f16_v2i32(<2 x half> %a, <2 x i32> %b) {
480480; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
481481; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v3.l, v2.l
482482; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v1.l
483- ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
484- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
485- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
486- ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
487- ; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v1, v0
483+ ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
484+ ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.h
485+ ; GFX11-SDAG-TRUE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
488486; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
489487;
490488; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v2f16_v2i32:
@@ -610,12 +608,11 @@ define <2 x half> @test_ldexp_v2f16_v2i16(<2 x half> %a, <2 x i16> %b) {
610608; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
611609; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
612610; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v1.l
613- ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
614- ; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v3.l, v2.l
615- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
616611; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
617- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
618- ; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v1, v0
612+ ; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v3.l, v2.l
613+ ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.h
614+ ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
615+ ; GFX11-SDAG-TRUE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
619616; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
620617;
621618; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v2f16_v2i16:
@@ -744,12 +741,11 @@ define <3 x half> @test_ldexp_v3f16_v3i32(<3 x half> %a, <3 x i32> %b) {
744741; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v5.l, v3.l
745742; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v2.l
746743; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v2, v4, s0, 0x7fff
747- ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
748- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
749- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
750744; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
745+ ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.h
751746; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v1.l, v1.l, v2.l
752- ; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v3, v0
747+ ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
748+ ; GFX11-SDAG-TRUE16-NEXT: v_perm_b32 v0, v3, v0, 0x5040100
753749; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
754750;
755751; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v3f16_v3i32:
@@ -900,7 +896,7 @@ define <3 x half> @test_ldexp_v3f16_v3i16(<3 x half> %a, <3 x i16> %b) {
900896; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
901897; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
902898; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
903- ; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v2, v0
899+ ; GFX11-SDAG-TRUE16-NEXT: v_perm_b32 v0, v0, v2, 0x5040100
904900; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
905901;
906902; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v3f16_v3i16:
@@ -1043,24 +1039,21 @@ define <4 x half> @test_ldexp_v4f16_v4i32(<4 x half> %a, <4 x i32> %b) {
10431039; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 16, v0
10441040; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v5, v5, s0, 0x7fff
10451041; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v3, v3, s0, 0x7fff
1042+ ; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v7, 16, v1
10461043; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v2, v2, s0, 0x7fff
10471044; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v4, v4, s0, 0x7fff
1048- ; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v7, 16, v1
10491045; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
10501046; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v6.l, v3.l
1051- ; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0 .l, v2 .l
1047+ ; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v1.h, v7 .l, v5 .l
10521048; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1049+ ; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v2.l
10531050; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v1.l, v1.l, v4.l
1054- ; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v1.h, v7.l, v5.l
1055- ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
1056- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
1057- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
1058- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l
1059- ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
1060- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
1061- ; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v2, v0
1062- ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
1063- ; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v1, v3, v1
1051+ ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1052+ ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.h
1053+ ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.h
1054+ ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1055+ ; GFX11-SDAG-TRUE16-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
1056+ ; GFX11-SDAG-TRUE16-NEXT: v_perm_b32 v1, v3, v1, 0x5040100
10641057; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
10651058;
10661059; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v4f16_v4i32:
@@ -1257,8 +1250,8 @@ define <4 x half> @test_ldexp_v4f16_v4i16(<4 x half> %a, <4 x i16> %b) {
12571250; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
12581251; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
12591252; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1260- ; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v2, v0
1261- ; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v1, v3, v1
1253+ ; GFX11-SDAG-TRUE16-NEXT: v_perm_b32 v0, v0, v2, 0x5040100
1254+ ; GFX11-SDAG-TRUE16-NEXT: v_perm_b32 v1, v1, v3, 0x5040100
12621255; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
12631256;
12641257; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v4f16_v4i16:
0 commit comments