@@ -480,11 +480,8 @@ define <2 x half> @test_ldexp_v2f16_v2i32(<2 x half> %a, <2 x i32> %b) {
480480; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
481481; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v3.l, v2.l
482482; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v1.l
483- ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
484- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
485- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
486483; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
487- ; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v1 , v0
484+ ; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l , v0.h
488485; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
489486;
490487; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v2f16_v2i32:
@@ -604,12 +601,9 @@ define <2 x half> @test_ldexp_v2f16_v2i16(<2 x half> %a, <2 x i16> %b) {
604601; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
605602; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
606603; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v1.l
607- ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
608- ; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v3.l, v2.l
609- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
610604; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
611- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
612- ; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v1 , v0
605+ ; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v3. l, v2.l
606+ ; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l , v0.h
613607; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
614608;
615609; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v2f16_v2i16:
@@ -731,13 +725,10 @@ define <3 x half> @test_ldexp_v3f16_v3i32(<3 x half> %a, <3 x i32> %b) {
731725; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
732726; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v5.l, v3.l
733727; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v2.l
734- ; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v3, v4, s0, 0x7fff
735- ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
736- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
737- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
738- ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
739- ; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v1.l, v1.l, v3.l
740- ; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v2, v0
728+ ; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v2, v4, s0, 0x7fff
729+ ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
730+ ; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
731+ ; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v1.l, v1.l, v2.l
741732; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
742733;
743734; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v3f16_v3i32:
@@ -804,13 +795,13 @@ define <3 x half> @test_ldexp_v3f16_v3i32(<3 x half> %a, <3 x i32> %b) {
804795; GFX11-GISEL-TRUE16-NEXT: v_mov_b32_e32 v5, 0x7fff
805796; GFX11-GISEL-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 16, v0
806797; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
798+ ; GFX11-GISEL-TRUE16-NEXT: v_med3_i32 v4, 0xffff8000, v4, v5
807799; GFX11-GISEL-TRUE16-NEXT: v_med3_i32 v2, 0xffff8000, v2, v5
808800; GFX11-GISEL-TRUE16-NEXT: v_med3_i32 v3, 0xffff8000, v3, v5
809- ; GFX11-GISEL-TRUE16-NEXT: v_med3_i32 v4, 0xffff8000, v4, v5
810- ; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v2.l
801+ ; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v1.l, v1.l, v4.l
811802; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
803+ ; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v2.l
812804; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v6.l, v3.l
813- ; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v1.l, v1.l, v4.l
814805; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
815806;
816807; GFX11-GISEL-FAKE16-LABEL: test_ldexp_v3f16_v3i32:
@@ -877,12 +868,9 @@ define <3 x half> @test_ldexp_v3f16_v3i16(<3 x half> %a, <3 x i16> %b) {
877868; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 16, v0
878869; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v2.l
879870; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v1.l, v1.l, v3.l
880- ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3 )
871+ ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1 )
881872; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v5.l, v4.l
882- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
883- ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
884- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
885- ; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v2, v0
873+ ; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
886874; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
887875;
888876; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v3f16_v3i16:
@@ -937,8 +925,8 @@ define <3 x half> @test_ldexp_v3f16_v3i16(<3 x half> %a, <3 x i16> %b) {
937925; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
938926; GFX11-GISEL-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v0
939927; GFX11-GISEL-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
940- ; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v2.l
941928; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v1.l, v1.l, v3.l
929+ ; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v2.l
942930; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
943931; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v4.l, v5.l
944932; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
@@ -1016,27 +1004,21 @@ define <4 x half> @test_ldexp_v4f16_v4i32(<4 x half> %a, <4 x i32> %b) {
10161004; GFX11-SDAG-TRUE16: ; %bb.0:
10171005; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10181006; GFX11-SDAG-TRUE16-NEXT: s_movk_i32 s0, 0x8000
1019- ; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 16, v0
1007+ ; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 16, v1
10201008; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v5, v5, s0, 0x7fff
10211009; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v3, v3, s0, 0x7fff
1010+ ; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v7, 16, v0
10221011; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v2, v2, s0, 0x7fff
10231012; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v4, v4, s0, 0x7fff
1024- ; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v7, 16, v1
1013+ ; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v1.h, v6.l, v5.l
10251014; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1026- ; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v6 .l, v3.l
1015+ ; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v7 .l, v3.l
10271016; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v2.l
1028- ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4 )
1017+ ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2 )
10291018; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v1.l, v1.l, v4.l
1030- ; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v1.h, v7.l, v5.l
1031- ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
1032- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
1033- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
1034- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l
1035- ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
1036- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
1037- ; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v2, v0
1019+ ; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
10381020; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
1039- ; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v1, v3 , v1
1021+ ; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v1, v1.l , v1.h
10401022; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
10411023;
10421024; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v4f16_v4i32:
@@ -1209,20 +1191,14 @@ define <4 x half> @test_ldexp_v4f16_v4i16(<4 x half> %a, <4 x i16> %b) {
12091191; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
12101192; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 16, v0
12111193; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v7, 16, v1
1194+ ; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v1.l, v1.l, v3.l
12121195; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v2.l
1213- ; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v1.l, v3.l
12141196; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1215- ; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v1.l , v6.l, v5.l
1197+ ; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.h , v6.l, v5.l
12161198; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v1.h, v7.l, v4.l
1217- ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1218- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
1219- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.h
1220- ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1221- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
1222- ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
12231199; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1224- ; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v2 , v0
1225- ; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v1, v3 , v1
1200+ ; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l , v0.h
1201+ ; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v1, v1.l , v1.h
12261202; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
12271203;
12281204; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v4f16_v4i16:
0 commit comments