@@ -545,12 +545,10 @@ define <4 x half> @test_v4f16_sub_mul(<4 x half> %x, <4 x half> %y, <4 x half> %
545545; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
546546; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2
547547; GFX9-NEXT: v_pk_mul_f16 v1, v1, v3
548- ; GFX9-NEXT: v_sub_f16_e32 v2, v0, v4
549- ; GFX9-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
550- ; GFX9-NEXT: v_sub_f16_e32 v3, v1, v5
551- ; GFX9-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
552- ; GFX9-NEXT: v_pack_b32_f16 v0, v2, v0
553- ; GFX9-NEXT: v_pack_b32_f16 v1, v3, v1
548+ ; GFX9-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:DWORD
549+ ; GFX9-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:DWORD
550+ ; GFX9-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1
551+ ; GFX9-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1
554552; GFX9-NEXT: s_setpc_b64 s[30:31]
555553;
556554; GFX9-CONTRACT-LABEL: test_v4f16_sub_mul:
@@ -565,25 +563,21 @@ define <4 x half> @test_v4f16_sub_mul(<4 x half> %x, <4 x half> %y, <4 x half> %
565563; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
566564; GFX9-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
567565; GFX9-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
568- ; GFX9-DENORM-NEXT: v_sub_f16_e32 v2, v0, v4
569- ; GFX9-DENORM-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
570- ; GFX9-DENORM-NEXT: v_sub_f16_e32 v3, v1, v5
571- ; GFX9-DENORM-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
572- ; GFX9-DENORM-NEXT: v_pack_b32_f16 v0, v2, v0
573- ; GFX9-DENORM-NEXT: v_pack_b32_f16 v1, v3, v1
566+ ; GFX9-DENORM-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:DWORD
567+ ; GFX9-DENORM-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:DWORD
568+ ; GFX9-DENORM-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1
569+ ; GFX9-DENORM-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1
574570; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
575571;
576572; GFX10-LABEL: test_v4f16_sub_mul:
577573; GFX10: ; %bb.0: ; %.entry
578574; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
579575; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2
580576; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3
581- ; GFX10-NEXT: v_sub_f16_e32 v2, v0, v4
582- ; GFX10-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
583- ; GFX10-NEXT: v_sub_f16_e32 v3, v1, v5
584- ; GFX10-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
585- ; GFX10-NEXT: v_pack_b32_f16 v0, v2, v0
586- ; GFX10-NEXT: v_pack_b32_f16 v1, v3, v1
577+ ; GFX10-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:DWORD
578+ ; GFX10-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:DWORD
579+ ; GFX10-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1
580+ ; GFX10-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1
587581; GFX10-NEXT: s_setpc_b64 s[30:31]
588582;
589583; GFX10-CONTRACT-LABEL: test_v4f16_sub_mul:
@@ -598,12 +592,10 @@ define <4 x half> @test_v4f16_sub_mul(<4 x half> %x, <4 x half> %y, <4 x half> %
598592; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
599593; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
600594; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
601- ; GFX10-DENORM-NEXT: v_sub_f16_e32 v2, v0, v4
602- ; GFX10-DENORM-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
603- ; GFX10-DENORM-NEXT: v_sub_f16_e32 v3, v1, v5
604- ; GFX10-DENORM-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
605- ; GFX10-DENORM-NEXT: v_pack_b32_f16 v0, v2, v0
606- ; GFX10-DENORM-NEXT: v_pack_b32_f16 v1, v3, v1
595+ ; GFX10-DENORM-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:DWORD
596+ ; GFX10-DENORM-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:DWORD
597+ ; GFX10-DENORM-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1
598+ ; GFX10-DENORM-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1
607599; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
608600;
609601; GFX11-CONTRACT-LABEL: test_v4f16_sub_mul:
@@ -644,12 +636,12 @@ define <4 x half> @test_v4f16_sub_mul_rhs(<4 x half> %x, <4 x half> %y, <4 x hal
644636; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
645637; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2
646638; GFX9-NEXT: v_pk_mul_f16 v1, v1, v3
647- ; GFX9-NEXT: v_sub_f16_e32 v2 , v4, v0
648- ; GFX9-NEXT: v_sub_f16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
649- ; GFX9-NEXT: v_sub_f16_e32 v3, v5, v1
650- ; GFX9-NEXT: v_sub_f16_sdwa v1 , v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
651- ; GFX9-NEXT: v_pack_b32_f16 v0, v2, v0
652- ; GFX9-NEXT: v_pack_b32_f16 v1, v3, v1
639+ ; GFX9-NEXT: v_sub_f16_sdwa v4 , v4, v0 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:DWORD
640+ ; GFX9-NEXT: v_sub_f16_sdwa v5, v5, v1 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:DWORD
641+ ; GFX9-NEXT: v_sub_f16_sdwa v4, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1
642+ ; GFX9-NEXT: v_sub_f16_sdwa v5 , v5, v1 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1
643+ ; GFX9-NEXT: v_mov_b32_e32 v0, v4
644+ ; GFX9-NEXT: v_mov_b32_e32 v1, v5
653645; GFX9-NEXT: s_setpc_b64 s[30:31]
654646;
655647; GFX9-CONTRACT-LABEL: test_v4f16_sub_mul_rhs:
@@ -664,25 +656,25 @@ define <4 x half> @test_v4f16_sub_mul_rhs(<4 x half> %x, <4 x half> %y, <4 x hal
664656; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
665657; GFX9-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
666658; GFX9-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
667- ; GFX9-DENORM-NEXT: v_sub_f16_e32 v2 , v4, v0
668- ; GFX9-DENORM-NEXT: v_sub_f16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
669- ; GFX9-DENORM-NEXT: v_sub_f16_e32 v3, v5, v1
670- ; GFX9-DENORM-NEXT: v_sub_f16_sdwa v1 , v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
671- ; GFX9-DENORM-NEXT: v_pack_b32_f16 v0, v2, v0
672- ; GFX9-DENORM-NEXT: v_pack_b32_f16 v1, v3, v1
659+ ; GFX9-DENORM-NEXT: v_sub_f16_sdwa v4 , v4, v0 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:DWORD
660+ ; GFX9-DENORM-NEXT: v_sub_f16_sdwa v5, v5, v1 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:DWORD
661+ ; GFX9-DENORM-NEXT: v_sub_f16_sdwa v4, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1
662+ ; GFX9-DENORM-NEXT: v_sub_f16_sdwa v5 , v5, v1 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1
663+ ; GFX9-DENORM-NEXT: v_mov_b32_e32 v0, v4
664+ ; GFX9-DENORM-NEXT: v_mov_b32_e32 v1, v5
673665; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
674666;
675667; GFX10-LABEL: test_v4f16_sub_mul_rhs:
676668; GFX10: ; %bb.0: ; %.entry
677669; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
678670; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2
679671; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3
680- ; GFX10-NEXT: v_sub_f16_e32 v2 , v4, v0
681- ; GFX10-NEXT: v_sub_f16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
682- ; GFX10-NEXT: v_sub_f16_e32 v3, v5, v1
683- ; GFX10-NEXT: v_sub_f16_sdwa v1 , v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
684- ; GFX10-NEXT: v_pack_b32_f16 v0, v2, v0
685- ; GFX10-NEXT: v_pack_b32_f16 v1, v3, v1
672+ ; GFX10-NEXT: v_sub_f16_sdwa v4 , v4, v0 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:DWORD
673+ ; GFX10-NEXT: v_sub_f16_sdwa v5, v5, v1 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:DWORD
674+ ; GFX10-NEXT: v_sub_f16_sdwa v4, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1
675+ ; GFX10-NEXT: v_sub_f16_sdwa v5 , v5, v1 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1
676+ ; GFX10-NEXT: v_mov_b32_e32 v0, v4
677+ ; GFX10-NEXT: v_mov_b32_e32 v1, v5
686678; GFX10-NEXT: s_setpc_b64 s[30:31]
687679;
688680; GFX10-CONTRACT-LABEL: test_v4f16_sub_mul_rhs:
@@ -697,12 +689,12 @@ define <4 x half> @test_v4f16_sub_mul_rhs(<4 x half> %x, <4 x half> %y, <4 x hal
697689; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
698690; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
699691; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
700- ; GFX10-DENORM-NEXT: v_sub_f16_e32 v2 , v4, v0
701- ; GFX10-DENORM-NEXT: v_sub_f16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
702- ; GFX10-DENORM-NEXT: v_sub_f16_e32 v3, v5, v1
703- ; GFX10-DENORM-NEXT: v_sub_f16_sdwa v1 , v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
704- ; GFX10-DENORM-NEXT: v_pack_b32_f16 v0, v2, v0
705- ; GFX10-DENORM-NEXT: v_pack_b32_f16 v1, v3, v1
692+ ; GFX10-DENORM-NEXT: v_sub_f16_sdwa v4 , v4, v0 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:DWORD
693+ ; GFX10-DENORM-NEXT: v_sub_f16_sdwa v5, v5, v1 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:DWORD
694+ ; GFX10-DENORM-NEXT: v_sub_f16_sdwa v4, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1
695+ ; GFX10-DENORM-NEXT: v_sub_f16_sdwa v5 , v5, v1 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1
696+ ; GFX10-DENORM-NEXT: v_mov_b32_e32 v0, v4
697+ ; GFX10-DENORM-NEXT: v_mov_b32_e32 v1, v5
706698; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
707699;
708700; GFX11-CONTRACT-LABEL: test_v4f16_sub_mul_rhs:
0 commit comments