Skip to content

Commit 98bdc1d

Browse files
committed
Update the LIT tests to accommodate the patch effects.
1 parent 3d83665 commit 98bdc1d

19 files changed

+594
-613
lines changed

llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-mul.ll

Lines changed: 40 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -545,12 +545,10 @@ define <4 x half> @test_v4f16_sub_mul(<4 x half> %x, <4 x half> %y, <4 x half> %
545545
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
546546
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2
547547
; GFX9-NEXT: v_pk_mul_f16 v1, v1, v3
548-
; GFX9-NEXT: v_sub_f16_e32 v2, v0, v4
549-
; GFX9-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
550-
; GFX9-NEXT: v_sub_f16_e32 v3, v1, v5
551-
; GFX9-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
552-
; GFX9-NEXT: v_pack_b32_f16 v0, v2, v0
553-
; GFX9-NEXT: v_pack_b32_f16 v1, v3, v1
548+
; GFX9-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:DWORD
549+
; GFX9-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:DWORD
550+
; GFX9-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1
551+
; GFX9-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1
554552
; GFX9-NEXT: s_setpc_b64 s[30:31]
555553
;
556554
; GFX9-CONTRACT-LABEL: test_v4f16_sub_mul:
@@ -565,25 +563,21 @@ define <4 x half> @test_v4f16_sub_mul(<4 x half> %x, <4 x half> %y, <4 x half> %
565563
; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
566564
; GFX9-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
567565
; GFX9-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
568-
; GFX9-DENORM-NEXT: v_sub_f16_e32 v2, v0, v4
569-
; GFX9-DENORM-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
570-
; GFX9-DENORM-NEXT: v_sub_f16_e32 v3, v1, v5
571-
; GFX9-DENORM-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
572-
; GFX9-DENORM-NEXT: v_pack_b32_f16 v0, v2, v0
573-
; GFX9-DENORM-NEXT: v_pack_b32_f16 v1, v3, v1
566+
; GFX9-DENORM-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:DWORD
567+
; GFX9-DENORM-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:DWORD
568+
; GFX9-DENORM-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1
569+
; GFX9-DENORM-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1
574570
; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
575571
;
576572
; GFX10-LABEL: test_v4f16_sub_mul:
577573
; GFX10: ; %bb.0: ; %.entry
578574
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
579575
; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2
580576
; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3
581-
; GFX10-NEXT: v_sub_f16_e32 v2, v0, v4
582-
; GFX10-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
583-
; GFX10-NEXT: v_sub_f16_e32 v3, v1, v5
584-
; GFX10-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
585-
; GFX10-NEXT: v_pack_b32_f16 v0, v2, v0
586-
; GFX10-NEXT: v_pack_b32_f16 v1, v3, v1
577+
; GFX10-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:DWORD
578+
; GFX10-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:DWORD
579+
; GFX10-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1
580+
; GFX10-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1
587581
; GFX10-NEXT: s_setpc_b64 s[30:31]
588582
;
589583
; GFX10-CONTRACT-LABEL: test_v4f16_sub_mul:
@@ -598,12 +592,10 @@ define <4 x half> @test_v4f16_sub_mul(<4 x half> %x, <4 x half> %y, <4 x half> %
598592
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
599593
; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
600594
; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
601-
; GFX10-DENORM-NEXT: v_sub_f16_e32 v2, v0, v4
602-
; GFX10-DENORM-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
603-
; GFX10-DENORM-NEXT: v_sub_f16_e32 v3, v1, v5
604-
; GFX10-DENORM-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
605-
; GFX10-DENORM-NEXT: v_pack_b32_f16 v0, v2, v0
606-
; GFX10-DENORM-NEXT: v_pack_b32_f16 v1, v3, v1
595+
; GFX10-DENORM-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:DWORD
596+
; GFX10-DENORM-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:DWORD
597+
; GFX10-DENORM-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1
598+
; GFX10-DENORM-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1
607599
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
608600
;
609601
; GFX11-CONTRACT-LABEL: test_v4f16_sub_mul:
@@ -644,12 +636,12 @@ define <4 x half> @test_v4f16_sub_mul_rhs(<4 x half> %x, <4 x half> %y, <4 x hal
644636
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
645637
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2
646638
; GFX9-NEXT: v_pk_mul_f16 v1, v1, v3
647-
; GFX9-NEXT: v_sub_f16_e32 v2, v4, v0
648-
; GFX9-NEXT: v_sub_f16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
649-
; GFX9-NEXT: v_sub_f16_e32 v3, v5, v1
650-
; GFX9-NEXT: v_sub_f16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
651-
; GFX9-NEXT: v_pack_b32_f16 v0, v2, v0
652-
; GFX9-NEXT: v_pack_b32_f16 v1, v3, v1
639+
; GFX9-NEXT: v_sub_f16_sdwa v4, v4, v0 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:DWORD
640+
; GFX9-NEXT: v_sub_f16_sdwa v5, v5, v1 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:DWORD
641+
; GFX9-NEXT: v_sub_f16_sdwa v4, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1
642+
; GFX9-NEXT: v_sub_f16_sdwa v5, v5, v1 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1
643+
; GFX9-NEXT: v_mov_b32_e32 v0, v4
644+
; GFX9-NEXT: v_mov_b32_e32 v1, v5
653645
; GFX9-NEXT: s_setpc_b64 s[30:31]
654646
;
655647
; GFX9-CONTRACT-LABEL: test_v4f16_sub_mul_rhs:
@@ -664,25 +656,25 @@ define <4 x half> @test_v4f16_sub_mul_rhs(<4 x half> %x, <4 x half> %y, <4 x hal
664656
; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
665657
; GFX9-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
666658
; GFX9-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
667-
; GFX9-DENORM-NEXT: v_sub_f16_e32 v2, v4, v0
668-
; GFX9-DENORM-NEXT: v_sub_f16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
669-
; GFX9-DENORM-NEXT: v_sub_f16_e32 v3, v5, v1
670-
; GFX9-DENORM-NEXT: v_sub_f16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
671-
; GFX9-DENORM-NEXT: v_pack_b32_f16 v0, v2, v0
672-
; GFX9-DENORM-NEXT: v_pack_b32_f16 v1, v3, v1
659+
; GFX9-DENORM-NEXT: v_sub_f16_sdwa v4, v4, v0 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:DWORD
660+
; GFX9-DENORM-NEXT: v_sub_f16_sdwa v5, v5, v1 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:DWORD
661+
; GFX9-DENORM-NEXT: v_sub_f16_sdwa v4, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1
662+
; GFX9-DENORM-NEXT: v_sub_f16_sdwa v5, v5, v1 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1
663+
; GFX9-DENORM-NEXT: v_mov_b32_e32 v0, v4
664+
; GFX9-DENORM-NEXT: v_mov_b32_e32 v1, v5
673665
; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
674666
;
675667
; GFX10-LABEL: test_v4f16_sub_mul_rhs:
676668
; GFX10: ; %bb.0: ; %.entry
677669
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
678670
; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2
679671
; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3
680-
; GFX10-NEXT: v_sub_f16_e32 v2, v4, v0
681-
; GFX10-NEXT: v_sub_f16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
682-
; GFX10-NEXT: v_sub_f16_e32 v3, v5, v1
683-
; GFX10-NEXT: v_sub_f16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
684-
; GFX10-NEXT: v_pack_b32_f16 v0, v2, v0
685-
; GFX10-NEXT: v_pack_b32_f16 v1, v3, v1
672+
; GFX10-NEXT: v_sub_f16_sdwa v4, v4, v0 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:DWORD
673+
; GFX10-NEXT: v_sub_f16_sdwa v5, v5, v1 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:DWORD
674+
; GFX10-NEXT: v_sub_f16_sdwa v4, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1
675+
; GFX10-NEXT: v_sub_f16_sdwa v5, v5, v1 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1
676+
; GFX10-NEXT: v_mov_b32_e32 v0, v4
677+
; GFX10-NEXT: v_mov_b32_e32 v1, v5
686678
; GFX10-NEXT: s_setpc_b64 s[30:31]
687679
;
688680
; GFX10-CONTRACT-LABEL: test_v4f16_sub_mul_rhs:
@@ -697,12 +689,12 @@ define <4 x half> @test_v4f16_sub_mul_rhs(<4 x half> %x, <4 x half> %y, <4 x hal
697689
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
698690
; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
699691
; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
700-
; GFX10-DENORM-NEXT: v_sub_f16_e32 v2, v4, v0
701-
; GFX10-DENORM-NEXT: v_sub_f16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
702-
; GFX10-DENORM-NEXT: v_sub_f16_e32 v3, v5, v1
703-
; GFX10-DENORM-NEXT: v_sub_f16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
704-
; GFX10-DENORM-NEXT: v_pack_b32_f16 v0, v2, v0
705-
; GFX10-DENORM-NEXT: v_pack_b32_f16 v1, v3, v1
692+
; GFX10-DENORM-NEXT: v_sub_f16_sdwa v4, v4, v0 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:DWORD
693+
; GFX10-DENORM-NEXT: v_sub_f16_sdwa v5, v5, v1 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:DWORD
694+
; GFX10-DENORM-NEXT: v_sub_f16_sdwa v4, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1
695+
; GFX10-DENORM-NEXT: v_sub_f16_sdwa v5, v5, v1 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1
696+
; GFX10-DENORM-NEXT: v_mov_b32_e32 v0, v4
697+
; GFX10-DENORM-NEXT: v_mov_b32_e32 v1, v5
706698
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
707699
;
708700
; GFX11-CONTRACT-LABEL: test_v4f16_sub_mul_rhs:

llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-neg-mul.ll

Lines changed: 16 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -221,12 +221,10 @@ define <4 x half> @test_v4f16_sub_ext_neg_mul(<4 x half> %x, <4 x half> %y, <4 x
221221
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
222222
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
223223
; GFX9-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
224-
; GFX9-NEXT: v_sub_f16_e32 v2, v0, v4
225-
; GFX9-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
226-
; GFX9-NEXT: v_sub_f16_e32 v3, v1, v5
227-
; GFX9-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
228-
; GFX9-NEXT: v_pack_b32_f16 v0, v2, v0
229-
; GFX9-NEXT: v_pack_b32_f16 v1, v3, v1
224+
; GFX9-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:DWORD
225+
; GFX9-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:DWORD
226+
; GFX9-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1
227+
; GFX9-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1
230228
; GFX9-NEXT: s_setpc_b64 s[30:31]
231229
;
232230
; GFX9-CONTRACT-LABEL: test_v4f16_sub_ext_neg_mul:
@@ -241,25 +239,21 @@ define <4 x half> @test_v4f16_sub_ext_neg_mul(<4 x half> %x, <4 x half> %y, <4 x
241239
; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
242240
; GFX9-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
243241
; GFX9-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
244-
; GFX9-DENORM-NEXT: v_sub_f16_e32 v2, v0, v4
245-
; GFX9-DENORM-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
246-
; GFX9-DENORM-NEXT: v_sub_f16_e32 v3, v1, v5
247-
; GFX9-DENORM-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
248-
; GFX9-DENORM-NEXT: v_pack_b32_f16 v0, v2, v0
249-
; GFX9-DENORM-NEXT: v_pack_b32_f16 v1, v3, v1
242+
; GFX9-DENORM-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:DWORD
243+
; GFX9-DENORM-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:DWORD
244+
; GFX9-DENORM-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1
245+
; GFX9-DENORM-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1
250246
; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
251247
;
252248
; GFX10-LABEL: test_v4f16_sub_ext_neg_mul:
253249
; GFX10: ; %bb.0: ; %entry
254250
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
255251
; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
256252
; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
257-
; GFX10-NEXT: v_sub_f16_e32 v2, v0, v4
258-
; GFX10-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
259-
; GFX10-NEXT: v_sub_f16_e32 v3, v1, v5
260-
; GFX10-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
261-
; GFX10-NEXT: v_pack_b32_f16 v0, v2, v0
262-
; GFX10-NEXT: v_pack_b32_f16 v1, v3, v1
253+
; GFX10-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:DWORD
254+
; GFX10-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:DWORD
255+
; GFX10-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1
256+
; GFX10-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1
263257
; GFX10-NEXT: s_setpc_b64 s[30:31]
264258
;
265259
; GFX10-CONTRACT-LABEL: test_v4f16_sub_ext_neg_mul:
@@ -274,12 +268,10 @@ define <4 x half> @test_v4f16_sub_ext_neg_mul(<4 x half> %x, <4 x half> %y, <4 x
274268
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
275269
; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
276270
; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
277-
; GFX10-DENORM-NEXT: v_sub_f16_e32 v2, v0, v4
278-
; GFX10-DENORM-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
279-
; GFX10-DENORM-NEXT: v_sub_f16_e32 v3, v1, v5
280-
; GFX10-DENORM-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
281-
; GFX10-DENORM-NEXT: v_pack_b32_f16 v0, v2, v0
282-
; GFX10-DENORM-NEXT: v_pack_b32_f16 v1, v3, v1
271+
; GFX10-DENORM-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:DWORD
272+
; GFX10-DENORM-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:DWORD
273+
; GFX10-DENORM-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1
274+
; GFX10-DENORM-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1
283275
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
284276
entry:
285277
%a = fmul <4 x half> %x, %y

0 commit comments

Comments
 (0)