@@ -1071,55 +1071,51 @@ define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %s
10711071; SDAG-GFX1100-TRUE16-LABEL: v_mad_mix_v3f32_clamp_postcvt:
10721072; SDAG-GFX1100-TRUE16: ; %bb.0:
10731073; SDAG-GFX1100-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1074- ; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
1075- ; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
1074+ ; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v6.l, v3.l
10761075; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
1077- ; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v5.l, v2.l
1078- ; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1079- ; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v2.h, v6.l
1080- ; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
1081- ; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1082- ; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v2, v3, v5, v4 op_sel_hi:[1,1,1]
1083- ; SDAG-GFX1100-TRUE16-NEXT: v_pack_b32_f16 v1, v0.l, 0
1076+ ; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v7.l, v2.l
1077+ ; SDAG-GFX1100-TRUE16-NEXT: v_mov_b16_e32 v8.l, v4.l
1078+ ; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
1079+ ; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v1, v1, v6, v5 op_sel_hi:[1,1,1]
1080+ ; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixlo_f16 v3, v3, v7, v8 op_sel_hi:[1,1,1] clamp
10841081; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1085- ; SDAG-GFX1100-TRUE16-NEXT: v_pk_max_f16 v0, v2, v2 clamp
10861082; SDAG-GFX1100-TRUE16-NEXT: v_pk_max_f16 v1, v1, v1 clamp
1083+ ; SDAG-GFX1100-TRUE16-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1084+ ; SDAG-GFX1100-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1085+ ; SDAG-GFX1100-TRUE16-NEXT: v_mov_b32_e32 v0, v3
10871086; SDAG-GFX1100-TRUE16-NEXT: s_setpc_b64 s[30:31]
10881087;
10891088; SDAG-GFX1100-FAKE16-LABEL: v_mad_mix_v3f32_clamp_postcvt:
10901089; SDAG-GFX1100-FAKE16: ; %bb.0:
10911090; SDAG-GFX1100-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1092- ; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
1091+ ; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
10931092; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
10941093; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1095- ; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
1096- ; SDAG-GFX1100-FAKE16-NEXT: v_pack_b32_f16 v1, v1, 0
1097- ; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1098- ; SDAG-GFX1100-FAKE16-NEXT: v_pk_max_f16 v0, v6, v6 clamp
1094+ ; SDAG-GFX1100-FAKE16-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
10991095; SDAG-GFX1100-FAKE16-NEXT: v_pk_max_f16 v1, v1, v1 clamp
1096+ ; SDAG-GFX1100-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
1097+ ; SDAG-GFX1100-FAKE16-NEXT: v_mov_b32_e32 v0, v6
11001098; SDAG-GFX1100-FAKE16-NEXT: s_setpc_b64 s[30:31]
11011099;
1102- ; SDAG-GFX900-LABEL: v_mad_mix_v3f32_clamp_postcvt:
1103- ; SDAG-GFX900: ; %bb.0:
1104- ; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1105- ; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
1106- ; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
1107- ; SDAG-GFX900-NEXT: v_pack_b32_f16 v1, v1, 0
1108- ; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
1109- ; SDAG-GFX900-NEXT: v_pk_max_f16 v0, v6, v6 clamp
1110- ; SDAG-GFX900-NEXT: v_pk_max_f16 v1, v1, v1 clamp
1111- ; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31]
1100+ ; GFX900-LABEL: v_mad_mix_v3f32_clamp_postcvt:
1101+ ; GFX900: ; %bb.0:
1102+ ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1103+ ; GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
1104+ ; GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
1105+ ; GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1106+ ; GFX900-NEXT: v_pk_max_f16 v1, v1, v1 clamp
1107+ ; GFX900-NEXT: v_mov_b32_e32 v0, v3
1108+ ; GFX900-NEXT: s_setpc_b64 s[30:31]
11121109;
1113- ; SDAG-GFX906-LABEL: v_mad_mix_v3f32_clamp_postcvt:
1114- ; SDAG-GFX906: ; %bb.0:
1115- ; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1116- ; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
1117- ; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
1118- ; SDAG-GFX906-NEXT: v_pack_b32_f16 v1, v1, 0
1119- ; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
1120- ; SDAG-GFX906-NEXT: v_pk_max_f16 v0, v6, v6 clamp
1121- ; SDAG-GFX906-NEXT: v_pk_max_f16 v1, v1, v1 clamp
1122- ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
1110+ ; GFX906-LABEL: v_mad_mix_v3f32_clamp_postcvt:
1111+ ; GFX906: ; %bb.0:
1112+ ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1113+ ; GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
1114+ ; GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
1115+ ; GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1116+ ; GFX906-NEXT: v_pk_max_f16 v1, v1, v1 clamp
1117+ ; GFX906-NEXT: v_mov_b32_e32 v0, v3
1118+ ; GFX906-NEXT: s_setpc_b64 s[30:31]
11231119;
11241120; SDAG-VI-LABEL: v_mad_mix_v3f32_clamp_postcvt:
11251121; SDAG-VI: ; %bb.0:
@@ -1193,26 +1189,6 @@ define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %s
11931189; GISEL-GFX1100-NEXT: v_mov_b32_e32 v0, v6
11941190; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31]
11951191;
1196- ; GISEL-GFX900-LABEL: v_mad_mix_v3f32_clamp_postcvt:
1197- ; GISEL-GFX900: ; %bb.0:
1198- ; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1199- ; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
1200- ; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
1201- ; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1202- ; GISEL-GFX900-NEXT: v_pk_max_f16 v1, v1, v1 clamp
1203- ; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v3
1204- ; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
1205- ;
1206- ; GISEL-GFX906-LABEL: v_mad_mix_v3f32_clamp_postcvt:
1207- ; GISEL-GFX906: ; %bb.0:
1208- ; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1209- ; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1]
1210- ; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp
1211- ; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1212- ; GISEL-GFX906-NEXT: v_pk_max_f16 v1, v1, v1 clamp
1213- ; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v3
1214- ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
1215- ;
12161192; GISEL-VI-LABEL: v_mad_mix_v3f32_clamp_postcvt:
12171193; GISEL-VI: ; %bb.0:
12181194; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
0 commit comments