@@ -175,14 +175,12 @@ define half @v_fdiv_f16(half %a, half %b) {
175175; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
176176; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.l
177177; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v0.l
178- ; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l
179- ; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.l
180178; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
181179; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
182180; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
183- ; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v6 , -v4 , v3, v5 op_sel_hi:[1,0,1]
184- ; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v3, v6 , v2
185- ; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v4, -v4 , v3, v5 op_sel_hi:[1,0,1]
181+ ; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v4 , -v1 , v3, v0 op_sel_hi:[1,0,1]
182+ ; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v3, v4 , v2
183+ ; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v4, -v1 , v3, v0 op_sel_hi:[1,0,1]
186184; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v2, v4, v2
187185; GFX11-IEEE-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
188186; GFX11-IEEE-TRUE16-NEXT: v_add_f32_e32 v2, v2, v3
@@ -213,14 +211,12 @@ define half @v_fdiv_f16(half %a, half %b) {
213211; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
214212; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.l
215213; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v0.l
216- ; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l
217- ; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.l
218214; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
219215; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
220216; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
221- ; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v6 , -v4 , v3, v5 op_sel_hi:[1,0,1]
222- ; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v3, v6 , v2
223- ; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v4, -v4 , v3, v5 op_sel_hi:[1,0,1]
217+ ; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v4 , -v1 , v3, v0 op_sel_hi:[1,0,1]
218+ ; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v3, v4 , v2
219+ ; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v4, -v1 , v3, v0 op_sel_hi:[1,0,1]
224220; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v2, v4, v2
225221; GFX11-FLUSH-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
226222; GFX11-FLUSH-TRUE16-NEXT: v_add_f32_e32 v2, v2, v3
@@ -491,14 +487,12 @@ define half @v_fdiv_f16_ulp25(half %a, half %b) {
491487; GFX11-IEEE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
492488; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.l
493489; GFX11-IEEE-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v0.l
494- ; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l
495- ; GFX11-IEEE-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.l
496490; GFX11-IEEE-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
497491; GFX11-IEEE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
498492; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
499- ; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v6 , -v4 , v3, v5 op_sel_hi:[1,0,1]
500- ; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v3, v6 , v2
501- ; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v4, -v4 , v3, v5 op_sel_hi:[1,0,1]
493+ ; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v4 , -v1 , v3, v0 op_sel_hi:[1,0,1]
494+ ; GFX11-IEEE-TRUE16-NEXT: v_fmac_f32_e32 v3, v4 , v2
495+ ; GFX11-IEEE-TRUE16-NEXT: v_fma_mix_f32 v4, -v1 , v3, v0 op_sel_hi:[1,0,1]
502496; GFX11-IEEE-TRUE16-NEXT: v_mul_f32_e32 v2, v4, v2
503497; GFX11-IEEE-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
504498; GFX11-IEEE-TRUE16-NEXT: v_add_f32_e32 v2, v2, v3
@@ -529,14 +523,12 @@ define half @v_fdiv_f16_ulp25(half %a, half %b) {
529523; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
530524; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v2, v1.l
531525; GFX11-FLUSH-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v0.l
532- ; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l
533- ; GFX11-FLUSH-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.l
534526; GFX11-FLUSH-TRUE16-NEXT: v_rcp_f32_e32 v2, v2
535527; GFX11-FLUSH-TRUE16-NEXT: s_waitcnt_depctr 0xfff
536528; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v2
537- ; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v6 , -v4 , v3, v5 op_sel_hi:[1,0,1]
538- ; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v3, v6 , v2
539- ; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v4, -v4 , v3, v5 op_sel_hi:[1,0,1]
529+ ; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v4 , -v1 , v3, v0 op_sel_hi:[1,0,1]
530+ ; GFX11-FLUSH-TRUE16-NEXT: v_fmac_f32_e32 v3, v4 , v2
531+ ; GFX11-FLUSH-TRUE16-NEXT: v_fma_mix_f32 v4, -v1 , v3, v0 op_sel_hi:[1,0,1]
540532; GFX11-FLUSH-TRUE16-NEXT: v_mul_f32_e32 v2, v4, v2
541533; GFX11-FLUSH-TRUE16-NEXT: v_and_b32_e32 v2, 0xff800000, v2
542534; GFX11-FLUSH-TRUE16-NEXT: v_add_f32_e32 v2, v2, v3
0 commit comments