@@ -74,10 +74,11 @@ define bfloat @v_mad_mixlo_bf16_bf16lo_bf16lo_f32_clamp_post_cvt(bfloat %src0, b
7474; GFX1250: ; %bb.0:
7575; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
7676; GFX1250-NEXT: s_wait_kmcnt 0x0
77- ; GFX1250-NEXT: v_fma_mixlo_bf16 v0, v0, v1, v2 op_sel_hi:[1,1,0]
78- ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
79- ; GFX1250-NEXT: v_pk_max_num_bf16 v0, v0, v0 clamp
77+ ; GFX1250-NEXT: v_fma_mixlo_bf16 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp
8078; GFX1250-NEXT: s_set_pc_i64 s[30:31]
79+
80+
81+
8182 %src0.ext = fpext bfloat %src0 to float
8283 %src1.ext = fpext bfloat %src1 to float
8384 %result = tail call float @llvm.fmuladd.f32 (float %src0.ext , float %src1.ext , float %src2 )
@@ -191,10 +192,11 @@ define <2 x bfloat> @v_mad_mix_v2f32_clamp_postcvt(<2 x bfloat> %src0, <2 x bflo
191192; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v2
192193; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
193194; GFX1250-NEXT: v_pk_fma_f32 v[0:1], v[4:5], v[6:7], v[0:1]
194- ; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, v1
195- ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
196- ; GFX1250-NEXT: v_pk_max_num_bf16 v0, v0, v0 clamp
195+ ; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, v1 clamp
197196; GFX1250-NEXT: s_set_pc_i64 s[30:31]
197+
198+
199+
198200 %src0.ext = fpext <2 x bfloat> %src0 to <2 x float >
199201 %src1.ext = fpext <2 x bfloat> %src1 to <2 x float >
200202 %src2.ext = fpext <2 x bfloat> %src2 to <2 x float >
@@ -247,12 +249,12 @@ define <4 x bfloat> @v_mad_mix_v4f32_clamp_postcvt(<4 x bfloat> %src0, <4 x bflo
247249; GFX1250-NEXT: v_pk_fma_f32 v[0:1], v[6:7], v[0:1], v[2:3]
248250; GFX1250-NEXT: v_pk_fma_f32 v[2:3], v[8:9], v[10:11], v[12:13]
249251; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
250- ; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, v1
251- ; GFX1250-NEXT: v_cvt_pk_bf16_f32 v1, v2, v3
252- ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
253- ; GFX1250-NEXT: v_pk_max_num_bf16 v0, v0, v0 clamp
254- ; GFX1250-NEXT: v_pk_max_num_bf16 v1, v1, v1 clamp
252+ ; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, v1 clamp
253+ ; GFX1250-NEXT: v_cvt_pk_bf16_f32 v1, v2, v3 clamp
255254; GFX1250-NEXT: s_set_pc_i64 s[30:31]
255+
256+
257+
256258 %src0.ext = fpext <4 x bfloat> %src0 to <4 x float >
257259 %src1.ext = fpext <4 x bfloat> %src1 to <4 x float >
258260 %src2.ext = fpext <4 x bfloat> %src2 to <4 x float >
0 commit comments