@@ -74,10 +74,11 @@ define bfloat @v_mad_mixlo_bf16_bf16lo_bf16lo_f32_clamp_post_cvt(bfloat %src0, b
74
74
; GFX1250: ; %bb.0:
75
75
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
76
76
; GFX1250-NEXT: s_wait_kmcnt 0x0
77
- ; GFX1250-NEXT: v_fma_mixlo_bf16 v0, v0, v1, v2 op_sel_hi:[1,1,0]
78
- ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
79
- ; GFX1250-NEXT: v_pk_max_num_bf16 v0, v0, v0 clamp
77
+ ; GFX1250-NEXT: v_fma_mixlo_bf16 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp
80
78
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
79
+
80
+
81
+
81
82
%src0.ext = fpext bfloat %src0 to float
82
83
%src1.ext = fpext bfloat %src1 to float
83
84
%result = tail call float @llvm.fmuladd.f32 (float %src0.ext , float %src1.ext , float %src2 )
@@ -191,10 +192,11 @@ define <2 x bfloat> @v_mad_mix_v2f32_clamp_postcvt(<2 x bfloat> %src0, <2 x bflo
191
192
; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v2
192
193
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
193
194
; GFX1250-NEXT: v_pk_fma_f32 v[0:1], v[4:5], v[6:7], v[0:1]
194
- ; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, v1
195
- ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
196
- ; GFX1250-NEXT: v_pk_max_num_bf16 v0, v0, v0 clamp
195
+ ; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, v1 clamp
197
196
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
197
+
198
+
199
+
198
200
%src0.ext = fpext <2 x bfloat> %src0 to <2 x float >
199
201
%src1.ext = fpext <2 x bfloat> %src1 to <2 x float >
200
202
%src2.ext = fpext <2 x bfloat> %src2 to <2 x float >
@@ -247,12 +249,12 @@ define <4 x bfloat> @v_mad_mix_v4f32_clamp_postcvt(<4 x bfloat> %src0, <4 x bflo
247
249
; GFX1250-NEXT: v_pk_fma_f32 v[0:1], v[6:7], v[0:1], v[2:3]
248
250
; GFX1250-NEXT: v_pk_fma_f32 v[2:3], v[8:9], v[10:11], v[12:13]
249
251
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
250
- ; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, v1
251
- ; GFX1250-NEXT: v_cvt_pk_bf16_f32 v1, v2, v3
252
- ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
253
- ; GFX1250-NEXT: v_pk_max_num_bf16 v0, v0, v0 clamp
254
- ; GFX1250-NEXT: v_pk_max_num_bf16 v1, v1, v1 clamp
252
+ ; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, v1 clamp
253
+ ; GFX1250-NEXT: v_cvt_pk_bf16_f32 v1, v2, v3 clamp
255
254
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
255
+
256
+
257
+
256
258
%src0.ext = fpext <4 x bfloat> %src0 to <4 x float >
257
259
%src1.ext = fpext <4 x bfloat> %src1 to <4 x float >
258
260
%src2.ext = fpext <4 x bfloat> %src2 to <4 x float >
0 commit comments