@@ -944,6 +944,30 @@ let SubtargetPredicate = isGFX11Plus in {
944944 defm V_CVT_PK_U16_F32 : VOP3Inst<"v_cvt_pk_u16_f32", VOP3_Profile<VOP_V2I16_F32_F32>>;
945945} // End SubtargetPredicate = isGFX11Plus
946946
947+ // FIXME: GlobalISel cannot distinguish f16 and bf16 and may start using bf16 patterns
948+ // instead of less complex f16. Disable GlobalISel for these for now.
949+ def bf16_fpround : PatFrag <(ops node:$src0), (fpround $src0), [{ return true; }]> {
950+ let GISelPredicateCode = [{return false;}];
951+ }
952+
953+ let SubtargetPredicate = HasBF16ConversionInsts in {
954+ let ReadsModeReg = 0 in {
955+ defm V_CVT_PK_BF16_F32 : VOP3Inst<"v_cvt_pk_bf16_f32", VOP3_Profile<VOP_V2BF16_F32_F32>>;
956+ }
957+ def : GCNPat<(v2bf16 (bf16_fpround v2f32:$src)),
958+ (V_CVT_PK_BF16_F32_e64 0, (EXTRACT_SUBREG VReg_64:$src, sub0), 0, (EXTRACT_SUBREG VReg_64:$src, sub1))>;
959+ def : GCNPat<(v2bf16 (bf16_fpround v2f64:$src)),
960+ (V_CVT_PK_BF16_F32_e64 0, (V_CVT_F32_F64_e64 0, (EXTRACT_SUBREG VReg_128:$src, sub0_sub1)),
961+ 0, (V_CVT_F32_F64_e64 0, (EXTRACT_SUBREG VReg_128:$src, sub2_sub3)))>;
962+ def : GCNPat<(v2bf16 (build_vector (bf16 (bf16_fpround (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
963+ (bf16 (bf16_fpround (f32 (VOP3Mods f32:$src1, i32:$src1_modifiers)))))),
964+ (V_CVT_PK_BF16_F32_e64 $src0_modifiers, $src0, $src1_modifiers, $src1)>;
965+ def : GCNPat<(bf16 (bf16_fpround (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
966+ (V_CVT_PK_BF16_F32_e64 $src0_modifiers, $src0, 0, (f32 (IMPLICIT_DEF)))>;
967+ def : GCNPat<(bf16 (bf16_fpround (f64 (VOP3Mods f64:$src0, i32:$src0_modifiers)))),
968+ (V_CVT_PK_BF16_F32_e64 0, (f32 (V_CVT_F32_F64_e64 $src0_modifiers, $src0)), 0, (f32 (IMPLICIT_DEF)))>;
969+ }
970+
947971let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
948972 defm V_MAXIMUMMINIMUM_F32 : VOP3Inst<"v_maximumminimum_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
949973 defm V_MINIMUMMAXIMUM_F32 : VOP3Inst<"v_minimummaximum_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
@@ -1721,5 +1745,6 @@ defm V_LSHL_ADD_U64 : VOP3_Real_vi <0x208>;
17211745
17221746defm V_CVT_PK_FP8_F32 : VOP3OpSel_Real_gfx9 <0x2a2>;
17231747defm V_CVT_PK_BF8_F32 : VOP3OpSel_Real_gfx9 <0x2a3>;
1748+ defm V_CVT_PK_BF16_F32: VOP3OpSel_Real_gfx9 <0x268>;
17241749defm V_CVT_SR_FP8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a4>;
17251750defm V_CVT_SR_BF8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a5>;
0 commit comments