@@ -937,6 +937,30 @@ let SubtargetPredicate = isGFX11Plus in {
937937 defm V_CVT_PK_U16_F32 : VOP3Inst<"v_cvt_pk_u16_f32", VOP3_Profile<VOP_V2I16_F32_F32>>;
938938} // End SubtargetPredicate = isGFX11Plus
939939
940+ // FIXME: GlobalISel cannot distinguish f16 from bf16, so it may select these bf16
941+ // patterns instead of the less complex f16 ones. Disable GlobalISel for them for now.
// PatFrag wrapping fpround, used by the bf16 conversion patterns below.
// Its predicate code always returns true, while GISelPredicateCode always
// returns false, so the fragment is rejected on the GlobalISel side.
942+ def bf16_fpround : PatFrag <(ops node:$src0), (fpround $src0), [{ return true; }]> {
943+ let GISelPredicateCode = [{return false;}];
944+ }
945+
// Packed f32 -> bf16 conversion instruction and its selection patterns.
// Everything in this block is gated on the HasBF16ConversionInsts predicate.
946+ let SubtargetPredicate = HasBF16ConversionInsts in {
// The instruction itself is defined with ReadsModeReg = 0.
947+ let ReadsModeReg = 0 in {
948+ defm V_CVT_PK_BF16_F32 : VOP3Inst<"v_cvt_pk_bf16_f32", VOP3_Profile<VOP_V2BF16_F32_F32>>;
949+ }
// v2f32 -> v2bf16: feed sub0 and sub1 of the 64-bit source as the two f32
// inputs. The literal 0 operands sit where the other patterns pass
// $srcN_modifiers (presumably "no source modifiers" - confirm).
950+ def : GCNPat<(v2bf16 (bf16_fpround v2f32:$src)),
951+ (V_CVT_PK_BF16_F32_e64 0, (EXTRACT_SUBREG VReg_64:$src, sub0), 0, (EXTRACT_SUBREG VReg_64:$src, sub1))>;
// v2f64 -> v2bf16: each 64-bit half (sub0_sub1, sub2_sub3) of the 128-bit
// source is first converted with V_CVT_F32_F64_e64, then both f32 results
// are packed.
// NOTE(review): this rounds twice (f64 -> f32 -> bf16); confirm that the
// intermediate f32 rounding is acceptable for the result.
952+ def : GCNPat<(v2bf16 (bf16_fpround v2f64:$src)),
953+ (V_CVT_PK_BF16_F32_e64 0, (V_CVT_F32_F64_e64 0, (EXTRACT_SUBREG VReg_128:$src, sub0_sub1)),
954+ 0, (V_CVT_F32_F64_e64 0, (EXTRACT_SUBREG VReg_128:$src, sub2_sub3)))>;
// build_vector of two scalar f32 -> bf16 rounds: fold both into a single
// packed convert, forwarding each source's VOP3Mods modifiers.
955+ def : GCNPat<(v2bf16 (build_vector (bf16 (bf16_fpround (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
956+ (bf16 (bf16_fpround (f32 (VOP3Mods f32:$src1, i32:$src1_modifiers)))))),
957+ (V_CVT_PK_BF16_F32_e64 $src0_modifiers, $src0, $src1_modifiers, $src1)>;
// Scalar f32 -> bf16: use the packed convert with IMPLICIT_DEF as the second
// source (presumably only the first result element is consumed - confirm).
958+ def : GCNPat<(bf16 (bf16_fpround (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
959+ (V_CVT_PK_BF16_F32_e64 $src0_modifiers, $src0, 0, (f32 (IMPLICIT_DEF)))>;
// Scalar f64 -> bf16: the source modifiers are applied on the f64 -> f32
// convert; the packed convert then gets 0 modifiers and an IMPLICIT_DEF
// second source.
// NOTE(review): same two-step rounding as the v2f64 pattern - confirm.
960+ def : GCNPat<(bf16 (bf16_fpround (f64 (VOP3Mods f64:$src0, i32:$src0_modifiers)))),
961+ (V_CVT_PK_BF16_F32_e64 0, (f32 (V_CVT_F32_F64_e64 $src0_modifiers, $src0)), 0, (f32 (IMPLICIT_DEF)))>;
962+ }
963+
940964let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
941965 defm V_MAXIMUMMINIMUM_F32 : VOP3Inst<"v_maximumminimum_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
942966 defm V_MINIMUMMAXIMUM_F32 : VOP3Inst<"v_minimummaximum_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
@@ -1701,5 +1725,6 @@ defm V_LSHL_ADD_U64 : VOP3_Real_vi <0x208>;
17011725
17021726defm V_CVT_PK_FP8_F32 : VOP3OpSel_Real_gfx9 <0x2a2>;
17031727defm V_CVT_PK_BF8_F32 : VOP3OpSel_Real_gfx9 <0x2a3>;
// Real definition for the packed f32 -> bf16 convert; 0x268 is the template
// argument of VOP3OpSel_Real_gfx9 (presumably the VOP3 opcode - confirm).
// NOTE(review): the instruction is defined with a plain VOP3_Profile, while
// this uses the OpSel real multiclass like the FP8/BF8 neighbours - confirm
// that this encoding class is intended.
1728+ defm V_CVT_PK_BF16_F32: VOP3OpSel_Real_gfx9 <0x268>;
17041729defm V_CVT_SR_FP8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a4>;
17051730defm V_CVT_SR_BF8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a5>;
0 commit comments