@@ -1104,7 +1104,7 @@ def : Pat <
11041104// VOP1 Patterns
11051105//===----------------------------------------------------------------------===//
11061106
1107- multiclass f16_fp_Pats <Instruction cvt_f16_f32_inst_e64, Instruction cvt_f32_f16_inst_e64> {
1107+ multiclass f16_to_fp_Pats <Instruction cvt_f16_f32_inst_e64, Instruction cvt_f32_f16_inst_e64> {
11081108 // f16_to_fp patterns
11091109 def : GCNPat <
11101110 (f32 (any_f16_to_fp i32:$src0)),
@@ -1131,25 +1131,42 @@ multiclass f16_fp_Pats<Instruction cvt_f16_f32_inst_e64, Instruction cvt_f32_f16
11311131 (cvt_f32_f16_inst_e64 SRCMODS.NEG, $src0)
11321132 >;
11331133
1134+ // fp_to_fp16 patterns
11341135 def : GCNPat <
1135- (f64 (any_fpextend f16:$src )),
1136- (V_CVT_F64_F32_e32 (cvt_f32_f16_inst_e64 SRCMODS.NONE, $src) )
1136+ (i32 (AMDGPUfp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)) )),
1137+ (cvt_f16_f32_inst_e64 $src0_modifiers, f32:$src0 )
11371138 >;
11381139
1139- // fp_to_fp16 patterns
1140+ // This is only used on targets without half support
1141+ // TODO: Introduce strict variant of AMDGPUfp_to_f16 and share custom lowering
11401142 def : GCNPat <
1141- (i32 (AMDGPUfp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
1143+ (i32 (strict_fp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
11421144 (cvt_f16_f32_inst_e64 $src0_modifiers, f32:$src0)
11431145 >;
1146+ }
1147+
1148+ let SubtargetPredicate = NotHasTrue16BitInsts in
1149+ defm : f16_to_fp_Pats<V_CVT_F16_F32_e64, V_CVT_F32_F16_e64>;
1150+
1151+ let SubtargetPredicate = UseFakeTrue16Insts in
1152+ defm : f16_to_fp_Pats<V_CVT_F16_F32_fake16_e64, V_CVT_F32_F16_fake16_e64>;
1153+
1154+ multiclass f16_fp_Pats<Instruction cvt_f16_f32_inst_e64,
1155+ Instruction cvt_f32_f16_inst_e64,
1156+ RegOrImmOperand VSrc> {
1157+ def : GCNPat <
1158+ (f64 (any_fpextend f16:$src)),
1159+ (V_CVT_F64_F32_e32 (cvt_f32_f16_inst_e64 SRCMODS.NONE, $src))
1160+ >;
11441161
11451162 def : GCNPat <
11461163 (i32 (fp_to_sint f16:$src)),
1147- (V_CVT_I32_F32_e32 (cvt_f32_f16_inst_e64 SRCMODS.NONE, VSrc_b32 :$src))
1164+ (V_CVT_I32_F32_e32 (cvt_f32_f16_inst_e64 SRCMODS.NONE, VSrc :$src))
11481165 >;
11491166
11501167 def : GCNPat <
11511168 (i32 (fp_to_uint f16:$src)),
1152- (V_CVT_U32_F32_e32 (cvt_f32_f16_inst_e64 SRCMODS.NONE, VSrc_b32 :$src))
1169+ (V_CVT_U32_F32_e32 (cvt_f32_f16_inst_e64 SRCMODS.NONE, VSrc :$src))
11531170 >;
11541171
11551172 def : GCNPat <
@@ -1161,20 +1178,16 @@ multiclass f16_fp_Pats<Instruction cvt_f16_f32_inst_e64, Instruction cvt_f32_f16
11611178 (f16 (uint_to_fp i32:$src)),
11621179 (cvt_f16_f32_inst_e64 SRCMODS.NONE, (V_CVT_F32_U32_e32 VSrc_b32:$src))
11631180 >;
1164-
1165- // This is only used on targets without half support
1166- // TODO: Introduce strict variant of AMDGPUfp_to_f16 and share custom lowering
1167- def : GCNPat <
1168- (i32 (strict_fp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
1169- (cvt_f16_f32_inst_e64 $src0_modifiers, f32:$src0)
1170- >;
11711181}
11721182
11731183let SubtargetPredicate = NotHasTrue16BitInsts in
1174- defm : f16_fp_Pats<V_CVT_F16_F32_e64, V_CVT_F32_F16_e64>;
1184+ defm : f16_fp_Pats<V_CVT_F16_F32_e64, V_CVT_F32_F16_e64, VSrc_b32 >;
11751185
1176- let SubtargetPredicate = HasTrue16BitInsts in
1177- defm : f16_fp_Pats<V_CVT_F16_F32_t16_e64, V_CVT_F32_F16_t16_e64>;
1186+ let SubtargetPredicate = UseRealTrue16Insts in
1187+ defm : f16_fp_Pats<V_CVT_F16_F32_t16_e64, V_CVT_F32_F16_t16_e64, VSrcT_b16>;
1188+
1189+ let SubtargetPredicate = UseFakeTrue16Insts in
1190+ defm : f16_fp_Pats<V_CVT_F16_F32_fake16_e64, V_CVT_F32_F16_fake16_e64, VSrc_b16>;
11781191
11791192//===----------------------------------------------------------------------===//
11801193// VOP2 Patterns
@@ -2784,13 +2797,24 @@ def : GCNPat <
27842797 SSrc_i1:$src))
27852798>;
27862799
2787- let SubtargetPredicate = HasTrue16BitInsts in
2800+ let SubtargetPredicate = UseRealTrue16Insts in
27882801def : GCNPat <
27892802 (f16 (sint_to_fp i1:$src)),
2790- (V_CVT_F16_F32_t16_e32 (
2791- V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
2803+ (V_CVT_F16_F32_t16_e64 /*src0_modifiers*/ 0,
2804+ ( V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
27922805 /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_NEG_ONE),
2793- SSrc_i1:$src))
2806+ SSrc_i1:$src),
2807+ /*clamp*/ 0, /*omod*/ 0, /*op_sel*/ 0)
2808+ >;
2809+
2810+ let SubtargetPredicate = UseFakeTrue16Insts in
2811+ def : GCNPat <
2812+ (f16 (sint_to_fp i1:$src)),
2813+ (V_CVT_F16_F32_fake16_e64 /*src0_modifiers*/ 0,
2814+ (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
2815+ /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_NEG_ONE),
2816+ SSrc_i1:$src),
2817+ /*clamp*/ 0, /*omod*/ 0)
27942818>;
27952819
27962820let SubtargetPredicate = NotHasTrue16BitInsts in
@@ -2801,13 +2825,25 @@ def : GCNPat <
28012825 /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_ONE),
28022826 SSrc_i1:$src))
28032827>;
2804- let SubtargetPredicate = HasTrue16BitInsts in
2828+
2829+ let SubtargetPredicate = UseRealTrue16Insts in
28052830def : GCNPat <
28062831 (f16 (uint_to_fp i1:$src)),
2807- (V_CVT_F16_F32_t16_e32 (
2808- V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
2832+ (V_CVT_F16_F32_t16_e64 /*src0_modifiers*/ 0,
2833+ ( V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
28092834 /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_ONE),
2810- SSrc_i1:$src))
2835+ SSrc_i1:$src),
2836+ /*clamp*/ 0, /*omod*/ 0, /*op_sel*/ 0)
2837+ >;
2838+
2839+ let SubtargetPredicate = UseFakeTrue16Insts in
2840+ def : GCNPat <
2841+ (f16 (uint_to_fp i1:$src)),
2842+ (V_CVT_F16_F32_fake16_e64 /*src0_modifiers*/ 0,
2843+ (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
2844+ /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_ONE),
2845+ SSrc_i1:$src),
2846+ /*clamp*/ 0, /*omod*/ 0)
28112847>;
28122848
28132849def : GCNPat <
0 commit comments