@@ -1094,7 +1094,7 @@ def : Pat <
10941094// VOP1 Patterns
10951095//===----------------------------------------------------------------------===//
10961096
1097- multiclass f16_fp_Pats <Instruction cvt_f16_f32_inst_e64, Instruction cvt_f32_f16_inst_e64> {
1097+ multiclass f16_to_fp_Pats <Instruction cvt_f16_f32_inst_e64, Instruction cvt_f32_f16_inst_e64> {
10981098 // f16_to_fp patterns
10991099 def : GCNPat <
11001100 (f32 (any_f16_to_fp i32:$src0)),
@@ -1121,25 +1121,42 @@ multiclass f16_fp_Pats<Instruction cvt_f16_f32_inst_e64, Instruction cvt_f32_f16
11211121 (cvt_f32_f16_inst_e64 SRCMODS.NEG, $src0)
11221122 >;
11231123
1124+ // fp_to_fp16 patterns
11241125 def : GCNPat <
1125- (f64 (any_fpextend f16:$src )),
1126- (V_CVT_F64_F32_e32 (cvt_f32_f16_inst_e64 SRCMODS.NONE, $src) )
1126+ (i32 (AMDGPUfp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)) )),
1127+ (cvt_f16_f32_inst_e64 $src0_modifiers, f32:$src0 )
11271128 >;
11281129
1129- // fp_to_fp16 patterns
1130+ // This is only used on targets without half support
1131+ // TODO: Introduce strict variant of AMDGPUfp_to_f16 and share custom lowering
11301132 def : GCNPat <
1131- (i32 (AMDGPUfp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
1133+ (i32 (strict_fp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
11321134 (cvt_f16_f32_inst_e64 $src0_modifiers, f32:$src0)
11331135 >;
1136+ }
1137+
1138+ let SubtargetPredicate = NotHasTrue16BitInsts in // Instantiate the f16_to_fp / fp_to_f16 patterns with the V_CVT_*_e64 opcodes that carry no t16/fake16 suffix.
1139+ defm : f16_to_fp_Pats<V_CVT_F16_F32_e64, V_CVT_F32_F16_e64>;
1140+
1141+ let SubtargetPredicate = UseFakeTrue16Insts in // Same multiclass, instantiated with the fake16 e64 converts.
1142+ defm : f16_to_fp_Pats<V_CVT_F16_F32_fake16_e64, V_CVT_F32_F16_fake16_e64>; // NOTE(review): no UseRealTrue16Insts instantiation of f16_to_fp_Pats is visible in this hunk, while f16_fp_Pats has one - confirm this is intentional.
1143+
1144+ multiclass f16_fp_Pats<Instruction cvt_f16_f32_inst_e64,
1145+ Instruction cvt_f32_f16_inst_e64,
1146+ RegOrImmOperand VSrc> {
1147+ def : GCNPat <
1148+ (f64 (any_fpextend f16:$src)),
1149+ (V_CVT_F64_F32_e32 (cvt_f32_f16_inst_e64 SRCMODS.NONE, $src))
1150+ >;
11341151
11351152 def : GCNPat <
11361153 (i32 (fp_to_sint f16:$src)),
1137- (V_CVT_I32_F32_e32 (cvt_f32_f16_inst_e64 SRCMODS.NONE, VSrc_b32 :$src))
1154+ (V_CVT_I32_F32_e32 (cvt_f32_f16_inst_e64 SRCMODS.NONE, VSrc :$src))
11381155 >;
11391156
11401157 def : GCNPat <
11411158 (i32 (fp_to_uint f16:$src)),
1142- (V_CVT_U32_F32_e32 (cvt_f32_f16_inst_e64 SRCMODS.NONE, VSrc_b32 :$src))
1159+ (V_CVT_U32_F32_e32 (cvt_f32_f16_inst_e64 SRCMODS.NONE, VSrc :$src))
11431160 >;
11441161
11451162 def : GCNPat <
@@ -1151,20 +1168,16 @@ multiclass f16_fp_Pats<Instruction cvt_f16_f32_inst_e64, Instruction cvt_f32_f16
11511168 (f16 (uint_to_fp i32:$src)),
11521169 (cvt_f16_f32_inst_e64 SRCMODS.NONE, (V_CVT_F32_U32_e32 VSrc_b32:$src))
11531170 >;
1154-
1155- // This is only used on targets without half support
1156- // TODO: Introduce strict variant of AMDGPUfp_to_f16 and share custom lowering
1157- def : GCNPat <
1158- (i32 (strict_fp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
1159- (cvt_f16_f32_inst_e64 $src0_modifiers, f32:$src0)
1160- >;
11611171}
11621172
1163- let True16Predicate = NotHasTrue16BitInsts in
1164- defm : f16_fp_Pats<V_CVT_F16_F32_e64, V_CVT_F32_F16_e64>;
1173+ let SubtargetPredicate = NotHasTrue16BitInsts in // NOTE(review): the removed instantiation set True16Predicate, this one sets SubtargetPredicate - confirm the change of field is intended.
1174+ defm : f16_fp_Pats<V_CVT_F16_F32_e64, V_CVT_F32_F16_e64, VSrc_b32>; // Third argument is the VSrc operand class used for $src in the fp_to_sint / fp_to_uint patterns.
1175+
1176+ let SubtargetPredicate = UseRealTrue16Insts in // New instantiation: t16 e64 converts, with VSrcT_b16 as the VSrc operand class for the f16 source.
1177+ defm : f16_fp_Pats<V_CVT_F16_F32_t16_e64, V_CVT_F32_F16_t16_e64, VSrcT_b16>;
11651178
1166- let True16Predicate = UseFakeTrue16Insts in
1167- defm : f16_fp_Pats<V_CVT_F16_F32_fake16_e64, V_CVT_F32_F16_fake16_e64>;
1179+ let SubtargetPredicate = UseFakeTrue16Insts in // NOTE(review): the removed instantiation set True16Predicate, this one sets SubtargetPredicate - confirm the change of field is intended.
1180+ defm : f16_fp_Pats<V_CVT_F16_F32_fake16_e64, V_CVT_F32_F16_fake16_e64, VSrc_b16 >; // fake16 converts; VSrc_b16 is passed as the VSrc operand class.
11681181
11691182//===----------------------------------------------------------------------===//
11701183// VOP2 Patterns
@@ -2774,13 +2787,24 @@ def : GCNPat <
27742787 SSrc_i1:$src))
27752788>;
27762789
2777- let SubtargetPredicate = HasTrue16BitInsts in
2790+ let SubtargetPredicate = UseRealTrue16Insts in // f16 sint_to_fp of an i1, selected to the t16 e64 convert (replaces the fake16_e32 form on the removed lines).
27782791def : GCNPat <
27792792 (f16 (sint_to_fp i1:$src)),
2780- (V_CVT_F16_F32_fake16_e32 (
2781- V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
2793+ (V_CVT_F16_F32_t16_e64 /*src0_modifiers*/ 0, // e64 form: src0_modifiers, clamp, omod and op_sel are all written out explicitly as 0.
2794+ ( V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0), // V_CNDMASK_B32_e64 is given 0, CONST.FP32_NEG_ONE and the i1 $src; its result is the convert's source operand.
27822795 /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_NEG_ONE),
2783- SSrc_i1:$src))
2796+ SSrc_i1:$src),
2797+ /*clamp*/ 0, /*omod*/ 0, /*op_sel*/ 0)
2798+ >;
2799+
2800+ let SubtargetPredicate = UseFakeTrue16Insts in // f16 sint_to_fp of an i1, selected to the fake16 e64 convert.
2801+ def : GCNPat <
2802+ (f16 (sint_to_fp i1:$src)),
2803+ (V_CVT_F16_F32_fake16_e64 /*src0_modifiers*/ 0, // fake16 e64 form: src0_modifiers, clamp and omod are written out as 0; no op_sel operand is supplied here.
2804+ (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0), // V_CNDMASK_B32_e64 is given 0, CONST.FP32_NEG_ONE and the i1 $src; its result is the convert's source operand.
2805+ /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_NEG_ONE),
2806+ SSrc_i1:$src),
2807+ /*clamp*/ 0, /*omod*/ 0)
27842808>;
27852809
27862810let SubtargetPredicate = NotHasTrue16BitInsts in
@@ -2791,13 +2815,25 @@ def : GCNPat <
27912815 /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_ONE),
27922816 SSrc_i1:$src))
27932817>;
2794- let SubtargetPredicate = HasTrue16BitInsts in
2818+
2819+ let SubtargetPredicate = UseRealTrue16Insts in // f16 uint_to_fp of an i1, selected to the t16 e64 convert (replaces the fake16_e32 form on the removed lines).
27952820def : GCNPat <
27962821 (f16 (uint_to_fp i1:$src)),
2797- (V_CVT_F16_F32_fake16_e32 (
2798- V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
2822+ (V_CVT_F16_F32_t16_e64 /*src0_modifiers*/ 0, // e64 form: src0_modifiers, clamp, omod and op_sel are all written out explicitly as 0.
2823+ ( V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0), // V_CNDMASK_B32_e64 is given 0, CONST.FP32_ONE and the i1 $src; its result is the convert's source operand.
27992824 /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_ONE),
2800- SSrc_i1:$src))
2825+ SSrc_i1:$src),
2826+ /*clamp*/ 0, /*omod*/ 0, /*op_sel*/ 0)
2827+ >;
2828+
2829+ let SubtargetPredicate = UseFakeTrue16Insts in // f16 uint_to_fp of an i1, selected to the fake16 e64 convert.
2830+ def : GCNPat <
2831+ (f16 (uint_to_fp i1:$src)),
2832+ (V_CVT_F16_F32_fake16_e64 /*src0_modifiers*/ 0, // fake16 e64 form: src0_modifiers, clamp and omod are written out as 0; no op_sel operand is supplied here.
2833+ (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0), // V_CNDMASK_B32_e64 is given 0, CONST.FP32_ONE and the i1 $src; its result is the convert's source operand.
2834+ /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_ONE),
2835+ SSrc_i1:$src),
2836+ /*clamp*/ 0, /*omod*/ 0)
28012837>;
28022838
28032839def : GCNPat <
0 commit comments