@@ -196,12 +196,20 @@ class VOPProfileI2F_True16<ValueType dstVt, ValueType srcVt> :
196196
197197 let HasModifiers = 0;
198198 let HasClamp = 1;
199+
200+ // this is a hack to avoid decoderNamespace issue
201+ let IsRealTrue16 = 1;
199202}
200203
201204def VOP1_F64_I32 : VOPProfileI2F <f64, i32>;
202205def VOP1_F32_I32 : VOPProfileI2F <f32, i32>;
203206def VOP1_F16_I16 : VOPProfileI2F <f16, i16>;
204207def VOP1_F16_I16_t16 : VOPProfileI2F_True16 <f16, i16>;
208+ def VOP1_F16_I16_fake16 : VOPProfile_Fake16 <VOP_F16_I16> {
209+ let HasModifiers = 0;
210+ let HasOMod = 1;
211+ let HasClamp = 1;
212+ }
205213
206214def VOP_NOP_PROFILE : VOPProfile <[untyped, untyped, untyped, untyped]>{
207215 let HasExtVOP3DPP = 0;
@@ -219,6 +227,12 @@ def VOP_I32_F64_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i32, f64>;
219227def VOP_I16_F16_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i16, f16>;
220228def VOP_I16_F16_SPECIAL_OMOD_t16 : VOPProfile_Fake16<VOP_I16_F16> {
221229 let HasOMod = 1;
230+
231+ // this is a hack to avoid decoderNamespace issue
232+ let IsRealTrue16 = 1;
233+ }
234+ def VOP_I16_F16_SPECIAL_OMOD_fake16 : VOPProfile_Fake16<VOP_I16_F16> {
235+ let HasOMod = 1;
222236}
223237
224238//===----------------------------------------------------------------------===//
@@ -294,16 +308,22 @@ defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>;
294308defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32_SPECIAL_OMOD, fp_to_uint>;
295309defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32_SPECIAL_OMOD, fp_to_sint>;
296310let FPDPRounding = 1, isReMaterializable = 0 in {
311+ // V_CVT_F16_F32 and V_CVT_F32_F16 are a special case because they are
312+ // present in targets without Has16BitInsts. Otherwise they could use
313+ // class VOP1Inst_t16.
297314 let OtherPredicates = [NotHasTrue16BitInsts] in
298- defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, any_fpround>;
299- let OtherPredicates = [HasTrue16BitInsts] in
300- defm V_CVT_F16_F32_t16 : VOP1Inst <"v_cvt_f16_f32_t16", VOPProfile_Fake16<VOP_F16_F32>, any_fpround>;
315+ defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, any_fpround>;
316+ let OtherPredicates = [UseRealTrue16Insts] in
317+ defm V_CVT_F16_F32_t16 : VOP1Inst <"v_cvt_f16_f32_t16", VOPProfileI2F_True16<f16, f32>, any_fpround>;
318+ let OtherPredicates = [UseFakeTrue16Insts] in
319+ defm V_CVT_F16_F32_fake16 : VOP1Inst <"v_cvt_f16_f32_fake16", VOPProfile_Fake16<VOP_F16_F32>, any_fpround>;
301320} // End FPDPRounding = 1, isReMaterializable = 0
302-
303321let OtherPredicates = [NotHasTrue16BitInsts] in
304- defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, any_fpextend>;
305- let OtherPredicates = [HasTrue16BitInsts] in
306- defm V_CVT_F32_F16_t16 : VOP1Inst <"v_cvt_f32_f16_t16", VOPProfile_Fake16<VOP_F32_F16>, any_fpextend>;
322+ defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, any_fpextend>;
323+ let OtherPredicates = [UseRealTrue16Insts] in
324+ defm V_CVT_F32_F16_t16 : VOP1Inst <"v_cvt_f32_f16_t16", VOPProfileI2F_True16<f32, f16>, any_fpextend>;
325+ let OtherPredicates = [UseFakeTrue16Insts] in
326+ defm V_CVT_F32_F16_fake16 : VOP1Inst <"v_cvt_f32_f16_fake16", VOPProfile_Fake16<VOP_F32_F16>, any_fpextend>;
307327
308328let ReadsModeReg = 0, mayRaiseFPException = 0 in {
309329defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
@@ -473,24 +493,15 @@ let SubtargetPredicate = isGFX7Plus in {
473493} // End isReMaterializable = 1
474494
475495let FPDPRounding = 1 in {
476- let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
477- defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP1_F16_I16, uint_to_fp>;
478- defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>;
479- }
480- let OtherPredicates = [HasTrue16BitInsts] in {
481- defm V_CVT_F16_U16_t16 : VOP1Inst <"v_cvt_f16_u16_t16", VOP1_F16_I16_t16, uint_to_fp>;
482- defm V_CVT_F16_I16_t16 : VOP1Inst <"v_cvt_f16_i16_t16", VOP1_F16_I16_t16, sint_to_fp>;
483- }
496+ defm V_CVT_F16_U16 : VOP1Inst_t16_with_profiles <"v_cvt_f16_u16", VOP1_F16_I16, VOP1_F16_I16_t16, VOP1_F16_I16_fake16, uint_to_fp>;
497+ defm V_CVT_F16_I16 : VOP1Inst_t16_with_profiles <"v_cvt_f16_i16", VOP1_F16_I16, VOP1_F16_I16_t16, VOP1_F16_I16_fake16, sint_to_fp>;
484498} // End FPDPRounding = 1
485499// OMod clears exceptions when set in these two instructions
486- let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
487- defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16_SPECIAL_OMOD, fp_to_uint>;
488- defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16_SPECIAL_OMOD, fp_to_sint>;
489- }
490- let OtherPredicates = [HasTrue16BitInsts] in {
491- defm V_CVT_U16_F16_t16 : VOP1Inst <"v_cvt_u16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16, fp_to_uint>;
492- defm V_CVT_I16_F16_t16 : VOP1Inst <"v_cvt_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16, fp_to_sint>;
493- }
500+ defm V_CVT_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_u16_f16",
501+ VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16, fp_to_uint>;
502+ defm V_CVT_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_i16_f16",
503+ VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16, fp_to_sint>;
504+
494505let TRANS = 1, SchedRW = [WriteTrans32] in {
495506defm V_RCP_F16 : VOP1Inst_t16 <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>;
496507defm V_SQRT_F16 : VOP1Inst_t16 <"v_sqrt_f16", VOP_F16_F16, any_amdgcn_sqrt>;
@@ -501,12 +512,8 @@ defm V_SIN_F16 : VOP1Inst_t16 <"v_sin_f16", VOP_F16_F16, AMDGPUsin>;
501512defm V_COS_F16 : VOP1Inst_t16 <"v_cos_f16", VOP_F16_F16, AMDGPUcos>;
502513} // End TRANS = 1, SchedRW = [WriteTrans32]
503514defm V_FREXP_MANT_F16 : VOP1Inst_t16 <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>;
504- let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
505- defm V_FREXP_EXP_I16_F16 : VOP1Inst <"v_frexp_exp_i16_f16", VOP_I16_F16_SPECIAL_OMOD, int_amdgcn_frexp_exp>;
506- }
507- let OtherPredicates = [HasTrue16BitInsts] in {
508- defm V_FREXP_EXP_I16_F16_t16 : VOP1Inst <"v_frexp_exp_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16, int_amdgcn_frexp_exp>;
509- }
515+ defm V_FREXP_EXP_I16_F16 : VOP1Inst_t16_with_profiles <"v_frexp_exp_i16_f16",
516+ VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16, int_amdgcn_frexp_exp>;
510517defm V_FLOOR_F16 : VOP1Inst_t16 <"v_floor_f16", VOP_F16_F16, ffloor>;
511518defm V_CEIL_F16 : VOP1Inst_t16 <"v_ceil_f16", VOP_F16_F16, fceil>;
512519defm V_TRUNC_F16 : VOP1Inst_t16 <"v_trunc_f16", VOP_F16_F16, ftrunc>;
@@ -525,7 +532,7 @@ def : GCNPat<
525532 (V_CVT_F16_F32_e32 $src)
526533>;
527534}
528- let OtherPredicates = [HasTrue16BitInsts ] in {
535+ let OtherPredicates = [UseRealTrue16Insts ] in {
529536def : GCNPat<
530537 (f32 (f16_to_fp i16:$src)),
531538 (V_CVT_F32_F16_t16_e32 $src)
@@ -535,6 +542,16 @@ def : GCNPat<
535542 (V_CVT_F16_F32_t16_e32 $src)
536543>;
537544}
545+ let OtherPredicates = [UseFakeTrue16Insts] in {
546+ def : GCNPat<
547+ (f32 (f16_to_fp i16:$src)),
548+ (V_CVT_F32_F16_fake16_e32 $src)
549+ >;
550+ def : GCNPat<
551+ (i16 (AMDGPUfp_to_f16 f32:$src)),
552+ (V_CVT_F16_F32_fake16_e32 $src)
553+ >;
554+ }
538555
539556def VOP_SWAP_I32 : VOPProfile<[i32, i32, untyped, untyped]> {
540557 let Outs32 = (outs VGPR_32:$vdst, VRegSrc_32:$vdst1);
@@ -554,14 +571,10 @@ let SubtargetPredicate = isGFX9Plus in {
554571 defm V_SAT_PK_U8_I16 : VOP1Inst_t16<"v_sat_pk_u8_i16", VOP_I16_I32>;
555572
556573 let mayRaiseFPException = 0 in {
557- let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
558- defm V_CVT_NORM_I16_F16 : VOP1Inst<"v_cvt_norm_i16_f16", VOP_I16_F16_SPECIAL_OMOD>;
559- defm V_CVT_NORM_U16_F16 : VOP1Inst<"v_cvt_norm_u16_f16", VOP_I16_F16_SPECIAL_OMOD>;
560- }
561- let OtherPredicates = [HasTrue16BitInsts] in {
562- defm V_CVT_NORM_I16_F16_t16 : VOP1Inst<"v_cvt_norm_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16>;
563- defm V_CVT_NORM_U16_F16_t16 : VOP1Inst<"v_cvt_norm_u16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16>;
564- }
574+ defm V_CVT_NORM_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_i16_f16",
575+ VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>;
576+ defm V_CVT_NORM_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_u16_f16",
577+ VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>;
565578 } // End mayRaiseFPException = 0
566579} // End SubtargetPredicate = isGFX9Plus
567580
@@ -975,9 +988,13 @@ defm V_CVT_I32_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06a, "v_cvt_i32_
975988defm V_CVT_U32_U16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06b, "v_cvt_u32_u16">;
976989
977990defm V_CVT_F16_U16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x050, "v_cvt_f16_u16">;
991+ defm V_CVT_F16_U16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x050, "v_cvt_f16_u16">;
978992defm V_CVT_F16_I16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x051, "v_cvt_f16_i16">;
993+ defm V_CVT_F16_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x051, "v_cvt_f16_i16">;
979994defm V_CVT_U16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x052, "v_cvt_u16_f16">;
995+ defm V_CVT_U16_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x052, "v_cvt_u16_f16">;
980996defm V_CVT_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x053, "v_cvt_i16_f16">;
997+ defm V_CVT_I16_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x053, "v_cvt_i16_f16">;
981998defm V_RCP_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x054, "v_rcp_f16">;
982999defm V_RCP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x054, "v_rcp_f16">;
9831000defm V_SQRT_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x055, "v_sqrt_f16">;
@@ -990,6 +1007,7 @@ defm V_EXP_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16"
9901007defm V_EXP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">;
9911008defm V_FREXP_MANT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x059, "v_frexp_mant_f16">;
9921009defm V_FREXP_EXP_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05a, "v_frexp_exp_i16_f16">;
1010+ defm V_FREXP_EXP_I16_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05a, "v_frexp_exp_i16_f16">;
9931011defm V_FLOOR_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">;
9941012defm V_FLOOR_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">;
9951013defm V_CEIL_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">;
@@ -1001,10 +1019,14 @@ defm V_SIN_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x060, "v_sin_f16"
10011019defm V_COS_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x061, "v_cos_f16">;
10021020defm V_SAT_PK_U8_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x062, "v_sat_pk_u8_i16">;
10031021defm V_CVT_NORM_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x063, "v_cvt_norm_i16_f16">;
1022+ defm V_CVT_NORM_I16_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x063, "v_cvt_norm_i16_f16">;
10041023defm V_CVT_NORM_U16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x064, "v_cvt_norm_u16_f16">;
1024+ defm V_CVT_NORM_U16_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x064, "v_cvt_norm_u16_f16">;
10051025
10061026defm V_CVT_F16_F32_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x00a, "v_cvt_f16_f32">;
1027+ defm V_CVT_F16_F32_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x00a, "v_cvt_f16_f32">;
10071028defm V_CVT_F32_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x00b, "v_cvt_f32_f16">;
1029+ defm V_CVT_F32_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x00b, "v_cvt_f32_f16">;
10081030
10091031//===----------------------------------------------------------------------===//
10101032// GFX10.
0 commit comments