@@ -187,21 +187,17 @@ class VOPProfileI2F<ValueType dstVt, ValueType srcVt> :
187187 let HasClamp = 1;
188188}
189189
190- class VOPProfileI2F_True16<ValueType dstVt, ValueType srcVt> :
191- VOPProfile_Fake16<VOPProfile<[dstVt, srcVt, untyped, untyped]>> {
192-
193- let Ins64 = (ins Src0RC64:$src0, Clamp:$clamp, omod:$omod);
194- let InsVOP3Base = (ins Src0VOP3DPP:$src0, Clamp:$clamp, omod:$omod);
195- let AsmVOP3Base = "$vdst, $src0$clamp$omod";
196-
197- let HasModifiers = 0;
198- let HasClamp = 1;
199- }
200-
201190def VOP1_F64_I32 : VOPProfileI2F <f64, i32>;
202191def VOP1_F32_I32 : VOPProfileI2F <f32, i32>;
203192def VOP1_F16_I16 : VOPProfileI2F <f16, i16>;
204- def VOP1_F16_I16_t16 : VOPProfileI2F_True16 <f16, i16>;
193+ def VOP1_F16_I16_t16 : VOPProfile_True16 <VOP_F16_I16> { // True16 profile handed to the v_cvt_f16_u16 / v_cvt_f16_i16 defms
194+ let HasClamp = 1; // only override in this def: HasClamp forced to 1
195+ }
196+ def VOP1_F16_I16_fake16 : VOPProfile_Fake16<VOP_F16_I16> { // Fake16 profile handed to the same defms as VOP1_F16_I16_t16
197+ let HasModifiers = 0; // HasModifiers is switched off ...
198+ let HasOMod = 1; // ... while HasOMod and HasClamp are both forced to 1
199+ let HasClamp = 1;
200+ }
205201
206202def VOP_NOP_PROFILE : VOPProfile <[untyped, untyped, untyped, untyped]>{
207203 let HasExtVOP3DPP = 0;
@@ -217,10 +213,14 @@ class VOP_SPECIAL_OMOD_PROF<ValueType dstVt, ValueType srcVt> :
217213def VOP_I32_F32_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i32, f32>;
218214def VOP_I32_F64_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i32, f64>;
219215def VOP_I16_F16_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i16, f16>;
220- def VOP_I16_F16_SPECIAL_OMOD_t16 : VOPProfile_Fake16<VOP_I16_F16> {
216+ def VOP_I16_F16_SPECIAL_OMOD_t16 : VOPProfile_True16<VOP_I16_F16> { // True16 profile built on VOP_I16_F16
217+ let HasOMod = 1; // only override in this def: HasOMod forced to 1
218+ }
219+ def VOP_I16_F16_SPECIAL_OMOD_fake16 : VOPProfile_Fake16<VOP_I16_F16> { // Fake16 profile built on VOP_I16_F16
221220 let HasOMod = 1; // only override in this def: HasOMod forced to 1
222221}
223222
223+
224224//===----------------------------------------------------------------------===//
225225// VOP1 Instructions
226226//===----------------------------------------------------------------------===//
@@ -479,24 +479,16 @@ let SubtargetPredicate = isGFX7Plus in {
479479} // End isReMaterializable = 1
480480
481481let FPDPRounding = 1 in {
482- let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
483- defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP1_F16_I16, uint_to_fp>;
484- defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>;
485- }
486- let OtherPredicates = [HasTrue16BitInsts] in {
487- defm V_CVT_F16_U16_t16 : VOP1Inst <"v_cvt_f16_u16_t16", VOP1_F16_I16_t16, uint_to_fp>;
488- defm V_CVT_F16_I16_t16 : VOP1Inst <"v_cvt_f16_i16_t16", VOP1_F16_I16_t16, sint_to_fp>;
489- }
482+ defm V_CVT_F16_U16 : VOP1Inst_t16_with_profiles <"v_cvt_f16_u16", VOP1_F16_I16, VOP1_F16_I16_t16, VOP1_F16_I16_fake16, uint_to_fp>; // uint_to_fp node; base, _t16 and _fake16 profiles passed explicitly
483+ defm V_CVT_F16_I16 : VOP1Inst_t16_with_profiles <"v_cvt_f16_i16", VOP1_F16_I16, VOP1_F16_I16_t16, VOP1_F16_I16_fake16, sint_to_fp>; // same three profiles, sint_to_fp node
484+
490485} // End FPDPRounding = 1
491486// OMod clears exceptions when set in these two instructions
492- let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
493- defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16_SPECIAL_OMOD, fp_to_uint>;
494- defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16_SPECIAL_OMOD, fp_to_sint>;
495- }
496- let OtherPredicates = [HasTrue16BitInsts] in {
497- defm V_CVT_U16_F16_t16 : VOP1Inst <"v_cvt_u16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16, fp_to_uint>;
498- defm V_CVT_I16_F16_t16 : VOP1Inst <"v_cvt_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16, fp_to_sint>;
499- }
487+ defm V_CVT_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_u16_f16", // fp_to_uint node; base, _t16 and _fake16 SPECIAL_OMOD profiles
488+ VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16, fp_to_uint>;
489+ defm V_CVT_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_i16_f16", // same three profiles, fp_to_sint node
490+ VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16, fp_to_sint>;
491+
500492let TRANS = 1, SchedRW = [WriteTrans32] in {
501493defm V_RCP_F16 : VOP1Inst_t16 <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>;
502494defm V_SQRT_F16 : VOP1Inst_t16 <"v_sqrt_f16", VOP_F16_F16, any_amdgcn_sqrt>;
@@ -507,12 +499,8 @@ defm V_SIN_F16 : VOP1Inst_t16 <"v_sin_f16", VOP_F16_F16, AMDGPUsin>;
507499defm V_COS_F16 : VOP1Inst_t16 <"v_cos_f16", VOP_F16_F16, AMDGPUcos>;
508500} // End TRANS = 1, SchedRW = [WriteTrans32]
509501defm V_FREXP_MANT_F16 : VOP1Inst_t16 <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>;
510- let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
511- defm V_FREXP_EXP_I16_F16 : VOP1Inst <"v_frexp_exp_i16_f16", VOP_I16_F16_SPECIAL_OMOD, int_amdgcn_frexp_exp>;
512- }
513- let OtherPredicates = [HasTrue16BitInsts] in {
514- defm V_FREXP_EXP_I16_F16_t16 : VOP1Inst <"v_frexp_exp_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16, int_amdgcn_frexp_exp>;
515- }
502+ defm V_FREXP_EXP_I16_F16 : VOP1Inst_t16_with_profiles <"v_frexp_exp_i16_f16", // int_amdgcn_frexp_exp node; base, _t16 and _fake16 SPECIAL_OMOD profiles
503+ VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16, int_amdgcn_frexp_exp>;
516504defm V_FLOOR_F16 : VOP1Inst_t16 <"v_floor_f16", VOP_F16_F16, ffloor>;
517505defm V_CEIL_F16 : VOP1Inst_t16 <"v_ceil_f16", VOP_F16_F16, fceil>;
518506defm V_TRUNC_F16 : VOP1Inst_t16 <"v_trunc_f16", VOP_F16_F16, ftrunc>;
@@ -560,14 +548,10 @@ let SubtargetPredicate = isGFX9Plus in {
560548 defm V_SAT_PK_U8_I16 : VOP1Inst_t16<"v_sat_pk_u8_i16", VOP_I16_I32>;
561549
562550 let mayRaiseFPException = 0 in {
563- let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
564- defm V_CVT_NORM_I16_F16 : VOP1Inst<"v_cvt_norm_i16_f16", VOP_I16_F16_SPECIAL_OMOD>;
565- defm V_CVT_NORM_U16_F16 : VOP1Inst<"v_cvt_norm_u16_f16", VOP_I16_F16_SPECIAL_OMOD>;
566- }
567- let OtherPredicates = [HasTrue16BitInsts] in {
568- defm V_CVT_NORM_I16_F16_t16 : VOP1Inst<"v_cvt_norm_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16>;
569- defm V_CVT_NORM_U16_F16_t16 : VOP1Inst<"v_cvt_norm_u16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16>;
570- }
551+ defm V_CVT_NORM_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_i16_f16", // three profiles only; no node argument is passed
552+ VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>;
553+ defm V_CVT_NORM_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_u16_f16", // same three profiles, likewise without a node argument
554+ VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>;
571555 } // End mayRaiseFPException = 0
572556} // End SubtargetPredicate = isGFX9Plus
573557
@@ -939,6 +923,14 @@ multiclass VOP1_Real_FULL_with_name_gfx11_gfx12<bits<9> op, string opName,
939923 VOP1_Real_FULL_with_name<GFX11Gen, op, opName, asmName>,
940924 VOP1_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;
941925
926+ multiclass VOP1_Real_FULL_t16_and_f16_gfx11_gfx12<bits<9> op, string asmName, // op and asmName are shared by both expansions below
927+ string opName = NAME> { // opName defaults to NAME
928+ defm opName#"_t16" : // first expansion: the opName#"_t16" variant
929+ VOP1_Real_FULL_with_name_gfx11_gfx12<op, opName#"_t16", asmName>;
930+ defm opName#"_fake16": // second expansion: the opName#"_fake16" variant
931+ VOP1_Real_FULL_with_name_gfx11_gfx12<op, opName#"_fake16", asmName>;
932+ }
933+
942934multiclass VOP1Only_Real_gfx11_gfx12<bits<9> op> :
943935 VOP1Only_Real<GFX11Gen, op>, VOP1Only_Real<GFX12Gen, op>;
944936
@@ -979,10 +971,10 @@ defm V_NOT_B16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x069, "v_not_b16"
979971defm V_CVT_I32_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06a, "v_cvt_i32_i16">;
980972defm V_CVT_U32_U16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06b, "v_cvt_u32_u16">;
981973
982- defm V_CVT_F16_U16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12 <0x050, "v_cvt_f16_u16">;
983- defm V_CVT_F16_I16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12 <0x051, "v_cvt_f16_i16">;
984- defm V_CVT_U16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12 <0x052, "v_cvt_u16_f16">;
985- defm V_CVT_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12 <0x053, "v_cvt_i16_f16">;
974+ defm V_CVT_F16_U16 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12 <0x050, "v_cvt_f16_u16">; // op 0x050; multiclass expands both _t16 and _fake16
975+ defm V_CVT_F16_I16 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12 <0x051, "v_cvt_f16_i16">; // op 0x051
976+ defm V_CVT_U16_F16 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12 <0x052, "v_cvt_u16_f16">; // op 0x052
977+ defm V_CVT_I16_F16 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12 <0x053, "v_cvt_i16_f16">; // op 0x053
986978defm V_RCP_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x054, "v_rcp_f16">;
987979defm V_RCP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x054, "v_rcp_f16">;
988980defm V_SQRT_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x055, "v_sqrt_f16">;
@@ -994,7 +986,7 @@ defm V_LOG_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x057, "v_log_f16"
994986defm V_EXP_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">;
995987defm V_EXP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">;
996988defm V_FREXP_MANT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x059, "v_frexp_mant_f16">;
997- defm V_FREXP_EXP_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12 <0x05a, "v_frexp_exp_i16_f16">;
989+ defm V_FREXP_EXP_I16_F16 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12 <0x05a, "v_frexp_exp_i16_f16">; // op 0x05a; multiclass expands both _t16 and _fake16
998990defm V_FLOOR_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">;
999991defm V_FLOOR_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">;
1000992defm V_CEIL_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">;
@@ -1005,13 +997,11 @@ defm V_FRACT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05f, "v_fract_f1
1005997defm V_SIN_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x060, "v_sin_f16">;
1006998defm V_COS_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x061, "v_cos_f16">;
1007999defm V_SAT_PK_U8_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x062, "v_sat_pk_u8_i16">;
1008- defm V_CVT_NORM_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12 <0x063, "v_cvt_norm_i16_f16">;
1009- defm V_CVT_NORM_U16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12 <0x064, "v_cvt_norm_u16_f16">;
1000+ defm V_CVT_NORM_I16_F16 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12 <0x063, "v_cvt_norm_i16_f16">; // op 0x063; multiclass expands both _t16 and _fake16
1001+ defm V_CVT_NORM_U16_F16 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12 <0x064, "v_cvt_norm_u16_f16">; // op 0x064
10101002
1011- defm V_CVT_F16_F32_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x00a, "v_cvt_f16_f32">;
1012- defm V_CVT_F16_F32_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x00a, "v_cvt_f16_f32">;
1013- defm V_CVT_F32_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x00b, "v_cvt_f32_f16">;
1014- defm V_CVT_F32_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x00b, "v_cvt_f32_f16">;
1003+ defm V_CVT_F16_F32 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12<0x00a, "v_cvt_f16_f32">; // op 0x00a; one defm now yields both the _t16 and _fake16 variants
1004+ defm V_CVT_F32_F16 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12<0x00b, "v_cvt_f32_f16">; // op 0x00b; likewise
10151005
10161006//===----------------------------------------------------------------------===//
10171007// GFX10.
0 commit comments