@@ -95,6 +95,7 @@ class VOP2_Real <VOP2_Pseudo ps, int EncodingFamily, string real_name = ps.Mnemo
9595
9696 // copy relevant pseudo op flags
9797 let SubtargetPredicate = ps.SubtargetPredicate;
98+ let True16Predicate = ps.True16Predicate;
9899 let OtherPredicates = ps.OtherPredicates;
99100 let AsmMatchConverter = ps.AsmMatchConverter;
100101 let AsmVariantName = ps.AsmVariantName;
@@ -223,8 +224,11 @@ multiclass VOP2Inst_e64_t16<string opName,
223224 let OtherPredicates = [Has16BitInsts], True16Predicate = NotHasTrue16BitInsts in {
224225 defm NAME : VOP2Inst<opName, P, node, revOp, GFX9Renamed>;
225226 }
226- let SubtargetPredicate = HasTrue16BitInsts in {
227- defm _t16 : VOP2Inst_e64<opName#"_t16", VOPProfile_Fake16<P>, node, revOp#"_t16", GFX9Renamed>;
// Two e64-only variants are instantiated per opcode: "_t16" built from
// VOPProfile_True16<P> and "_fake16" built from VOPProfile_Fake16<P>. Each is
// guarded by its own predicate and gets a matching suffix on both the opcode
// name and the reversed-operand opcode name.
// NOTE(review): these guards assign SubtargetPredicate, while the FMAMK/FMAAK/
// FMAC definitions in this file assign True16Predicate for the same two
// predicates - confirm which field is intended here.
227+ let SubtargetPredicate = UseRealTrue16Insts in {
228+ defm _t16 : VOP2Inst_e64<opName#"_t16", VOPProfile_True16<P>, node, revOp#"_t16", GFX9Renamed>;
229+ }
230+ let SubtargetPredicate = UseFakeTrue16Insts in {
231+ defm _fake16 : VOP2Inst_e64<opName#"_fake16", VOPProfile_Fake16<P>, node, revOp#"_fake16", GFX9Renamed>;
228232 }
229233}
230234
@@ -388,9 +392,14 @@ class VOP_MADAK <ValueType vt> : VOP_MADK_Base<vt> {
388392
389393def VOP_MADAK_F16 : VOP_MADAK <f16>;
// f16 MADAK profile flagged IsRealTrue16. The destination class comes from
// getVALUDstForVT with IsTrue16 = 1 and IsVOP3Encoding = 0, and the 32-bit
// operand list is (src0, src1, imm) using the 16-bit Lo128 operand classes.
// NOTE(review): only IsRealTrue16 is set here; VOP_MAC_F16_t16 in this file
// sets both IsTrue16 and IsRealTrue16 - confirm IsTrue16 is not needed.
390394def VOP_MADAK_F16_t16 : VOP_MADAK <f16> {
395+ let IsRealTrue16 = 1;
396+ let DstRC = getVALUDstForVT<DstVT, 1/*IsTrue16*/, 0/*IsVOP3Encoding*/>.ret;
397+ let Ins32 = (ins VSrcT_f16_Lo128_Deferred:$src0, VGPRSrc_16_Lo128:$src1, ImmOpType:$imm);
398+ }
// Fake16 counterpart of the f16 MADAK profile: sets IsTrue16 only, takes its
// destination class from getVALUDstForVT_fake16 and keeps 32-bit Lo128
// operand classes. Operand order is (src0, src1, imm).
399+ def VOP_MADAK_F16_fake16 : VOP_MADAK <f16> {
391400 let IsTrue16 = 1;
392- let DstRC = VOPDstOperand<VGPR_32_Lo128> ;
393- let Ins32 = (ins VSrcFake16_f16_Lo128_Deferred:$src0, VGPR_32_Lo128 :$src1, ImmOpType:$imm);
401+ let DstRC = getVALUDstForVT_fake16<DstVT>.ret ;
402+ let Ins32 = (ins VSrcFake16_f16_Lo128_Deferred:$src0, VGPRSrc_32_Lo128 :$src1, ImmOpType:$imm);
394403}
395404def VOP_MADAK_F32 : VOP_MADAK <f32>;
396405
@@ -413,17 +422,24 @@ class VOP_MADMK <ValueType vt> : VOP_MADK_Base<vt> {
413422
414423def VOP_MADMK_F16 : VOP_MADMK <f16>;
// f16 MADMK profile flagged IsRealTrue16. Same destination-class selection as
// the MADAK variant, but the 32-bit operand list places the immediate in the
// middle: (src0, imm, src1).
// NOTE(review): only IsRealTrue16 is set here; VOP_MAC_F16_t16 in this file
// sets both IsTrue16 and IsRealTrue16 - confirm IsTrue16 is not needed.
415424def VOP_MADMK_F16_t16 : VOP_MADMK <f16> {
425+ let IsRealTrue16 = 1;
426+ let DstRC = getVALUDstForVT<DstVT, 1/*IsTrue16*/, 0/*IsVOP3Encoding*/>.ret;
427+ let Ins32 = (ins VSrcT_f16_Lo128_Deferred:$src0, ImmOpType:$imm, VGPRSrc_16_Lo128:$src1);
428+ }
// Fake16 counterpart of the f16 MADMK profile: sets IsTrue16 only, takes its
// destination class from getVALUDstForVT_fake16 and keeps 32-bit Lo128
// operand classes. Operand order is (src0, imm, src1).
429+ def VOP_MADMK_F16_fake16 : VOP_MADMK <f16> {
416430 let IsTrue16 = 1;
417- let DstRC = VOPDstOperand<VGPR_32_Lo128> ;
418- let Ins32 = (ins VSrcFake16_f16_Lo128_Deferred:$src0, ImmOpType:$imm, VGPR_32_Lo128 :$src1);
431+ let DstRC = getVALUDstForVT_fake16<DstVT>.ret ;
432+ let Ins32 = (ins VSrcFake16_f16_Lo128_Deferred:$src0, ImmOpType:$imm, VGPRSrc_32_Lo128 :$src1);
419433}
420434def VOP_MADMK_F32 : VOP_MADMK <f32>;
421435
422436// FIXME: Remove src2_modifiers. It isn't used, so is wasting memory
423437// and processing time but it makes it easier to convert to mad.
424438class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, vt0]> {
425439 let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, getVregSrcForVT<Src2VT>.ret:$src2);
426- let Ins64 = getIns64<Src0RC64, Src1RC64, getVregSrcForVT<Src2VT>.ret, 3,
440+ // Src2 must accept the same operand types as vdst, namely VGPRs only
441+ let Src2RC64 = getVOP3VRegSrcForVT<Src2VT, IsTrue16, !not(IsRealTrue16)>.ret;
442+ let Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, 3,
427443 0, HasModifiers, HasModifiers, HasOMod,
428444 Src0Mod, Src1Mod, Src2Mod>.ret;
429445 let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
@@ -478,21 +494,19 @@ class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, v
478494}
479495
480496def VOP_MAC_F16 : VOP_MAC <f16>;
481- def VOP_MAC_F16_t16 : VOP_MAC <f16> {
497+ def VOP_MAC_F16_fake16 : VOP_MAC <f16> {
482498 let IsTrue16 = 1;
483- let HasOpSel = 1;
484- let AsmVOP3OpSel = getAsmVOP3OpSel<2/*NumSrcArgs*/, HasClamp, HasOMod,
485- HasSrc0FloatMods, HasSrc1FloatMods, HasSrc2FloatMods>.ret;
486- let DstRC = VOPDstOperand<VGPR_32_Lo128>;
487- let DstRC64 = VOPDstOperand<VGPR_32>;
488- let Src1RC32 = VGPRSrc_32_Lo128;
499+ let DstRC = getVALUDstForVT_fake16<DstVT>.ret;
500+ let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
501+ let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
489502 let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret:$src2);
490503 let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
491504 let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
492505 let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
493- let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
494- let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
495- let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
506+ let Src0ModDPP = getSrcModDPP_t16<Src0VT, 1/*IsFake16*/>.ret;
507+ let Src1ModDPP = getSrcModDPP_t16<Src1VT, 1/*IsFake16*/>.ret;
508+ let Src2ModDPP = getSrcModDPP_t16<Src2VT, 1/*IsFake16*/>.ret;
509+
496510 let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
497511 Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
498512 getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret:$src2, // stub argument
@@ -502,10 +516,47 @@ def VOP_MAC_F16_t16 : VOP_MAC <f16> {
502516 Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
503517 getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret:$src2, // stub argument
504518 dpp8:$dpp8, Dpp8FI:$fi);
505- let Src2Mod = FP32InputMods; // dummy unused modifiers
506- let Src2RC64 = VGPRSrc_32; // stub argument
519+ let DstRC64 = getVALUDstForVT<DstVT>.ret;
520+ let Src0VOP3DPP = VGPRSrc_32;
521+ let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1/*IsFake16*/>.ret;
522+ let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 1/*IsFake16*/>.ret;
523+ let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 1/*IsFake16*/>.ret;
507524 let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 1/*IsFake16*/>.ret;
525+ let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 1/*IsFake16*/>.ret;
526+ let Src0Mod = getSrc0Mod<Src0VT, DstVT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
527+ let Src1Mod = getSrcMod<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
528+ let Src2Mod = getSrcMod<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
529+ }
// f16 MAC profile for real True16: sets IsTrue16, IsRealTrue16 and HasOpSel,
// then overrides the operand classes and modifier types for the 32-bit, DPP,
// DPP8 and 64-bit forms. Every helper is queried with IsTrue16 = 1 and, where
// the helper takes an IsFake16 argument, with 0.
530+ def VOP_MAC_F16_t16 : VOP_MAC <f16> {
531+ let IsTrue16 = 1;
532+ let IsRealTrue16 = 1;
533+ let HasOpSel = 1;
// 32-bit encoding: Lo128 16-bit destination and src1; src2 uses the VGPR
// source class selected for Src2VT.
534+ let DstRC = VOPDstOperand_t16Lo128;
535+ let Src1RC32 = VGPRSrc_16_Lo128;
536+ let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, getVregSrcForVT<Src2VT, 1/*IsTrue16*/>.ret:$src2);
// DPP / DPP8 forms: VGPR-only sources with the t16 (non-fake16) modifier types.
537+ let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/>.ret;
538+ let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/>.ret;
539+ let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/>.ret;
540+ let Src0ModDPP = getSrcModDPP_t16<Src0VT, 0/*IsFake16*/>.ret;
541+ let Src1ModDPP = getSrcModDPP_t16<Src1VT, 0/*IsFake16*/>.ret;
542+ let Src2ModDPP = getSrcModDPP_t16<Src2VT, 0/*IsFake16*/>.ret;
543+ let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
544+ Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
545+ getVregSrcForVT<Src2VT, 1/*IsTrue16*/>.ret:$src2, // stub argument
546+ dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
547+ DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl);
548+ let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
549+ Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
550+ getVregSrcForVT<Src2VT, 1/*IsTrue16*/>.ret:$src2, // stub argument
551+ dpp8:$dpp8, Dpp8FI:$fi);
// 64-bit encoding: destination, src0/src1 classes and all three source
// modifier types are re-derived with IsTrue16 = 1.
// NOTE(review): the two unnamed 1s passed to getVALUDstForVT presumably are
// IsTrue16 and IsVOP3Encoding (cf. the annotated MADAK/MADMK calls) - confirm.
552+ let DstRC64 = getVALUDstForVT<DstVT, 1, 1>.ret;
553+ let Src0RC64 = getVOP3SrcForVT<Src0VT, 1/*IsTrue16*/>.ret;
554+ let Src1RC64 = getVOP3SrcForVT<Src1VT, 1/*IsTrue16*/>.ret;
555+ let Src0Mod = getSrc0Mod<Src0VT, DstVT, 1/*IsTrue16*/>.ret;
556+ let Src1Mod = getSrcMod<Src1VT, 1/*IsTrue16*/>.ret;
557+ let Src2Mod = getSrcMod<Src2VT, 1/*IsTrue16*/>.ret;
508558}
559+
509560def VOP_MAC_F32 : VOP_MAC <f32>;
510561let HasExtDPP = 0, HasExt32BitDPP = 0 in
511562def VOP_MAC_LEGACY_F32 : VOP_MAC <f32>;
@@ -664,15 +715,35 @@ class VOP2e_SGPR<list<ValueType> ArgVT> : VOPProfile<ArgVT> {
664715}
665716
666717def VOP2e_I32_I32_I32_I1 : VOP2e_SGPR<[i32, i32, i32, i1]>;
// Plain i16 select profile (i16 result, two i16 sources, i1 condition) with
// no True16-specific overrides.
718+ def VOP2e_I16_I16_I16_I1 : VOP2e_SGPR<[i16, i16, i16, i1]>;
719+ // V_CNDMASK_B16 is VOP3 only
// Real True16 flavour of the i16 select profile: enables op_sel, re-derives
// the 64-bit destination and all three source classes with IsTrue16 = 1, and
// builds InsVOP3OpSel explicitly with modifiers on, src2 modifiers off and
// op_sel on.
720+ def VOP2e_I16_I16_I16_I1_true16 : VOP2e_SGPR<[i16, i16, i16, i1]> {
721+ let IsTrue16 = 1;
722+ let IsRealTrue16 = 1;
723+ let HasOpSel = 1;
724+ let DstRC64 = getVALUDstForVT<DstVT, 1, 1>.ret;
725+ let Src0RC64 = getVOP3SrcForVT<Src0VT, 1/*IsTrue16*/>.ret;
726+ let Src1RC64 = getVOP3SrcForVT<Src1VT, 1/*IsTrue16*/>.ret;
727+ let Src2RC64 = getVOP3SrcForVT<Src2VT, 1/*IsTrue16*/>.ret;
// NOTE(review): only Src0Mod is overridden here, whereas the _fake16 profile
// overrides both Src0Mod and Src1Mod - confirm Src1Mod's inherited value is
// the intended one.
728+ let Src0Mod = FPT16InputMods<0/*IsFake16*/>;
729+ let HasSrc2Mods = 0;
730+ let InsVOP3OpSel = getInsVOP3Base<Src0RC64, Src1RC64,
731+ Src2RC64, NumSrcArgs,
732+ HasClamp, 1/*HasModifiers*/, 0/*HasSrc2Mods*/, HasOMod,
733+ Src0Mod, Src1Mod, Src2Mod, 1/*HasOpSel*/>.ret;
// VOP3 DPP modifier types are derived from f16 rather than from the i16
// source types.
734+ let Src0ModVOP3DPP = getSrc0ModVOP3DPP<f16, DstVT>.ret;
735+ let Src1ModVOP3DPP = getSrcModVOP3DPP<f16>.ret;
736+ }
// Fake16 flavour of the i16 select profile: sets IsTrue16 only, uses the
// default getVALUDstForVT destination, and derives source modifier types and
// VOP3 DPP operand/modifier types from f16 with IsFake16 = 1.
667737def VOP2e_I16_I16_I16_I1_fake16 : VOP2e_SGPR<[i16, i16, i16, i1]> {
668738 let IsTrue16 = 1;
669739 let DstRC64 = getVALUDstForVT<DstVT>.ret;
670740
671- let Src0Mod = getSrcMod <f16>.ret;
672- let Src1Mod = getSrcMod<f16>.ret;
741+ let Src0Mod = getSrc0Mod <f16, DstVT, 1/*IsTrue16*/, 1/*IsFake16*/ >.ret;
742+ let Src1Mod = getSrcMod<f16, 1/*IsTrue16*/, 1/*IsFake16*/ >.ret;
673743
674744 let Src0VOP3DPP = VGPRSrc_32;
675- let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT>.ret;
745+ let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1/*IsFake16*/>.ret;
746+ let Src0ModVOP3DPP = getSrc0ModVOP3DPP<f16, DstVT, 1/*IsFake16*/>.ret;
676747 let Src1ModVOP3DPP = getSrcModVOP3DPP<f16, 1/*IsFake16*/>.ret;
677748}
678749
@@ -714,8 +785,9 @@ def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> {
714785// VOP2 Instructions
715786//===----------------------------------------------------------------------===//
716787
717- let SubtargetPredicate = isGFX11Plus in
718- defm V_CNDMASK_B16 : VOP2eInst <"v_cndmask_b16", VOP2e_I16_I16_I16_I1_fake16>;
// v_cndmask_b16 is split into a "_t16" pseudo using the true16 select profile
// and a "_fake16" pseudo using the fake16 select profile.
// NOTE(review): the definition these replace was wrapped in
// "let SubtargetPredicate = isGFX11Plus in"; neither new defm carries an
// explicit predicate - confirm that is intended.
788+ defm V_CNDMASK_B16_t16 : VOP2eInst <"v_cndmask_b16_t16", VOP2e_I16_I16_I16_I1_true16>;
789+ defm V_CNDMASK_B16_fake16 : VOP2eInst <"v_cndmask_b16_fake16", VOP2e_I16_I16_I16_I1_fake16>;
790+
719791defm V_CNDMASK_B32 : VOP2eInst_VOPD <"v_cndmask_b32", VOP2e_I32_I32_I32_I1, 0x9, "v_cndmask_b32">;
720792let SubtargetPredicate = HasMadMacF32Insts, isReMaterializable = 1 in
721793def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, []>;
@@ -942,7 +1014,6 @@ let FPDPRounding = 1 in {
9421014 let SubtargetPredicate = UseFakeTrue16Insts in
9431015 defm V_LDEXP_F16_fake16 : VOP2Inst <"v_ldexp_f16_fake16", LDEXP_F16_VOPProfile_Fake16, null_frag, "v_ldexp_f16_fake16">;
9441016} // End FPDPRounding = 1
945- // FIXME VOP3 Only instructions. NFC using VOPProfile_True16 for these until a planned change to use a new register class for VOP3 encoded True16 instuctions
9461017defm V_LSHLREV_B16 : VOP2Inst_e64_t16 <"v_lshlrev_b16", VOP_I16_I16_I16, clshl_rev_16>;
9471018defm V_LSHRREV_B16 : VOP2Inst_e64_t16 <"v_lshrrev_b16", VOP_I16_I16_I16, clshr_rev_16>;
9481019defm V_ASHRREV_I16 : VOP2Inst_e64_t16 <"v_ashrrev_i16", VOP_I16_I16_I16, cashr_rev_16>;
@@ -1006,16 +1077,23 @@ let FPDPRounding = 1, isReMaterializable = 1, FixedSize = 1 in {
10061077let SubtargetPredicate = isGFX10Plus, True16Predicate = NotHasTrue16BitInsts in {
10071078def V_FMAMK_F16 : VOP2_Pseudo <"v_fmamk_f16", VOP_MADMK_F16, [], "">;
10081079}
1009- let SubtargetPredicate = HasTrue16BitInsts in {
1010- def V_FMAMK_F16_t16 : VOP2_Pseudo <"v_fmamk_f16_t16", VOP_MADMK_F16_t16, [], "">;
// v_fmamk_f16 pseudos for the two True16 modes: the "_t16" pseudo uses
// VOP_MADMK_F16_t16 under UseRealTrue16Insts, the "_fake16" pseudo uses
// VOP_MADMK_F16_fake16 under UseFakeTrue16Insts. Both pass an empty pattern
// list and an empty final string argument.
1080+ let True16Predicate = UseRealTrue16Insts in {
1081+ def V_FMAMK_F16_t16 : VOP2_Pseudo <"v_fmamk_f16_t16", VOP_MADMK_F16_t16, [], "">;
10111082}
1083+ let True16Predicate = UseFakeTrue16Insts in {
1084+ def V_FMAMK_F16_fake16 : VOP2_Pseudo <"v_fmamk_f16_fake16", VOP_MADMK_F16_fake16, [], "">;
1085+ }
1086+
10121087
10131088let isCommutable = 1 in {
10141089let SubtargetPredicate = isGFX10Plus, True16Predicate = NotHasTrue16BitInsts in {
10151090def V_FMAAK_F16 : VOP2_Pseudo <"v_fmaak_f16", VOP_MADAK_F16, [], "">;
10161091}
1017- let SubtargetPredicate = HasTrue16BitInsts in {
1018- def V_FMAAK_F16_t16 : VOP2_Pseudo <"v_fmaak_f16_t16", VOP_MADAK_F16_t16, [], "">;
// v_fmaak_f16 pseudos for the two True16 modes: the "_t16" pseudo uses
// VOP_MADAK_F16_t16 under UseRealTrue16Insts, the "_fake16" pseudo uses
// VOP_MADAK_F16_fake16 under UseFakeTrue16Insts. Both pass an empty pattern
// list and an empty final string argument.
1092+ let True16Predicate = UseRealTrue16Insts in {
1093+ def V_FMAAK_F16_t16 : VOP2_Pseudo <"v_fmaak_f16_t16", VOP_MADAK_F16_t16, [], "">;
1094+ }
1095+ let True16Predicate = UseFakeTrue16Insts in {
1096+ def V_FMAAK_F16_fake16 : VOP2_Pseudo <"v_fmaak_f16_fake16", VOP_MADAK_F16_fake16, [], "">;
10191097}
10201098} // End isCommutable = 1
10211099} // End FPDPRounding = 1, isReMaterializable = 1, FixedSize = 1
@@ -1024,12 +1102,17 @@ let Constraints = "$vdst = $src2",
10241102 DisableEncoding="$src2",
10251103 isConvertibleToThreeAddress = 1,
10261104 isCommutable = 1 in {
1027- let SubtargetPredicate = isGFX10Plus, True16Predicate = NotHasTrue16BitInsts in {
1028- defm V_FMAC_F16 : VOP2Inst <"v_fmac_f16", VOP_MAC_F16>;
1105+ let SubtargetPredicate = isGFX10Plus in {
1106+ let True16Predicate = NotHasTrue16BitInsts in {
1107+ defm V_FMAC_F16 : VOP2Inst <"v_fmac_f16", VOP_MAC_F16>;
1108+ }
1109+ let True16Predicate = UseRealTrue16Insts in {
1110+ defm V_FMAC_F16_t16 : VOP2Inst <"v_fmac_f16_t16", VOP_MAC_F16_t16>;
10291111}
1030- let SubtargetPredicate = HasTrue16BitInsts in {
1031- defm V_FMAC_F16_t16 : VOP2Inst <"v_fmac_f16_t16 ", VOP_MAC_F16_t16 >;
// Fake16 v_fmac_f16: instantiated from VOP_MAC_F16_fake16 and guarded by
// UseFakeTrue16Insts. The opcode-name string must not carry trailing
// whitespace, matching the sibling "v_fmac_f16_t16" definition.
1112+ let True16Predicate = UseFakeTrue16Insts in {
1113+ defm V_FMAC_F16_fake16 : VOP2Inst <"v_fmac_f16_fake16", VOP_MAC_F16_fake16>;
10321114}
1115+ } // End SubtargetPredicate = isGFX10Plus
10331116} // End FMAC Constraints
10341117
10351118let SubtargetPredicate = Has16BitInsts in {
@@ -1625,9 +1708,9 @@ defm V_SUBREV_CO_CI_U32 :
16251708
16261709defm V_MIN_NUM_F32 : VOP2_Real_FULL_with_name_gfx12<0x015, "V_MIN_F32", "v_min_num_f32">;
16271710defm V_MAX_NUM_F32 : VOP2_Real_FULL_with_name_gfx12<0x016, "V_MAX_F32", "v_max_num_f32">;
1628- defm V_MIN_NUM_F16 : VOP2_Real_FULL_t16_with_name_gfx12<0x030, "V_MIN_F16_t16", "v_min_num_f16", "v_min_f16">;
// GFX12 reals for the renamed f16 min/max: the "_t16" and "_fake16" entries
// of each pair share one opcode (0x030 for min, 0x031 for max) and the same
// two name strings, and differ only in the pseudo name they are given
// (V_*_F16_t16 vs V_*_F16_fake16).
1711+ defm V_MIN_NUM_F16_t16 : VOP2_Real_FULL_t16_with_name_gfx12<0x030, "V_MIN_F16_t16", "v_min_num_f16", "v_min_f16">;
16291712defm V_MIN_NUM_F16_fake16 : VOP2_Real_FULL_t16_with_name_gfx12<0x030, "V_MIN_F16_fake16", "v_min_num_f16", "v_min_f16">;
1630- defm V_MAX_NUM_F16 : VOP2_Real_FULL_t16_with_name_gfx12<0x031, "V_MAX_F16_t16", "v_max_num_f16", "v_max_f16">;
1713+ defm V_MAX_NUM_F16_t16 : VOP2_Real_FULL_t16_with_name_gfx12<0x031, "V_MAX_F16_t16", "v_max_num_f16", "v_max_f16">;
16311714defm V_MAX_NUM_F16_fake16 : VOP2_Real_FULL_t16_with_name_gfx12<0x031, "V_MAX_F16_fake16", "v_max_num_f16", "v_max_f16">;
16321715
16331716let SubtargetPredicate = isGFX12Plus in {
@@ -1663,6 +1746,14 @@ multiclass VOP2_Real_NO_VOP3_with_name_gfx11<bits<6> op, string opName,
16631746 }
16641747}
16651748
// GFX11 wrapper around VOP2_Real_FULL_with_name with GFX11Gen fixed as the
// generation. Parameter order here is (op, asmName, opName); the arguments
// are forwarded to the wrapped multiclass as (GFX11Gen, op, opName, asmName).
// opName defaults to NAME.
1749+ multiclass VOP2_Real_FULL_t16_gfx11<bits<6> op, string asmName, string opName = NAME> :
1750+ VOP2_Real_FULL_with_name<GFX11Gen, op, opName, asmName>;
1751+
// Emits both GFX11 reals for one opcode: one bound to the "<opName>_t16"
// pseudo and one bound to the "<opName>_fake16" pseudo, sharing the same
// opcode and asm name.
//   op      - 6-bit VOP2 opcode.
//   asmName - assembly mnemonic shared by both variants.
//   opName  - base pseudo name; defaults to NAME.
// VOP2_Real_FULL_t16_gfx11 takes (op, asmName, opName) and supplies GFX11Gen
// itself, so the generation is not passed again and the arguments follow that
// order. String suffixes are joined with the '#' paste operator.
// NOTE(review): the defm names are left as opName#"_t16" / opName#"_fake16";
// confirm the resulting record names are the intended ones.
1752+ multiclass VOP2_Real_FULL_t16_and_f16_gfx11<bits<6> op, string asmName, string opName = NAME> {
1753+ defm opName#"_t16": VOP2_Real_FULL_t16_gfx11<op, asmName, opName#"_t16">;
1754+ defm opName#"_fake16": VOP2_Real_FULL_t16_gfx11<op, asmName, opName#"_fake16">;
1755+ }
1756+
16661757multiclass VOP2_Real_NO_DPP_with_name_gfx11<bits<6> op, string opName,
16671758 string asmName> :
16681759 VOP2_Real_NO_DPP_with_name<GFX11Gen, op, opName, asmName>;
0 commit comments