From f4300d458903c52d33efa132585f6d516b9ccda7 Mon Sep 17 00:00:00 2001 From: guochen2 Date: Fri, 13 Sep 2024 08:45:06 -0400 Subject: [PATCH] tmp --- llvm/lib/Target/AMDGPU/VOP2Instructions.td | 161 ++++++++++++++++----- 1 file changed, 126 insertions(+), 35 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index dd48607402eb0..c31751c6d6127 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -95,6 +95,7 @@ class VOP2_Real ; } - let SubtargetPredicate = HasTrue16BitInsts in { - defm _t16 : VOP2Inst_e64, node, revOp#"_t16", GFX9Renamed>; + let SubtargetPredicate = UseRealTrue16Insts in { + defm _t16 : VOP2Inst_e64, node, revOp#"_t16", GFX9Renamed>; + } + let SubtargetPredicate = UseFakeTrue16Insts in { + defm _fake16 : VOP2Inst_e64, node, revOp#"_fake16", GFX9Renamed>; } } @@ -388,9 +392,14 @@ class VOP_MADAK : VOP_MADK_Base { def VOP_MADAK_F16 : VOP_MADAK ; def VOP_MADAK_F16_t16 : VOP_MADAK { + let IsRealTrue16 = 1; + let DstRC = getVALUDstForVT.ret; + let Ins32 = (ins VSrcT_f16_Lo128_Deferred:$src0, VGPRSrc_16_Lo128:$src1, ImmOpType:$imm); +} +def VOP_MADAK_F16_fake16 : VOP_MADAK { let IsTrue16 = 1; - let DstRC = VOPDstOperand; - let Ins32 = (ins VSrcFake16_f16_Lo128_Deferred:$src0, VGPR_32_Lo128:$src1, ImmOpType:$imm); + let DstRC = getVALUDstForVT_fake16.ret; + let Ins32 = (ins VSrcFake16_f16_Lo128_Deferred:$src0, VGPRSrc_32_Lo128:$src1, ImmOpType:$imm); } def VOP_MADAK_F32 : VOP_MADAK ; @@ -413,9 +422,14 @@ class VOP_MADMK : VOP_MADK_Base { def VOP_MADMK_F16 : VOP_MADMK ; def VOP_MADMK_F16_t16 : VOP_MADMK { + let IsRealTrue16 = 1; + let DstRC = getVALUDstForVT.ret; + let Ins32 = (ins VSrcT_f16_Lo128_Deferred:$src0, ImmOpType:$imm, VGPRSrc_16_Lo128:$src1); +} +def VOP_MADMK_F16_fake16 : VOP_MADMK { let IsTrue16 = 1; - let DstRC = VOPDstOperand; - let Ins32 = (ins VSrcFake16_f16_Lo128_Deferred:$src0, ImmOpType:$imm, VGPR_32_Lo128:$src1); + let DstRC = getVALUDstForVT_fake16.ret; + let Ins32 = (ins VSrcFake16_f16_Lo128_Deferred:$src0, ImmOpType:$imm, VGPRSrc_32_Lo128:$src1); } def VOP_MADMK_F32 : VOP_MADMK ; @@ -423,7 +437,9 @@ def VOP_MADMK_F32 : VOP_MADMK ; // and processing time but it makes it easier to convert to mad. class VOP_MAC : VOPProfile <[vt0, vt1, vt1, vt0]> { let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, getVregSrcForVT.ret:$src2); - let Ins64 = getIns64.ret, 3, + // Src2 must accept the same operand types as vdst, namely VGPRs only + let Src2RC64 = getVOP3VRegSrcForVT.ret; + let Ins64 = getIns64.ret; let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0, @@ -478,21 +494,19 @@ class VOP_MAC : VOPProfile <[vt0, vt1, vt1, v } def VOP_MAC_F16 : VOP_MAC ; -def VOP_MAC_F16_t16 : VOP_MAC { +def VOP_MAC_F16_fake16 : VOP_MAC { let IsTrue16 = 1; - let HasOpSel = 1; - let AsmVOP3OpSel = getAsmVOP3OpSel<2/*NumSrcArgs*/, HasClamp, HasOMod, - HasSrc0FloatMods, HasSrc1FloatMods, HasSrc2FloatMods>.ret; - let DstRC = VOPDstOperand; - let DstRC64 = VOPDstOperand; - let Src1RC32 = VGPRSrc_32_Lo128; + let DstRC = getVALUDstForVT_fake16.ret; + let Src0RC32 = getVOPSrc0ForVT.ret; + let Src1RC32 = getVregSrcForVT.ret; let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, getVregSrcForVT.ret:$src2); let Src0DPP = getVregSrcForVT.ret; let Src1DPP = getVregSrcForVT.ret; let Src2DPP = getVregSrcForVT.ret; - let Src0ModDPP = getSrcModDPP_t16.ret; - let Src1ModDPP = getSrcModDPP_t16.ret; - let Src2ModDPP = getSrcModDPP_t16.ret; + let Src0ModDPP = getSrcModDPP_t16.ret; + let Src1ModDPP = getSrcModDPP_t16.ret; + let Src2ModDPP = getSrcModDPP_t16.ret; + let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0, Src1ModDPP:$src1_modifiers, Src1DPP:$src1, getVregSrcForVT.ret:$src2, // stub argument @@ -502,10 +516,47 @@ def VOP_MAC_F16_t16 : VOP_MAC { Src1ModDPP:$src1_modifiers, Src1DPP:$src1, getVregSrcForVT.ret:$src2, // stub argument dpp8:$dpp8, Dpp8FI:$fi); - let Src2Mod = FP32InputMods; // dummy unused modifiers - let Src2RC64 = VGPRSrc_32; // stub argument + let DstRC64 = getVALUDstForVT.ret; + let Src0VOP3DPP = VGPRSrc_32; + let Src1VOP3DPP = getVOP3DPPSrcForVT.ret; + let Src2VOP3DPP = getVOP3DPPSrcForVT.ret; + let Src0ModVOP3DPP = getSrc0ModVOP3DPP.ret; let Src1ModVOP3DPP = getSrcModVOP3DPP.ret; + let Src2ModVOP3DPP = getSrcModVOP3DPP.ret; + let Src0Mod = getSrc0Mod.ret; + let Src1Mod = getSrcMod.ret; + let Src2Mod = getSrcMod.ret; +} +def VOP_MAC_F16_t16 : VOP_MAC { + let IsTrue16 = 1; + let IsRealTrue16 = 1; + let HasOpSel = 1; + let DstRC = VOPDstOperand_t16Lo128; + let Src1RC32 = VGPRSrc_16_Lo128; + let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, getVregSrcForVT.ret:$src2); + let Src0DPP = getVregSrcForVT.ret; + let Src1DPP = getVregSrcForVT.ret; + let Src2DPP = getVregSrcForVT.ret; + let Src0ModDPP = getSrcModDPP_t16.ret; + let Src1ModDPP = getSrcModDPP_t16.ret; + let Src2ModDPP = getSrcModDPP_t16.ret; + let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0, + Src1ModDPP:$src1_modifiers, Src1DPP:$src1, + getVregSrcForVT.ret:$src2, // stub argument + dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask, + DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl); + let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0, + Src1ModDPP:$src1_modifiers, Src1DPP:$src1, + getVregSrcForVT.ret:$src2, // stub argument + dpp8:$dpp8, Dpp8FI:$fi); + let DstRC64 = getVALUDstForVT.ret; + let Src0RC64 = getVOP3SrcForVT.ret; + let Src1RC64 = getVOP3SrcForVT.ret; + let Src0Mod = getSrc0Mod.ret; + let Src1Mod = getSrcMod.ret; + let Src2Mod = getSrcMod.ret; } + def VOP_MAC_F32 : VOP_MAC ; let HasExtDPP = 0, HasExt32BitDPP = 0 in def VOP_MAC_LEGACY_F32 : VOP_MAC ; @@ -664,15 +715,35 @@ class VOP2e_SGPR ArgVT> : VOPProfile { } def VOP2e_I32_I32_I32_I1 : VOP2e_SGPR<[i32, i32, i32, i1]>; +def VOP2e_I16_I16_I16_I1 : VOP2e_SGPR<[i16, i16, i16, i1]>; +// V_CNDMASK_B16 is VOP3 only +def VOP2e_I16_I16_I16_I1_true16 : VOP2e_SGPR<[i16, i16, i16, i1]> { + let IsTrue16 = 1; + let IsRealTrue16 = 1; + let HasOpSel = 1; + let DstRC64 = getVALUDstForVT.ret; + let Src0RC64 = getVOP3SrcForVT.ret; + let Src1RC64 = getVOP3SrcForVT.ret; + let Src2RC64 = getVOP3SrcForVT.ret; + let Src0Mod = FPT16InputMods<0/*IsFake16*/>; + let HasSrc2Mods = 0; + let InsVOP3OpSel = getInsVOP3Base.ret; + let Src0ModVOP3DPP = getSrc0ModVOP3DPP.ret; + let Src1ModVOP3DPP = getSrcModVOP3DPP.ret; +} def VOP2e_I16_I16_I16_I1_fake16 : VOP2e_SGPR<[i16, i16, i16, i1]> { let IsTrue16 = 1; let DstRC64 = getVALUDstForVT.ret; - let Src0Mod = getSrcMod.ret; - let Src1Mod = getSrcMod.ret; + let Src0Mod = getSrc0Mod.ret; + let Src1Mod = getSrcMod.ret; let Src0VOP3DPP = VGPRSrc_32; - let Src1VOP3DPP = getVOP3DPPSrcForVT.ret; + let Src1VOP3DPP = getVOP3DPPSrcForVT.ret; + let Src0ModVOP3DPP = getSrc0ModVOP3DPP.ret; let Src1ModVOP3DPP = getSrcModVOP3DPP.ret; } @@ -714,8 +785,9 @@ def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> { // VOP2 Instructions //===----------------------------------------------------------------------===// -let SubtargetPredicate = isGFX11Plus in -defm V_CNDMASK_B16 : VOP2eInst <"v_cndmask_b16", VOP2e_I16_I16_I16_I1_fake16>; +defm V_CNDMASK_B16_t16 : VOP2eInst <"v_cndmask_b16_t16", VOP2e_I16_I16_I16_I1_true16>; +defm V_CNDMASK_B16_fake16 : VOP2eInst <"v_cndmask_b16_fake16", VOP2e_I16_I16_I16_I1_fake16>; + defm V_CNDMASK_B32 : VOP2eInst_VOPD <"v_cndmask_b32", VOP2e_I32_I32_I32_I1, 0x9, "v_cndmask_b32">; let SubtargetPredicate = HasMadMacF32Insts, isReMaterializable = 1 in def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, []>; @@ -942,7 +1014,6 @@ let FPDPRounding = 1 in { let SubtargetPredicate = UseFakeTrue16Insts in defm V_LDEXP_F16_fake16 : VOP2Inst <"v_ldexp_f16_fake16", LDEXP_F16_VOPProfile_Fake16, null_frag, "v_ldexp_f16_fake16">; } // End FPDPRounding = 1 -// FIXME VOP3 Only instructions. NFC using VOPProfile_True16 for these until a planned change to use a new register class for VOP3 encoded True16 instuctions defm V_LSHLREV_B16 : VOP2Inst_e64_t16 <"v_lshlrev_b16", VOP_I16_I16_I16, clshl_rev_16>; defm V_LSHRREV_B16 : VOP2Inst_e64_t16 <"v_lshrrev_b16", VOP_I16_I16_I16, clshr_rev_16>; defm V_ASHRREV_I16 : VOP2Inst_e64_t16 <"v_ashrrev_i16", VOP_I16_I16_I16, cashr_rev_16>; @@ -1006,16 +1077,23 @@ let FPDPRounding = 1, isReMaterializable = 1, FixedSize = 1 in { let SubtargetPredicate = isGFX10Plus, True16Predicate = NotHasTrue16BitInsts in { def V_FMAMK_F16 : VOP2_Pseudo <"v_fmamk_f16", VOP_MADMK_F16, [], "">; } -let SubtargetPredicate = HasTrue16BitInsts in { -def V_FMAMK_F16_t16 : VOP2_Pseudo <"v_fmamk_f16_t16", VOP_MADMK_F16_t16, [], "">; +let True16Predicate = UseRealTrue16Insts in { + def V_FMAMK_F16_t16 : VOP2_Pseudo <"v_fmamk_f16_t16", VOP_MADMK_F16_t16, [], "">; } +let True16Predicate = UseFakeTrue16Insts in { + def V_FMAMK_F16_fake16 : VOP2_Pseudo <"v_fmamk_f16_fake16", VOP_MADMK_F16_fake16, [], "">; +} + let isCommutable = 1 in { let SubtargetPredicate = isGFX10Plus, True16Predicate = NotHasTrue16BitInsts in { def V_FMAAK_F16 : VOP2_Pseudo <"v_fmaak_f16", VOP_MADAK_F16, [], "">; } -let SubtargetPredicate = HasTrue16BitInsts in { -def V_FMAAK_F16_t16 : VOP2_Pseudo <"v_fmaak_f16_t16", VOP_MADAK_F16_t16, [], "">; +let True16Predicate = UseRealTrue16Insts in { + def V_FMAAK_F16_t16 : VOP2_Pseudo <"v_fmaak_f16_t16", VOP_MADAK_F16_t16, [], "">; +} +let True16Predicate = UseFakeTrue16Insts in { + def V_FMAAK_F16_fake16 : VOP2_Pseudo <"v_fmaak_f16_fake16", VOP_MADAK_F16_fake16, [], "">; } } // End isCommutable = 1 } // End FPDPRounding = 1, isReMaterializable = 1, FixedSize = 1 @@ -1024,12 +1102,17 @@ let Constraints = "$vdst = $src2", DisableEncoding="$src2", isConvertibleToThreeAddress = 1, isCommutable = 1 in { -let SubtargetPredicate = isGFX10Plus, True16Predicate = NotHasTrue16BitInsts in { -defm V_FMAC_F16 : VOP2Inst <"v_fmac_f16", VOP_MAC_F16>; +let SubtargetPredicate = isGFX10Plus in { +let True16Predicate = NotHasTrue16BitInsts in { + defm V_FMAC_F16 : VOP2Inst <"v_fmac_f16", VOP_MAC_F16>; +} +let True16Predicate = UseRealTrue16Insts in { + defm V_FMAC_F16_t16 : VOP2Inst <"v_fmac_f16_t16", VOP_MAC_F16_t16>; } -let SubtargetPredicate = HasTrue16BitInsts in { -defm V_FMAC_F16_t16 : VOP2Inst <"v_fmac_f16_t16", VOP_MAC_F16_t16>; +let True16Predicate = UseFakeTrue16Insts in { + defm V_FMAC_F16_fake16 : VOP2Inst <"v_fmac_f16_fake16", VOP_MAC_F16_fake16>; } +} // End SubtargetPredicate = isGFX10Plus } // End FMAC Constraints let SubtargetPredicate = Has16BitInsts in { @@ -1625,9 +1708,9 @@ defm V_SUBREV_CO_CI_U32 : defm V_MIN_NUM_F32 : VOP2_Real_FULL_with_name_gfx12<0x015, "V_MIN_F32", "v_min_num_f32">; defm V_MAX_NUM_F32 : VOP2_Real_FULL_with_name_gfx12<0x016, "V_MAX_F32", "v_max_num_f32">; -defm V_MIN_NUM_F16 : VOP2_Real_FULL_t16_with_name_gfx12<0x030, "V_MIN_F16_t16", "v_min_num_f16", "v_min_f16">; +defm V_MIN_NUM_F16_t16 : VOP2_Real_FULL_t16_with_name_gfx12<0x030, "V_MIN_F16_t16", "v_min_num_f16", "v_min_f16">; defm V_MIN_NUM_F16_fake16 : VOP2_Real_FULL_t16_with_name_gfx12<0x030, "V_MIN_F16_fake16", "v_min_num_f16", "v_min_f16">; -defm V_MAX_NUM_F16 : VOP2_Real_FULL_t16_with_name_gfx12<0x031, "V_MAX_F16_t16", "v_max_num_f16", "v_max_f16">; +defm V_MAX_NUM_F16_t16 : VOP2_Real_FULL_t16_with_name_gfx12<0x031, "V_MAX_F16_t16", "v_max_num_f16", "v_max_f16">; defm V_MAX_NUM_F16_fake16 : VOP2_Real_FULL_t16_with_name_gfx12<0x031, "V_MAX_F16_fake16", "v_max_num_f16", "v_max_f16">; let SubtargetPredicate = isGFX12Plus in { @@ -1663,6 +1746,14 @@ multiclass VOP2_Real_NO_VOP3_with_name_gfx11 op, string opName, } } +multiclass VOP2_Real_FULL_t16_gfx11 op, string asmName, string opName = NAME> : + VOP2_Real_FULL_with_name; + +multiclass VOP2_Real_FULL_t16_and_f16_gfx11 op, string asmName, string opName = NAME> { + defm opName#"_t16": VOP2_Real_FULL_t16_gfx11; + defm opName#"_fake16": VOP2_Real_FULL_t16_gfx11; +} + multiclass VOP2_Real_NO_DPP_with_name_gfx11 op, string opName, string asmName> : VOP2_Real_NO_DPP_with_name;