diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 083345d4d1e12..a78440dc7a1f4 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2034,11 +2034,13 @@ class getInsVOP3P { + Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, + bit HasFP8ByteSel = 0, bit HasFP8DstByteSel = 0> { dag ret = getInsVOP3Base.ret; + Src0Mod, Src1Mod, Src2Mod, /*HasOpSel=*/1, + HasFP8ByteSel, HasFP8DstByteSel>.ret; } class getInsDPPBase { + bit Src2HasMods, + bit HasByteSel = 0> { string dst = "$vdst"; string isrc0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,"); @@ -2263,9 +2266,10 @@ class getAsmVOP3OpSel { @@ -2630,7 +2634,8 @@ class VOPProfile _ArgVT, bit _EnableClamp = 0> { Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret; field dag InsVOP3OpSel = getInsVOP3OpSel.ret; + Src0Mod, Src1Mod, Src2Mod, + HasFP8ByteSel, HasFP8DstByteSel>.ret; field dag InsDPP = !if(HasExtDPP, getInsDPP.ret, @@ -2671,8 +2676,8 @@ class VOPProfile _ArgVT, bit _EnableClamp = 0> { // the asm operand name via this HasModifiers flag field string AsmDPP8 = getAsmDPP8.ret; field string AsmVOP3Base = getAsmVOP3Base.ret; + HasOpSel, HasOMod, IsVOP3P, HasNeg, HasSrc0Mods, HasSrc1Mods, + HasSrc2Mods, DstVT, HasFP8ByteSel>.ret; field string Asm64 = AsmVOP3Base; field string AsmVOP3P = getAsmVOP3P.ret; field string AsmVOP3OpSel = getAsmVOP3OpSel _ArgVT, bit _EnableClamp = 0> { HasOMod, HasSrc0FloatMods, HasSrc1FloatMods, - HasSrc2FloatMods>.ret; + HasSrc2FloatMods, + HasFP8ByteSel>.ret; field string AsmVOP3DPP = getAsmVOP3DPP.ret; field string AsmVOP3DPP16 = getAsmVOP3DPP16.ret; field string AsmVOP3DPP8 = getAsmVOP3DPP8.ret; diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index a2672d71cb43c..046cce73ff761 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -1055,6 +1055,7 @@ class VOP3_CVT_SCALE_FP4FP8BF8_F32_TiedInput_Profile : VOP3_Profil class VOP3_CVT_SCALE_FP4_F32_TiedInput_Profile : VOP3_CVT_SCALE_FP4FP8BF8_F32_TiedInput_Profile

{ let HasFP8DstByteSel = 1; + let HasFP8ByteSel = 0; // It works as a dst-bytesel, but does not have byte_sel operand. } class VOP3_CVT_SCALE_SR_F8BF8_F16BF16F32_TiedInput_Profile : VOP3_CVT_SCALE_FP4FP8BF8_F32_TiedInput_Profile

{ @@ -1063,6 +1064,7 @@ class VOP3_CVT_SCALE_SR_F8BF8_F16BF16F32_TiedInput_Profile : VOP3_ FP32InputMods:$src2_modifiers, Src2RC64:$src2, VGPR_32:$vdst_in, op_sel0:$op_sel); let HasFP8DstByteSel = 1; + let HasFP8ByteSel = 0; // It works as a dst-bytesel, but does not have byte_sel operand. } diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index 952ee2fe2c955..4cd845aaa5497 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -1478,6 +1478,9 @@ class VOP3_Profile_Base : VO let HasOpSel = !if(Features.HasOpSel, 1, P.HasOpSel); let IsMAI = !if(Features.IsMAI, 1, P.IsMAI); let IsPacked = !if(Features.IsPacked, 1, P.IsPacked); + let HasFP8SrcByteSel = P.HasFP8SrcByteSel; + let HasFP8DstByteSel = P.HasFP8DstByteSel; + let HasOMod = P.HasOMod; let HasModifiers = !if (Features.IsMAI, 0, @@ -1494,6 +1497,9 @@ class VOP3_Profile_True16 : let HasOpSel = !if(Features.HasOpSel, 1, P.HasOpSel); let IsMAI = !if(Features.IsMAI, 1, P.IsMAI); let IsPacked = !if(Features.IsPacked, 1, P.IsPacked); + let HasFP8SrcByteSel = P.HasFP8SrcByteSel; + let HasFP8DstByteSel = P.HasFP8DstByteSel; + let HasOMod = P.HasOMod; let HasModifiers = !if (Features.IsMAI, 0, @@ -1506,6 +1512,9 @@ class VOP3_Profile_Fake16 : let HasOpSel = !if(Features.HasOpSel, 1, P.HasOpSel); let IsMAI = !if(Features.IsMAI, 1, P.IsMAI); let IsPacked = !if(Features.IsPacked, 1, P.IsPacked); + let HasFP8SrcByteSel = P.HasFP8SrcByteSel; + let HasFP8DstByteSel = P.HasFP8DstByteSel; + let HasOMod = P.HasOMod; let HasModifiers = !if (Features.IsMAI, 0,