@@ -212,6 +212,11 @@ def VOP_I16_F16_SPECIAL_OMOD_fake16 : VOPProfile_Fake16<VOP_I16_F16> {
212212}
213213
214214
215+ def VOP_F64_F64_NO_DPP : VOPProfile <[f64, f64, untyped, untyped]> { // VOPProfile over [f64, f64, untyped, untyped] with the two DPP extension flags below cleared
216+ let HasExtVOP3DPP = 0; // presumably suppresses the VOP3 DPP variant for users of this profile — confirm against VOPProfile
217+ let HasExt64BitDPP = 0; // presumably suppresses the 64-bit DPP variant for users of this profile — confirm against VOPProfile
218+ }
219+
215220//===----------------------------------------------------------------------===//
216221// VOP1 Instructions
217222//===----------------------------------------------------------------------===//
@@ -344,9 +349,9 @@ defm V_SQRT_F32 : VOP1Inst <"v_sqrt_f32", VOP_F32_F32, int_amdgcn_sqrt>;
344349} // End TRANS = 1, SchedRW = [WriteTrans32]
345350
346351let TRANS = 1, SchedRW = [WriteTrans64] in { // every defm in this region gets TRANS = 1 and SchedRW = [WriteTrans64]
347- defm V_RCP_F64 : VOP1Inst <"v_rcp_f64", VOP_F64_F64 , AMDGPUrcp>;
348- defm V_RSQ_F64 : VOP1Inst <"v_rsq_f64", VOP_F64_F64 , AMDGPUrsq>;
349- defm V_SQRT_F64 : VOP1Inst <"v_sqrt_f64", VOP_F64_F64 , int_amdgcn_sqrt>;
352+ defm V_RCP_F64 : VOP1Inst <"v_rcp_f64", VOP_F64_F64_NO_DPP , AMDGPUrcp>; // the three f64 ops now use the VOP_F64_F64_NO_DPP profile instead of VOP_F64_F64
353+ defm V_RSQ_F64 : VOP1Inst <"v_rsq_f64", VOP_F64_F64_NO_DPP , AMDGPUrsq>;
354+ defm V_SQRT_F64 : VOP1Inst <"v_sqrt_f64", VOP_F64_F64_NO_DPP , int_amdgcn_sqrt>;
350355} // End TRANS = 1, SchedRW = [WriteTrans64]
351356
352357let TRANS = 1, SchedRW = [WriteTrans32] in {
@@ -1025,6 +1030,11 @@ multiclass VOP1_Real_FULL_with_name<GFXGen Gen, bits<9> op, string opName,
10251030multiclass VOP1_Real_NO_DPP<GFXGen Gen, bits<9> op> : // real encodings for generation Gen and opcode op: only the e32 and e64 forms
10261031 VOP1_Real_e32<Gen, op>, VOP1_Real_e64<Gen, op>;
10271032
1033+ multiclass VOP1_Real_with_DPP16<GFXGen Gen, bits<9> op> : // everything VOP1_Real_NO_DPP emits, plus the VOP1 DPP and VOP3 DPP reals for generation Gen
1034+ VOP1_Real_NO_DPP<Gen, op>,
1035+ VOP1_Real_dpp<Gen, op>,
1036+ VOP3_Real_dpp_Base<Gen, {0, 1, 1, op{6-0}}>; // VOP3 opcode is the bit list {0, 1, 1} followed by the low seven bits of op
1037+
10281038multiclass VOP1_Real_FULL_t16_gfx11_gfx12<bits<9> op, string asmName,
10291039 string opName = NAME> :
10301040 VOP1_Real_FULL_with_name<GFX11Gen, op, opName, asmName>,
@@ -1057,17 +1067,22 @@ multiclass VOP1_Real_FULL_t16_and_fake16_gfx1250<
10571067 VOP1_Real_FULL_with_name<GFX1250Gen, op, opName#"_fake16", asmName>;
10581068}
10591069
1070+ multiclass VOP1_Real_FULL_with_name_gfx11_gfx12_not_gfx1250<bits<9> op, string opName, // forwards op/opName/asmName to VOP1_Real_FULL_with_name for two generations
1071+ string asmName> :
1072+ VOP1_Real_FULL_with_name<GFX11Gen, op, opName, asmName>, // instantiation for GFX11Gen
1073+ VOP1_Real_FULL_with_name<GFX12Not12_50Gen, op, opName, asmName>; // instantiation for GFX12Not12_50Gen
1074+
10601075multiclass VOP1_Real_OpSelIsDPP_gfx1250<bits<9> op> : VOP1_Real_e32<GFX1250Gen, op> { // e32 real for GFX1250Gen plus a custom _e64_gfx1250 real defined below
10611076 defvar ps = !cast<VOP_Pseudo>(NAME#"_e64"); // look up the matching "<NAME>_e64" pseudo
10621077 def _e64_gfx1250 :
10631078 VOP3_Real_Gen<ps, GFX1250Gen>,
10641079 VOP3OpSelIsDPP_gfx12<{0, 1, 1, op{6-0}}, ps.Pfl>; // opcode is {0, 1, 1} followed by op{6-0}; the encoding class receives the pseudo's profile
10651080}
10661081
1067- defm V_CVT_F32_FP8 : VOP1_Real_FULL_with_name<GFX12Not12_50Gen, 0x06c, "V_CVT_F32_FP8_OP_SEL", "v_cvt_f32_fp8">;
1068- defm V_CVT_F32_FP8 : VOP1_Real_FULL_with_name<GFX1250Gen, 0x06c, "V_CVT_F32_FP8_gfx1250", "v_cvt_f32_fp8">;
1082+ defm V_CVT_F32_FP8 : VOP1_Real_FULL_with_name_gfx11_gfx12_not_gfx1250< 0x06c, "V_CVT_F32_FP8_OP_SEL", "v_cvt_f32_fp8">; // NOTE(review): the helper also instantiates GFX11Gen, which the replaced line did not — confirm a GFX11 encoding is intended here
1083+ defm V_CVT_F32_FP8 : VOP1_Real_FULL_with_name<GFX1250Gen, 0x06c, "V_CVT_F32_FP8_gfx1250", "v_cvt_f32_fp8">; // same opcode 0x06c, bound to the separate V_CVT_F32_FP8_gfx1250 pseudo for GFX1250Gen
10691084
1070- defm V_CVT_F32_BF8 : VOP1_Real_FULL_with_name<GFX12Gen, 0x06d, "V_CVT_F32_BF8_OP_SEL", "v_cvt_f32_bf8">;
1085+ defm V_CVT_F32_BF8 : VOP1_Real_FULL_with_name<GFX12Gen, 0x06d, "V_CVT_F32_BF8_OP_SEL", "v_cvt_f32_bf8">;
10711086
10721087defm V_CVT_PK_F32_FP8_fake16 : VOP1_Real_e32_with_name<GFX12Gen, 0x06e, "V_CVT_PK_F32_FP8_fake16", "v_cvt_pk_f32_fp8">;
10731088defm V_CVT_PK_F32_FP8_t16 : VOP1_Real_e32_with_name<GFX12Gen, 0x06e, "V_CVT_PK_F32_FP8_t16", "v_cvt_pk_f32_fp8">;
@@ -1252,17 +1267,17 @@ let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in {
12521267multiclass VOP1_Real_gfx7<bits<9> op> : // gfx7 real encodings for opcode op: the e32 and e64 forms
12531268 VOP1_Real_e32_gfx7<op>, VOP1_Real_e64_gfx7<op>;
12541269
1255- multiclass VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12 <bits<9> op> :
1270+ multiclass VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12 <bits<9> op> : // gfx7 + gfx10 reals, NO_DPP reals for GFX11Gen, with_DPP16 reals for GFX12Gen
12561271 VOP1_Real_gfx7<op>, VOP1_Real_gfx10<op>, VOP1_Real_NO_DPP<GFX11Gen, op>,
1257- VOP1_Real_NO_DPP <GFX12Gen, op>;
1272+ VOP1_Real_with_DPP16 <GFX12Gen, op>; // GFX12Gen switches from VOP1_Real_NO_DPP to VOP1_Real_with_DPP16
12581273
12591274defm V_LOG_LEGACY_F32 : VOP1_Real_gfx7<0x045>;
12601275defm V_EXP_LEGACY_F32 : VOP1_Real_gfx7<0x046>;
12611276
1262- defm V_TRUNC_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12 <0x017>;
1263- defm V_CEIL_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12 <0x018>;
1264- defm V_RNDNE_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12 <0x019>;
1265- defm V_FLOOR_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12 <0x01a>;
1277+ defm V_TRUNC_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12 <0x017>; // opcodes unchanged; these four only follow the multiclass rename
1278+ defm V_CEIL_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12 <0x018>;
1279+ defm V_RNDNE_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12 <0x019>;
1280+ defm V_FLOOR_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12 <0x01a>;
12661281
12671282//===----------------------------------------------------------------------===//
12681283// GFX6, GFX7, GFX10, GFX11, GFX12
@@ -1300,6 +1315,10 @@ multiclass VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<bits<9> op> :
13001315 VOP1_Real_gfx6_gfx7_gfx10<op>, VOP1_Real_NO_DPP<GFX11Gen, op>,
13011316 VOP1_Real_NO_DPP<GFX12Gen, op>;
13021317
1318+ multiclass VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<bits<9> op> : // gfx6/gfx7/gfx10 reals, NO_DPP reals for GFX11Gen, with_DPP16 reals for GFX12Gen
1319+ VOP1_Real_gfx6_gfx7_gfx10<op>, VOP1_Real_NO_DPP<GFX11Gen, op>,
1320+ VOP1_Real_with_DPP16<GFX12Gen, op>;
1321+
13031322multiclass VOP1Only_Real_gfx6_gfx7_gfx10_gfx11_gfx12<bits<9> op> : // combines the gfx6/gfx7 and the gfx10/gfx11/gfx12 VOP1Only real multiclasses for one opcode
13041323 VOP1Only_Real_gfx6_gfx7<op>, VOP1Only_Real_gfx10_gfx11_gfx12<op>;
13051324
@@ -1314,8 +1333,8 @@ defm V_RSQ_CLAMP_F64 : VOP1_Real_gfx6_gfx7<0x032>;
13141333defm V_NOP : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x000>;
13151334defm V_MOV_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x001>;
13161335defm V_READFIRSTLANE_B32 : VOP1Only_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x002>;
1317- defm V_CVT_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12 <0x003>;
1318- defm V_CVT_F64_I32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12 <0x004>;
1336+ defm V_CVT_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12 <0x003>; // same opcode; moved to the multiclass that uses VOP1_Real_with_DPP16 for GFX12Gen
1337+ defm V_CVT_F64_I32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12 <0x004>;
13191338defm V_CVT_F32_I32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x005>;
13201339defm V_CVT_F32_U32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x006>;
13211340defm V_CVT_U32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x007>;
@@ -1325,14 +1344,14 @@ defm V_CVT_F32_F16 : VOP1_Real_gfx6_gfx7_gfx10<0x00b>;
13251344defm V_CVT_RPI_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00c>;
13261345defm V_CVT_FLR_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00d>;
13271346defm V_CVT_OFF_F32_I4 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x00e>;
1328- defm V_CVT_F32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12 <0x00f>;
1329- defm V_CVT_F64_F32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12 <0x010>;
1347+ defm V_CVT_F32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12 <0x00f>; // same opcode; moved to the multiclass that uses VOP1_Real_with_DPP16 for GFX12Gen
1348+ defm V_CVT_F64_F32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12 <0x010>;
13301349defm V_CVT_F32_UBYTE0 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x011>;
13311350defm V_CVT_F32_UBYTE1 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x012>;
13321351defm V_CVT_F32_UBYTE2 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x013>;
13331352defm V_CVT_F32_UBYTE3 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x014>;
1334- defm V_CVT_U32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12 <0x015>;
1335- defm V_CVT_F64_U32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12 <0x016>;
1353+ defm V_CVT_U32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12 <0x015>; // same opcode; moved to the multiclass that uses VOP1_Real_with_DPP16 for GFX12Gen
1354+ defm V_CVT_F64_U32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12 <0x016>;
13361355defm V_FRACT_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x020>;
13371356defm V_TRUNC_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x021>;
13381357defm V_CEIL_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x022>;
@@ -1354,9 +1373,9 @@ defm V_BFREV_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x038>;
13541373defm V_FFBH_U32 : VOP1_Real_gfx6_gfx7_gfx10<0x039>;
13551374defm V_FFBL_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x03a>;
13561375defm V_FFBH_I32 : VOP1_Real_gfx6_gfx7_gfx10<0x03b>;
1357- defm V_FREXP_EXP_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12 <0x03c>;
1358- defm V_FREXP_MANT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12 <0x03d>;
1359- defm V_FRACT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12 <0x03e>;
1376+ defm V_FREXP_EXP_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12 <0x03c>; // same opcodes; moved to the multiclass that uses VOP1_Real_with_DPP16 for GFX12Gen
1377+ defm V_FREXP_MANT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12 <0x03d>;
1378+ defm V_FRACT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12 <0x03e>;
13601379defm V_FREXP_EXP_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x03f>;
13611380defm V_FREXP_MANT_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x040>;
13621381defm V_CLREXCP : VOP1_Real_gfx6_gfx7_gfx10<0x041>;
@@ -1410,7 +1429,9 @@ multiclass VOP1_Real_vi <bits<10> op> {
14101429 if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
14111430 def _dpp_vi :
14121431 VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.VI>,
1413- VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;
1432+ VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")> {
1433+ let AssemblerPredicate = isGFX8GFX9;
1434+ }
14141435}
14151436
14161437defm V_NOP : VOP1_Real_vi <0x0>;
0 commit comments