@@ -212,6 +212,11 @@ def VOP_I16_F16_SPECIAL_OMOD_fake16 : VOPProfile_Fake16<VOP_I16_F16> {
212
212
}
213
213
214
214
215
// VOPProfile over the type list [f64, f64, untyped, untyped] with the
// HasExtVOP3DPP and HasExt64BitDPP fields both overridden to 0.
// NOTE(review): presumably this suppresses the VOP3-DPP and 64-bit-DPP
// variants for instructions built on this profile -- confirm against the
// VOPProfile definition, which is not visible here.
+ def VOP_F64_F64_NO_DPP : VOPProfile <[f64, f64, untyped, untyped]> {
216
+ let HasExtVOP3DPP = 0;
217
+ let HasExt64BitDPP = 0;
218
+ }
219
+
215
220
//===----------------------------------------------------------------------===//
216
221
// VOP1 Instructions
217
222
//===----------------------------------------------------------------------===//
@@ -344,9 +349,9 @@ defm V_SQRT_F32 : VOP1Inst <"v_sqrt_f32", VOP_F32_F32, int_amdgcn_sqrt>;
344
349
} // End TRANS = 1, SchedRW = [WriteTrans32]
345
350
346
351
// f64 rcp / rsq / sqrt pseudos, all defined with TRANS = 1 and
// SchedRW = [WriteTrans64]. The removed (-) lines used the VOP_F64_F64
// profile; the added (+) lines use VOP_F64_F64_NO_DPP instead. Mnemonics and
// selection nodes (AMDGPUrcp, AMDGPUrsq, int_amdgcn_sqrt) are unchanged.
let TRANS = 1, SchedRW = [WriteTrans64] in {
347
- defm V_RCP_F64 : VOP1Inst <"v_rcp_f64", VOP_F64_F64 , AMDGPUrcp>;
348
- defm V_RSQ_F64 : VOP1Inst <"v_rsq_f64", VOP_F64_F64 , AMDGPUrsq>;
349
- defm V_SQRT_F64 : VOP1Inst <"v_sqrt_f64", VOP_F64_F64 , int_amdgcn_sqrt>;
352
+ defm V_RCP_F64 : VOP1Inst <"v_rcp_f64", VOP_F64_F64_NO_DPP , AMDGPUrcp>;
353
+ defm V_RSQ_F64 : VOP1Inst <"v_rsq_f64", VOP_F64_F64_NO_DPP , AMDGPUrsq>;
354
+ defm V_SQRT_F64 : VOP1Inst <"v_sqrt_f64", VOP_F64_F64_NO_DPP , int_amdgcn_sqrt>;
350
355
} // End TRANS = 1, SchedRW = [WriteTrans64]
351
356
352
357
let TRANS = 1, SchedRW = [WriteTrans32] in {
@@ -1025,6 +1030,11 @@ multiclass VOP1_Real_FULL_with_name<GFXGen Gen, bits<9> op, string opName,
1025
1030
// Real encodings without any DPP form: inherits only VOP1_Real_e32 and
// VOP1_Real_e64 for generation `Gen` and 9-bit opcode `op`.
multiclass VOP1_Real_NO_DPP<GFXGen Gen, bits<9> op> :
1026
1031
VOP1_Real_e32<Gen, op>, VOP1_Real_e64<Gen, op>;
1027
1032
1033
// Everything VOP1_Real_NO_DPP provides (e32 + e64) plus VOP1_Real_dpp and
// VOP3_Real_dpp_Base for the same generation. The VOP3 opcode passed to
// VOP3_Real_dpp_Base is the bit list {0, 1, 1, op{6-0}}: the fixed leading
// bits 0, 1, 1 followed by the low seven bits of `op`.
// NOTE(review): the name implies these are the DPP16 forms only -- confirm
// against VOP1_Real_dpp / VOP3_Real_dpp_Base, which are not visible here.
+ multiclass VOP1_Real_with_DPP16<GFXGen Gen, bits<9> op> :
1034
+ VOP1_Real_NO_DPP<Gen, op>,
1035
+ VOP1_Real_dpp<Gen, op>,
1036
+ VOP3_Real_dpp_Base<Gen, {0, 1, 1, op{6-0}}>;
1037
+
1028
1038
multiclass VOP1_Real_FULL_t16_gfx11_gfx12<bits<9> op, string asmName,
1029
1039
string opName = NAME> :
1030
1040
VOP1_Real_FULL_with_name<GFX11Gen, op, opName, asmName>,
@@ -1057,17 +1067,22 @@ multiclass VOP1_Real_FULL_t16_and_fake16_gfx1250<
1057
1067
VOP1_Real_FULL_with_name<GFX1250Gen, op, opName#"_fake16", asmName>;
1058
1068
}
1059
1069
1070
// Convenience wrapper: instantiates VOP1_Real_FULL_with_name twice, once for
// GFX11Gen and once for GFX12Not12_50Gen, forwarding the same `op`, `opName`
// and `asmName` to both.
+ multiclass VOP1_Real_FULL_with_name_gfx11_gfx12_not_gfx1250<bits<9> op, string opName,
1071
+ string asmName> :
1072
+ VOP1_Real_FULL_with_name<GFX11Gen, op, opName, asmName>,
1073
+ VOP1_Real_FULL_with_name<GFX12Not12_50Gen, op, opName, asmName>;
1074
+
1060
1075
// GFX1250 reals for one opcode. The e32 form is inherited from
// VOP1_Real_e32<GFX1250Gen, op>. The _e64_gfx1250 record is defined here from
// the NAME#"_e64" pseudo (`ps`), combining VOP3_Real_Gen with
// VOP3OpSelIsDPP_gfx12; the latter receives the VOP3 opcode
// {0, 1, 1, op{6-0}} and the pseudo's profile (ps.Pfl).
multiclass VOP1_Real_OpSelIsDPP_gfx1250<bits<9> op> : VOP1_Real_e32<GFX1250Gen, op> {
1061
1076
defvar ps = !cast<VOP_Pseudo>(NAME#"_e64");
1062
1077
def _e64_gfx1250 :
1063
1078
VOP3_Real_Gen<ps, GFX1250Gen>,
1064
1079
VOP3OpSelIsDPP_gfx12<{0, 1, 1, op{6-0}}, ps.Pfl>;
1065
1080
}
1066
1081
1067
// v_cvt_f32_fp8 (opcode 0x06c) and v_cvt_f32_bf8 (opcode 0x06d) reals.
// FP8: the removed (-) line bound the V_CVT_F32_FP8_OP_SEL pseudo for
// GFX12Not12_50Gen only; the added (+) line goes through
// VOP1_Real_FULL_with_name_gfx11_gfx12_not_gfx1250, which also instantiates
// GFX11Gen. The V_CVT_F32_FP8_gfx1250 pseudo stays bound to GFX1250Gen.
// BF8: the removed and added lines show no token difference in this view.
- defm V_CVT_F32_FP8 : VOP1_Real_FULL_with_name<GFX12Not12_50Gen, 0x06c, "V_CVT_F32_FP8_OP_SEL", "v_cvt_f32_fp8">;
1068
- defm V_CVT_F32_FP8 : VOP1_Real_FULL_with_name<GFX1250Gen, 0x06c, "V_CVT_F32_FP8_gfx1250", "v_cvt_f32_fp8">;
1082
+ defm V_CVT_F32_FP8 : VOP1_Real_FULL_with_name_gfx11_gfx12_not_gfx1250< 0x06c, "V_CVT_F32_FP8_OP_SEL", "v_cvt_f32_fp8">;
1083
+ defm V_CVT_F32_FP8 : VOP1_Real_FULL_with_name<GFX1250Gen, 0x06c, "V_CVT_F32_FP8_gfx1250", "v_cvt_f32_fp8">;
1069
1084
1070
- defm V_CVT_F32_BF8 : VOP1_Real_FULL_with_name<GFX12Gen, 0x06d, "V_CVT_F32_BF8_OP_SEL", "v_cvt_f32_bf8">;
1085
+ defm V_CVT_F32_BF8 : VOP1_Real_FULL_with_name<GFX12Gen, 0x06d, "V_CVT_F32_BF8_OP_SEL", "v_cvt_f32_bf8">;
1071
1086
1072
1087
defm V_CVT_PK_F32_FP8_fake16 : VOP1_Real_e32_with_name<GFX12Gen, 0x06e, "V_CVT_PK_F32_FP8_fake16", "v_cvt_pk_f32_fp8">;
1073
1088
defm V_CVT_PK_F32_FP8_t16 : VOP1_Real_e32_with_name<GFX12Gen, 0x06e, "V_CVT_PK_F32_FP8_t16", "v_cvt_pk_f32_fp8">;
@@ -1252,17 +1267,17 @@ let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in {
1252
1267
// GFX7 reals for one opcode: inherits the e32 and e64 GFX7 encodings.
multiclass VOP1_Real_gfx7<bits<9> op> :
1253
1268
VOP1_Real_e32_gfx7<op>, VOP1_Real_e64_gfx7<op>;
1254
1269
1255
// Reals for GFX7, GFX10, GFX11 and GFX12 in one multiclass. GFX11 uses
// VOP1_Real_NO_DPP. The removed (-) lines named the multiclass
// ..._NO_DPP_gfx11_gfx12 and used VOP1_Real_NO_DPP for GFX12Gen as well; the
// added (+) lines rename it to ..._NO_DPP_gfx11_with_DPP16_gfx12 and use
// VOP1_Real_with_DPP16 for GFX12Gen.
- multiclass VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12 <bits<9> op> :
1270
+ multiclass VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12 <bits<9> op> :
1256
1271
VOP1_Real_gfx7<op>, VOP1_Real_gfx10<op>, VOP1_Real_NO_DPP<GFX11Gen, op>,
1257
- VOP1_Real_NO_DPP <GFX12Gen, op>;
1272
+ VOP1_Real_with_DPP16 <GFX12Gen, op>;
1258
1273
1259
1274
// Opcode assignments for this group. V_LOG_LEGACY_F32 / V_EXP_LEGACY_F32
// (0x045 / 0x046) get GFX7 reals only. The f64 rounding ops
// (trunc 0x017, ceil 0x018, rndne 0x019, floor 0x01a) switch from the
// ..._NO_DPP_gfx11_gfx12 multiclass (-) to
// ..._NO_DPP_gfx11_with_DPP16_gfx12 (+); the opcodes are unchanged.
defm V_LOG_LEGACY_F32 : VOP1_Real_gfx7<0x045>;
1260
1275
defm V_EXP_LEGACY_F32 : VOP1_Real_gfx7<0x046>;
1261
1276
1262
- defm V_TRUNC_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12 <0x017>;
1263
- defm V_CEIL_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12 <0x018>;
1264
- defm V_RNDNE_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12 <0x019>;
1265
- defm V_FLOOR_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12 <0x01a>;
1277
+ defm V_TRUNC_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12 <0x017>;
1278
+ defm V_CEIL_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12 <0x018>;
1279
+ defm V_RNDNE_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12 <0x019>;
1280
+ defm V_FLOOR_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12 <0x01a>;
1266
1281
1267
1282
//===----------------------------------------------------------------------===//
1268
1283
// GFX6, GFX7, GFX10, GFX11, GFX12
@@ -1300,6 +1315,10 @@ multiclass VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<bits<9> op> :
1300
1315
VOP1_Real_gfx6_gfx7_gfx10<op>, VOP1_Real_NO_DPP<GFX11Gen, op>,
1301
1316
VOP1_Real_NO_DPP<GFX12Gen, op>;
1302
1317
1318
// Reals for GFX6/7/10 (via VOP1_Real_gfx6_gfx7_gfx10), GFX11 without DPP
// (VOP1_Real_NO_DPP<GFX11Gen>) and GFX12 via VOP1_Real_with_DPP16<GFX12Gen>.
+ multiclass VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<bits<9> op> :
1319
+ VOP1_Real_gfx6_gfx7_gfx10<op>, VOP1_Real_NO_DPP<GFX11Gen, op>,
1320
+ VOP1_Real_with_DPP16<GFX12Gen, op>;
1321
+
1303
1322
// Combines VOP1Only_Real_gfx6_gfx7 and VOP1Only_Real_gfx10_gfx11_gfx12 for
// the same opcode.
// NOTE(review): "VOP1Only" presumably means no e64/VOP3 form is emitted --
// confirm against the inherited multiclasses, which are not visible here.
multiclass VOP1Only_Real_gfx6_gfx7_gfx10_gfx11_gfx12<bits<9> op> :
1304
1323
VOP1Only_Real_gfx6_gfx7<op>, VOP1Only_Real_gfx10_gfx11_gfx12<op>;
1305
1324
@@ -1314,8 +1333,8 @@ defm V_RSQ_CLAMP_F64 : VOP1_Real_gfx6_gfx7<0x032>;
1314
1333
// Real-encoding instantiations for opcodes 0x000-0x007; the hex literal is
// the `op` argument of the named multiclass. In this hunk V_CVT_I32_F64
// (0x003) and V_CVT_F64_I32 (0x004) move from ..._NO_DPP_gfx11_gfx12 (-) to
// ..._NO_DPP_gfx11_with_DPP16_gfx12 (+); all other lines are context.
defm V_NOP : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x000>;
1315
1334
defm V_MOV_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x001>;
1316
1335
defm V_READFIRSTLANE_B32 : VOP1Only_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x002>;
1317
- defm V_CVT_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12 <0x003>;
1318
- defm V_CVT_F64_I32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12 <0x004>;
1336
+ defm V_CVT_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12 <0x003>;
1337
+ defm V_CVT_F64_I32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12 <0x004>;
1319
1338
defm V_CVT_F32_I32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x005>;
1320
1339
defm V_CVT_F32_U32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x006>;
1321
1340
defm V_CVT_U32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x007>;
@@ -1325,14 +1344,14 @@ defm V_CVT_F32_F16 : VOP1_Real_gfx6_gfx7_gfx10<0x00b>;
1325
1344
// Real-encoding instantiations for opcodes 0x00c-0x016 and 0x020-0x022.
// The f64 conversions V_CVT_F32_F64 (0x00f), V_CVT_F64_F32 (0x010),
// V_CVT_U32_F64 (0x015) and V_CVT_F64_U32 (0x016) move from
// ..._NO_DPP_gfx11_gfx12 (-) to ..._NO_DPP_gfx11_with_DPP16_gfx12 (+);
// opcodes are unchanged and the remaining lines are context.
defm V_CVT_RPI_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00c>;
1326
1345
defm V_CVT_FLR_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00d>;
1327
1346
defm V_CVT_OFF_F32_I4 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x00e>;
1328
- defm V_CVT_F32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12 <0x00f>;
1329
- defm V_CVT_F64_F32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12 <0x010>;
1347
+ defm V_CVT_F32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12 <0x00f>;
1348
+ defm V_CVT_F64_F32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12 <0x010>;
1330
1349
defm V_CVT_F32_UBYTE0 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x011>;
1331
1350
defm V_CVT_F32_UBYTE1 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x012>;
1332
1351
defm V_CVT_F32_UBYTE2 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x013>;
1333
1352
defm V_CVT_F32_UBYTE3 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x014>;
1334
- defm V_CVT_U32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12 <0x015>;
1335
- defm V_CVT_F64_U32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12 <0x016>;
1353
+ defm V_CVT_U32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12 <0x015>;
1354
+ defm V_CVT_F64_U32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12 <0x016>;
1336
1355
defm V_FRACT_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x020>;
1337
1356
defm V_TRUNC_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x021>;
1338
1357
defm V_CEIL_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x022>;
@@ -1354,9 +1373,9 @@ defm V_BFREV_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x038>;
1354
1373
// Real-encoding instantiations for opcodes 0x039-0x041. The f64 ops
// V_FREXP_EXP_I32_F64 (0x03c), V_FREXP_MANT_F64 (0x03d) and V_FRACT_F64
// (0x03e) move from ..._NO_DPP_gfx11_gfx12 (-) to
// ..._NO_DPP_gfx11_with_DPP16_gfx12 (+); opcodes are unchanged and the
// remaining lines are context.
defm V_FFBH_U32 : VOP1_Real_gfx6_gfx7_gfx10<0x039>;
1355
1374
defm V_FFBL_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x03a>;
1356
1375
defm V_FFBH_I32 : VOP1_Real_gfx6_gfx7_gfx10<0x03b>;
1357
- defm V_FREXP_EXP_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12 <0x03c>;
1358
- defm V_FREXP_MANT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12 <0x03d>;
1359
- defm V_FRACT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12 <0x03e>;
1376
+ defm V_FREXP_EXP_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12 <0x03c>;
1377
+ defm V_FREXP_MANT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12 <0x03d>;
1378
+ defm V_FRACT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12 <0x03e>;
1360
1379
defm V_FREXP_EXP_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x03f>;
1361
1380
defm V_FREXP_MANT_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x040>;
1362
1381
defm V_CLREXCP : VOP1_Real_gfx6_gfx7_gfx10<0x041>;
@@ -1410,7 +1429,9 @@ multiclass VOP1_Real_vi <bits<10> op> {
1410
1429
if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
1411
1430
def _dpp_vi :
1412
1431
VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.VI>,
1413
- VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;
1432
+ VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")> {
1433
+ let AssemblerPredicate = isGFX8GFX9;
1434
+ }
1414
1435
}
1415
1436
1416
1437
defm V_NOP : VOP1_Real_vi <0x0>;
0 commit comments