@@ -1035,6 +1035,20 @@ multiclass VOPCClassPat64<string inst_name> {
10351035 >;
10361036}
10371037
// Selection pattern for the "_t16_e64" form of a VOPC class instruction.
// `inst` is the VOP_Pseudo named inst_name#"_t16_e64"; `P` is its Pfl field.
// Matches an i1 AMDGPUfp_class node whose first operand (type P.Src0VT) goes
// through VOP3ModsNonCanonicalizing, yielding $src0 and $src0_modifiers, and
// whose second operand is an i32 $src1. The emitted instruction receives the
// src0 modifiers and $src0, a literal 0 for src1_modifiers, the lo16
// subregister of $src1 (typed as f16), and a literal 0 for op_sel.
1038+ multiclass VOPCClassPat64_t16<string inst_name> {
1039+ defvar inst = !cast<VOP_Pseudo>(inst_name#"_t16_e64");
1040+ defvar P = inst.Pfl;
1041+ def : GCNPat <
1042+ (i1:$sdst
1043+ (AMDGPUfp_class
1044+ (P.Src0VT (VOP3ModsNonCanonicalizing P.Src0VT:$src0, i32:$src0_modifiers)),
1045+ i32:$src1)),
1046+ (inst i32:$src0_modifiers, VSrcT_f16:$src0,
1047+ 0 /* src1_modifiers */, (f16 (EXTRACT_SUBREG VGPR_32:$src1, lo16)),
1048+ 0) /* op_sel */
1049+ >;
1050+ }
1051+
10381052multiclass VOPCClassPat64_fake16<string inst_name> {
10391053 defvar inst = !cast<VOP_Pseudo>(inst_name#"_fake16_e64");
10401054 defvar P = inst.Pfl;
@@ -1158,6 +1172,7 @@ multiclass VOPC_CLASS_F16 <string opName> {
11581172 }
11591173 let True16Predicate = UseRealTrue16Insts in {
11601174 defm _t16 : VOPC_Class_Pseudos <opName#"_t16", VOPC_I1_F16_I16_t16, 0>;
1175+ defm : VOPCClassPat64_t16<NAME>;
11611176 }
11621177 let True16Predicate = UseFakeTrue16Insts in {
11631178 defm _fake16 : VOPC_Class_Pseudos <opName#"_fake16", VOPC_I1_F16_I16_fake16, 0>;
@@ -1207,27 +1222,30 @@ defm V_CMPX_CLASS_F16 : VOPCX_CLASS_F16 <"v_cmpx_class_f16">;
12071222
12081223// We need to use COPY_TO_REGCLASS to work around the problem where ReplaceAllUsesWith()
12091224// complains that it cannot replace i1 <-> i64/i32 if the node was not morphed in place.
// Integer compare selection patterns for AMDGPUsetcc on operands of type `vt`
// with condition `cond`. In this diff view each '-' line is the previous text
// and the '+' line after it is its replacement: the new signature adds a
// `dstInst` dag parameter defaulting to `(inst $src0, $src1)`, and all three
// patterns now use `dstInst` as the selected instruction.
//  * isWave64: i64 result, dstInst wrapped in COPY_TO_REGCLASS to SReg_64.
//  * isWave32: i32 result, dstInst wrapped in COPY_TO_REGCLASS to SReg_32.
//  * isWave32, i64 result: REG_SEQUENCE over SReg_64 with dstInst as sub0 and
//    (S_MOV_B32 (i32 0)) as sub1.
1210- multiclass ICMP_Pattern <PatFrags cond, Instruction inst, ValueType vt> {
1225+ multiclass ICMP_Pattern <PatFrags cond, Instruction inst, ValueType vt, dag dstInst = (inst $src0, $src1) > {
12111226 let WaveSizePredicate = isWave64 in
12121227 def : GCNPat <
12131228 (i64 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
1214- (i64 (COPY_TO_REGCLASS (inst $src0, $src1) , SReg_64))
1229+ (i64 (COPY_TO_REGCLASS dstInst , SReg_64))
12151230 >;
12161231
12171232 let WaveSizePredicate = isWave32 in {
12181233 def : GCNPat <
12191234 (i32 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
1220- (i32 (COPY_TO_REGCLASS (inst $src0, $src1) , SReg_32))
1235+ (i32 (COPY_TO_REGCLASS dstInst , SReg_32))
12211236 >;
12221237
12231238 // Support codegen of i64 setcc in wave32 mode.
12241239 def : GCNPat <
12251240 (i64 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
1226- (i64 (REG_SEQUENCE SReg_64, (inst $src0, $src1) , sub0, (S_MOV_B32 (i32 0)), sub1))
1241+ (i64 (REG_SEQUENCE SReg_64, dstInst , sub0, (S_MOV_B32 (i32 0)), sub1))
12271242 >;
12281243 }
12291244}
12301245
// Variant of ICMP_Pattern that overrides `dstInst` with
// `(inst 0, $src0, 0, $src1)`, i.e. a literal 0 operand ahead of each source.
// NOTE(review): the zeros are presumably the src0/src1 modifier operands of the
// t16 e64 encoding -- confirm against the instruction's operand list.
1246+ multiclass ICMP_Pattern_t16<PatFrags cond, Instruction inst, ValueType vt>
1247+ : ICMP_Pattern<cond, inst, vt, (inst 0, $src0, 0, $src1)>;
1248+
12311249defm : ICMP_Pattern <COND_EQ, V_CMP_EQ_U32_e64, i32>;
12321250defm : ICMP_Pattern <COND_NE, V_CMP_NE_U32_e64, i32>;
12331251defm : ICMP_Pattern <COND_UGT, V_CMP_GT_U32_e64, i32>;
@@ -1250,6 +1268,19 @@ defm : ICMP_Pattern <COND_SGE, V_CMP_GE_I64_e64, i64>;
12501268defm : ICMP_Pattern <COND_SLT, V_CMP_LT_I64_e64, i64>;
12511269defm : ICMP_Pattern <COND_SLE, V_CMP_LE_I64_e64, i64>;
12521270
// i16 integer compare patterns, instantiated with True16Predicate set to
// UseRealTrue16Insts. Each line binds one condition code to a
// V_CMP_*_t16_e64 instruction through ICMP_Pattern_t16: COND_EQ/COND_NE and
// the COND_U* conditions use the U16 opcodes, the COND_S* conditions use the
// I16 opcodes.
1271+ let True16Predicate = UseRealTrue16Insts in {
1272+ defm : ICMP_Pattern_t16 <COND_EQ, V_CMP_EQ_U16_t16_e64, i16>;
1273+ defm : ICMP_Pattern_t16 <COND_NE, V_CMP_NE_U16_t16_e64, i16>;
1274+ defm : ICMP_Pattern_t16 <COND_UGT, V_CMP_GT_U16_t16_e64, i16>;
1275+ defm : ICMP_Pattern_t16 <COND_UGE, V_CMP_GE_U16_t16_e64, i16>;
1276+ defm : ICMP_Pattern_t16 <COND_ULT, V_CMP_LT_U16_t16_e64, i16>;
1277+ defm : ICMP_Pattern_t16 <COND_ULE, V_CMP_LE_U16_t16_e64, i16>;
1278+ defm : ICMP_Pattern_t16 <COND_SGT, V_CMP_GT_I16_t16_e64, i16>;
1279+ defm : ICMP_Pattern_t16 <COND_SGE, V_CMP_GE_I16_t16_e64, i16>;
1280+ defm : ICMP_Pattern_t16 <COND_SLT, V_CMP_LT_I16_t16_e64, i16>;
1281+ defm : ICMP_Pattern_t16 <COND_SLE, V_CMP_LE_I16_t16_e64, i16>;
1282+ } // End True16Predicate = UseRealTrue16Insts
1283+
12531284let True16Predicate = UseFakeTrue16Insts in {
12541285defm : ICMP_Pattern <COND_EQ, V_CMP_EQ_U16_fake16_e64, i16>;
12551286defm : ICMP_Pattern <COND_NE, V_CMP_NE_U16_fake16_e64, i16>;
@@ -1335,6 +1366,24 @@ defm : FCMP_Pattern <COND_UGE, V_CMP_NLT_F64_e64, f64>;
13351366defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F64_e64, f64>;
13361367defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F64_e64, f64>;
13371368
// f16 floating-point compare patterns, instantiated with True16Predicate set
// to UseRealTrue16Insts. The first group binds COND_O, COND_UO and the COND_O*
// conditions; the second group binds the COND_U* conditions to the
// V_CMP_N*_F16_t16_e64 opcodes.
// NOTE(review): COND_ONE and COND_UNE both select V_CMP_NEQ_F16_t16_e64 --
// confirm this matches the F32/F64/fake16 tables and is intentional.
1369+ let True16Predicate = UseRealTrue16Insts in {
1370+ defm : FCMP_Pattern <COND_O, V_CMP_O_F16_t16_e64, f16>;
1371+ defm : FCMP_Pattern <COND_UO, V_CMP_U_F16_t16_e64, f16>;
1372+ defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F16_t16_e64, f16>;
1373+ defm : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F16_t16_e64, f16>;
1374+ defm : FCMP_Pattern <COND_OGT, V_CMP_GT_F16_t16_e64, f16>;
1375+ defm : FCMP_Pattern <COND_OGE, V_CMP_GE_F16_t16_e64, f16>;
1376+ defm : FCMP_Pattern <COND_OLT, V_CMP_LT_F16_t16_e64, f16>;
1377+ defm : FCMP_Pattern <COND_OLE, V_CMP_LE_F16_t16_e64, f16>;
1378+
1379+ defm : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F16_t16_e64, f16>;
1380+ defm : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F16_t16_e64, f16>;
1381+ defm : FCMP_Pattern <COND_UGT, V_CMP_NLE_F16_t16_e64, f16>;
1382+ defm : FCMP_Pattern <COND_UGE, V_CMP_NLT_F16_t16_e64, f16>;
1383+ defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F16_t16_e64, f16>;
1384+ defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F16_t16_e64, f16>;
1385+ } // End True16Predicate = UseRealTrue16Insts
1386+
13381387let True16Predicate = UseFakeTrue16Insts in {
13391388defm : FCMP_Pattern <COND_O, V_CMP_O_F16_fake16_e64, f16>;
13401389defm : FCMP_Pattern <COND_UO, V_CMP_U_F16_fake16_e64, f16>;
0 commit comments