Skip to content

Commit 87d820e

Browse files
committed
[AMDGPU][True16][MC] Update VOPC profile with latest VOP3 true16; use f16 for fake16 format
1 parent b62557a commit 87d820e

File tree

10 files changed

+262
-122
lines changed

10 files changed

+262
-122
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 57 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -1104,10 +1104,13 @@ static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size,
11041104
if (Size == 16 && !ST.has16BitInsts())
11051105
return -1;
11061106

1107-
const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc, unsigned S32Opc,
1107+
const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc,
1108+
unsigned FakeS16Opc, unsigned S32Opc,
11081109
unsigned S64Opc) {
11091110
if (Size == 16)
1110-
return ST.hasTrue16BitInsts() ? TrueS16Opc : S16Opc;
1111+
return ST.hasTrue16BitInsts()
1112+
? ST.useRealTrue16Insts() ? TrueS16Opc : FakeS16Opc
1113+
: S16Opc;
11111114
if (Size == 32)
11121115
return S32Opc;
11131116
return S64Opc;
@@ -1118,83 +1121,109 @@ static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size,
11181121
llvm_unreachable("Unknown condition code!");
11191122
case CmpInst::ICMP_NE:
11201123
return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,
1121-
AMDGPU::V_CMP_NE_U32_e64, AMDGPU::V_CMP_NE_U64_e64);
1124+
AMDGPU::V_CMP_NE_U16_fake16_e64, AMDGPU::V_CMP_NE_U32_e64,
1125+
AMDGPU::V_CMP_NE_U64_e64);
11221126
case CmpInst::ICMP_EQ:
11231127
return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,
1124-
AMDGPU::V_CMP_EQ_U32_e64, AMDGPU::V_CMP_EQ_U64_e64);
1128+
AMDGPU::V_CMP_EQ_U16_fake16_e64, AMDGPU::V_CMP_EQ_U32_e64,
1129+
AMDGPU::V_CMP_EQ_U64_e64);
11251130
case CmpInst::ICMP_SGT:
11261131
return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,
1127-
AMDGPU::V_CMP_GT_I32_e64, AMDGPU::V_CMP_GT_I64_e64);
1132+
AMDGPU::V_CMP_GT_I16_fake16_e64, AMDGPU::V_CMP_GT_I32_e64,
1133+
AMDGPU::V_CMP_GT_I64_e64);
11281134
case CmpInst::ICMP_SGE:
11291135
return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,
1130-
AMDGPU::V_CMP_GE_I32_e64, AMDGPU::V_CMP_GE_I64_e64);
1136+
AMDGPU::V_CMP_GE_I16_fake16_e64, AMDGPU::V_CMP_GE_I32_e64,
1137+
AMDGPU::V_CMP_GE_I64_e64);
11311138
case CmpInst::ICMP_SLT:
11321139
return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,
1133-
AMDGPU::V_CMP_LT_I32_e64, AMDGPU::V_CMP_LT_I64_e64);
1140+
AMDGPU::V_CMP_LT_I16_fake16_e64, AMDGPU::V_CMP_LT_I32_e64,
1141+
AMDGPU::V_CMP_LT_I64_e64);
11341142
case CmpInst::ICMP_SLE:
11351143
return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,
1136-
AMDGPU::V_CMP_LE_I32_e64, AMDGPU::V_CMP_LE_I64_e64);
1144+
AMDGPU::V_CMP_LE_I16_fake16_e64, AMDGPU::V_CMP_LE_I32_e64,
1145+
AMDGPU::V_CMP_LE_I64_e64);
11371146
case CmpInst::ICMP_UGT:
11381147
return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,
1139-
AMDGPU::V_CMP_GT_U32_e64, AMDGPU::V_CMP_GT_U64_e64);
1148+
AMDGPU::V_CMP_GT_U16_fake16_e64, AMDGPU::V_CMP_GT_U32_e64,
1149+
AMDGPU::V_CMP_GT_U64_e64);
11401150
case CmpInst::ICMP_UGE:
11411151
return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,
1142-
AMDGPU::V_CMP_GE_U32_e64, AMDGPU::V_CMP_GE_U64_e64);
1152+
AMDGPU::V_CMP_GE_U16_fake16_e64, AMDGPU::V_CMP_GE_U32_e64,
1153+
AMDGPU::V_CMP_GE_U64_e64);
11431154
case CmpInst::ICMP_ULT:
11441155
return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,
1145-
AMDGPU::V_CMP_LT_U32_e64, AMDGPU::V_CMP_LT_U64_e64);
1156+
AMDGPU::V_CMP_LT_U16_fake16_e64, AMDGPU::V_CMP_LT_U32_e64,
1157+
AMDGPU::V_CMP_LT_U64_e64);
11461158
case CmpInst::ICMP_ULE:
11471159
return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,
1148-
AMDGPU::V_CMP_LE_U32_e64, AMDGPU::V_CMP_LE_U64_e64);
1160+
AMDGPU::V_CMP_LE_U16_fake16_e64, AMDGPU::V_CMP_LE_U32_e64,
1161+
AMDGPU::V_CMP_LE_U64_e64);
11491162

11501163
case CmpInst::FCMP_OEQ:
11511164
return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64,
1152-
AMDGPU::V_CMP_EQ_F32_e64, AMDGPU::V_CMP_EQ_F64_e64);
1165+
AMDGPU::V_CMP_EQ_F16_fake16_e64, AMDGPU::V_CMP_EQ_F32_e64,
1166+
AMDGPU::V_CMP_EQ_F64_e64);
11531167
case CmpInst::FCMP_OGT:
11541168
return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64,
1155-
AMDGPU::V_CMP_GT_F32_e64, AMDGPU::V_CMP_GT_F64_e64);
1169+
AMDGPU::V_CMP_GT_F16_fake16_e64, AMDGPU::V_CMP_GT_F32_e64,
1170+
AMDGPU::V_CMP_GT_F64_e64);
11561171
case CmpInst::FCMP_OGE:
11571172
return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64,
1158-
AMDGPU::V_CMP_GE_F32_e64, AMDGPU::V_CMP_GE_F64_e64);
1173+
AMDGPU::V_CMP_GE_F16_fake16_e64, AMDGPU::V_CMP_GE_F32_e64,
1174+
AMDGPU::V_CMP_GE_F64_e64);
11591175
case CmpInst::FCMP_OLT:
11601176
return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64,
1161-
AMDGPU::V_CMP_LT_F32_e64, AMDGPU::V_CMP_LT_F64_e64);
1177+
AMDGPU::V_CMP_LT_F16_fake16_e64, AMDGPU::V_CMP_LT_F32_e64,
1178+
AMDGPU::V_CMP_LT_F64_e64);
11621179
case CmpInst::FCMP_OLE:
11631180
return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64,
1164-
AMDGPU::V_CMP_LE_F32_e64, AMDGPU::V_CMP_LE_F64_e64);
1181+
AMDGPU::V_CMP_LE_F16_fake16_e64, AMDGPU::V_CMP_LE_F32_e64,
1182+
AMDGPU::V_CMP_LE_F64_e64);
11651183
case CmpInst::FCMP_ONE:
11661184
return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
1167-
AMDGPU::V_CMP_NEQ_F32_e64, AMDGPU::V_CMP_NEQ_F64_e64);
1185+
AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
1186+
AMDGPU::V_CMP_NEQ_F64_e64);
11681187
case CmpInst::FCMP_ORD:
11691188
return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64,
1170-
AMDGPU::V_CMP_O_F32_e64, AMDGPU::V_CMP_O_F64_e64);
1189+
AMDGPU::V_CMP_O_F16_fake16_e64, AMDGPU::V_CMP_O_F32_e64,
1190+
AMDGPU::V_CMP_O_F64_e64);
11711191
case CmpInst::FCMP_UNO:
11721192
return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64,
1173-
AMDGPU::V_CMP_U_F32_e64, AMDGPU::V_CMP_U_F64_e64);
1193+
AMDGPU::V_CMP_U_F16_fake16_e64, AMDGPU::V_CMP_U_F32_e64,
1194+
AMDGPU::V_CMP_U_F64_e64);
11741195
case CmpInst::FCMP_UEQ:
11751196
return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64,
1176-
AMDGPU::V_CMP_NLG_F32_e64, AMDGPU::V_CMP_NLG_F64_e64);
1197+
AMDGPU::V_CMP_NLG_F16_fake16_e64, AMDGPU::V_CMP_NLG_F32_e64,
1198+
AMDGPU::V_CMP_NLG_F64_e64);
11771199
case CmpInst::FCMP_UGT:
11781200
return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64,
1179-
AMDGPU::V_CMP_NLE_F32_e64, AMDGPU::V_CMP_NLE_F64_e64);
1201+
AMDGPU::V_CMP_NLE_F16_fake16_e64, AMDGPU::V_CMP_NLE_F32_e64,
1202+
AMDGPU::V_CMP_NLE_F64_e64);
11801203
case CmpInst::FCMP_UGE:
11811204
return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64,
1182-
AMDGPU::V_CMP_NLT_F32_e64, AMDGPU::V_CMP_NLT_F64_e64);
1205+
AMDGPU::V_CMP_NLT_F16_fake16_e64, AMDGPU::V_CMP_NLT_F32_e64,
1206+
AMDGPU::V_CMP_NLT_F64_e64);
11831207
case CmpInst::FCMP_ULT:
11841208
return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64,
1185-
AMDGPU::V_CMP_NGE_F32_e64, AMDGPU::V_CMP_NGE_F64_e64);
1209+
AMDGPU::V_CMP_NGE_F16_fake16_e64, AMDGPU::V_CMP_NGE_F32_e64,
1210+
AMDGPU::V_CMP_NGE_F64_e64);
11861211
case CmpInst::FCMP_ULE:
11871212
return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64,
1188-
AMDGPU::V_CMP_NGT_F32_e64, AMDGPU::V_CMP_NGT_F64_e64);
1213+
AMDGPU::V_CMP_NGT_F16_fake16_e64, AMDGPU::V_CMP_NGT_F32_e64,
1214+
AMDGPU::V_CMP_NGT_F64_e64);
11891215
case CmpInst::FCMP_UNE:
11901216
return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
1191-
AMDGPU::V_CMP_NEQ_F32_e64, AMDGPU::V_CMP_NEQ_F64_e64);
1217+
AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
1218+
AMDGPU::V_CMP_NEQ_F64_e64);
11921219
case CmpInst::FCMP_TRUE:
11931220
return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64,
1194-
AMDGPU::V_CMP_TRU_F32_e64, AMDGPU::V_CMP_TRU_F64_e64);
1221+
AMDGPU::V_CMP_TRU_F16_fake16_e64, AMDGPU::V_CMP_TRU_F32_e64,
1222+
AMDGPU::V_CMP_TRU_F64_e64);
11951223
case CmpInst::FCMP_FALSE:
11961224
return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64,
1197-
AMDGPU::V_CMP_F_F32_e64, AMDGPU::V_CMP_F_F64_e64);
1225+
AMDGPU::V_CMP_F_F16_fake16_e64, AMDGPU::V_CMP_F_F32_e64,
1226+
AMDGPU::V_CMP_F_F64_e64);
11981227
}
11991228
}
12001229

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 68 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -5501,20 +5501,48 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
55015501
case AMDGPU::S_CMP_NLE_F32: return AMDGPU::V_CMP_NLE_F32_e64;
55025502
case AMDGPU::S_CMP_NEQ_F32: return AMDGPU::V_CMP_NEQ_F32_e64;
55035503
case AMDGPU::S_CMP_NLT_F32: return AMDGPU::V_CMP_NLT_F32_e64;
5504-
case AMDGPU::S_CMP_LT_F16: return AMDGPU::V_CMP_LT_F16_t16_e64;
5505-
case AMDGPU::S_CMP_EQ_F16: return AMDGPU::V_CMP_EQ_F16_t16_e64;
5506-
case AMDGPU::S_CMP_LE_F16: return AMDGPU::V_CMP_LE_F16_t16_e64;
5507-
case AMDGPU::S_CMP_GT_F16: return AMDGPU::V_CMP_GT_F16_t16_e64;
5508-
case AMDGPU::S_CMP_LG_F16: return AMDGPU::V_CMP_LG_F16_t16_e64;
5509-
case AMDGPU::S_CMP_GE_F16: return AMDGPU::V_CMP_GE_F16_t16_e64;
5510-
case AMDGPU::S_CMP_O_F16: return AMDGPU::V_CMP_O_F16_t16_e64;
5511-
case AMDGPU::S_CMP_U_F16: return AMDGPU::V_CMP_U_F16_t16_e64;
5512-
case AMDGPU::S_CMP_NGE_F16: return AMDGPU::V_CMP_NGE_F16_t16_e64;
5513-
case AMDGPU::S_CMP_NLG_F16: return AMDGPU::V_CMP_NLG_F16_t16_e64;
5514-
case AMDGPU::S_CMP_NGT_F16: return AMDGPU::V_CMP_NGT_F16_t16_e64;
5515-
case AMDGPU::S_CMP_NLE_F16: return AMDGPU::V_CMP_NLE_F16_t16_e64;
5516-
case AMDGPU::S_CMP_NEQ_F16: return AMDGPU::V_CMP_NEQ_F16_t16_e64;
5517-
case AMDGPU::S_CMP_NLT_F16: return AMDGPU::V_CMP_NLT_F16_t16_e64;
5504+
case AMDGPU::S_CMP_LT_F16:
5505+
return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LT_F16_t16_e64
5506+
: AMDGPU::V_CMP_LT_F16_fake16_e64;
5507+
case AMDGPU::S_CMP_EQ_F16:
5508+
return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_EQ_F16_t16_e64
5509+
: AMDGPU::V_CMP_EQ_F16_fake16_e64;
5510+
case AMDGPU::S_CMP_LE_F16:
5511+
return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LE_F16_t16_e64
5512+
: AMDGPU::V_CMP_LE_F16_fake16_e64;
5513+
case AMDGPU::S_CMP_GT_F16:
5514+
return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GT_F16_t16_e64
5515+
: AMDGPU::V_CMP_GT_F16_fake16_e64;
5516+
case AMDGPU::S_CMP_LG_F16:
5517+
return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LG_F16_t16_e64
5518+
: AMDGPU::V_CMP_LG_F16_fake16_e64;
5519+
case AMDGPU::S_CMP_GE_F16:
5520+
return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GE_F16_t16_e64
5521+
: AMDGPU::V_CMP_GE_F16_fake16_e64;
5522+
case AMDGPU::S_CMP_O_F16:
5523+
return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_O_F16_t16_e64
5524+
: AMDGPU::V_CMP_O_F16_fake16_e64;
5525+
case AMDGPU::S_CMP_U_F16:
5526+
return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_U_F16_t16_e64
5527+
: AMDGPU::V_CMP_U_F16_fake16_e64;
5528+
case AMDGPU::S_CMP_NGE_F16:
5529+
return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGE_F16_t16_e64
5530+
: AMDGPU::V_CMP_NGE_F16_fake16_e64;
5531+
case AMDGPU::S_CMP_NLG_F16:
5532+
return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLG_F16_t16_e64
5533+
: AMDGPU::V_CMP_NLG_F16_fake16_e64;
5534+
case AMDGPU::S_CMP_NGT_F16:
5535+
return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGT_F16_t16_e64
5536+
: AMDGPU::V_CMP_NGT_F16_fake16_e64;
5537+
case AMDGPU::S_CMP_NLE_F16:
5538+
return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLE_F16_t16_e64
5539+
: AMDGPU::V_CMP_NLE_F16_fake16_e64;
5540+
case AMDGPU::S_CMP_NEQ_F16:
5541+
return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NEQ_F16_t16_e64
5542+
: AMDGPU::V_CMP_NEQ_F16_fake16_e64;
5543+
case AMDGPU::S_CMP_NLT_F16:
5544+
return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLT_F16_t16_e64
5545+
: AMDGPU::V_CMP_NLT_F16_fake16_e64;
55185546
case AMDGPU::V_S_EXP_F32_e64: return AMDGPU::V_EXP_F32_e64;
55195547
case AMDGPU::V_S_EXP_F16_e64: return AMDGPU::V_EXP_F16_fake16_e64;
55205548
case AMDGPU::V_S_LOG_F32_e64: return AMDGPU::V_LOG_F32_e64;
@@ -7324,7 +7352,29 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
73247352
case AMDGPU::S_CMP_NGT_F32:
73257353
case AMDGPU::S_CMP_NLE_F32:
73267354
case AMDGPU::S_CMP_NEQ_F32:
7327-
case AMDGPU::S_CMP_NLT_F32:
7355+
case AMDGPU::S_CMP_NLT_F32: {
7356+
Register CondReg = MRI.createVirtualRegister(RI.getWaveMaskRegClass());
7357+
auto NewInstr =
7358+
BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(NewOpcode), CondReg)
7359+
.setMIFlags(Inst.getFlags());
7360+
if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0_modifiers) >=
7361+
0) {
7362+
NewInstr
7363+
.addImm(0) // src0_modifiers
7364+
.add(Inst.getOperand(0)) // src0
7365+
.addImm(0) // src1_modifiers
7366+
.add(Inst.getOperand(1)) // src1
7367+
.addImm(0); // clamp
7368+
} else {
7369+
NewInstr.add(Inst.getOperand(0)).add(Inst.getOperand(1));
7370+
}
7371+
legalizeOperands(*NewInstr, MDT);
7372+
int SCCIdx = Inst.findRegisterDefOperandIdx(AMDGPU::SCC, /*TRI=*/nullptr);
7373+
MachineOperand SCCOp = Inst.getOperand(SCCIdx);
7374+
addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
7375+
Inst.eraseFromParent();
7376+
return;
7377+
}
73287378
case AMDGPU::S_CMP_LT_F16:
73297379
case AMDGPU::S_CMP_EQ_F16:
73307380
case AMDGPU::S_CMP_LE_F16:
@@ -7343,14 +7393,15 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
73437393
auto NewInstr =
73447394
BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(NewOpcode), CondReg)
73457395
.setMIFlags(Inst.getFlags());
7346-
if (AMDGPU::getNamedOperandIdx(NewOpcode,
7347-
AMDGPU::OpName::src0_modifiers) >= 0) {
7396+
if (AMDGPU::hasNamedOperand(NewOpcode, AMDGPU::OpName::src0_modifiers)) {
73487397
NewInstr
73497398
.addImm(0) // src0_modifiers
73507399
.add(Inst.getOperand(0)) // src0
73517400
.addImm(0) // src1_modifiers
73527401
.add(Inst.getOperand(1)) // src1
73537402
.addImm(0); // clamp
7403+
if (AMDGPU::hasNamedOperand(NewOpcode, AMDGPU::OpName::op_sel))
7404+
NewInstr.addImm(0); // op_sel0
73547405
} else {
73557406
NewInstr
73567407
.add(Inst.getOperand(0))

0 commit comments

Comments
 (0)