diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index f7cba2f13c77a..8caad5efa1acb 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1104,10 +1104,13 @@ static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size, if (Size == 16 && !ST.has16BitInsts()) return -1; - const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc, unsigned S32Opc, + const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc, + unsigned FakeS16Opc, unsigned S32Opc, unsigned S64Opc) { if (Size == 16) - return ST.hasTrue16BitInsts() ? TrueS16Opc : S16Opc; + return ST.hasTrue16BitInsts() + ? ST.useRealTrue16Insts() ? TrueS16Opc : FakeS16Opc + : S16Opc; if (Size == 32) return S32Opc; return S64Opc; @@ -1118,83 +1121,109 @@ static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size, llvm_unreachable("Unknown condition code!"); case CmpInst::ICMP_NE: return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64, - AMDGPU::V_CMP_NE_U32_e64, AMDGPU::V_CMP_NE_U64_e64); + AMDGPU::V_CMP_NE_U16_fake16_e64, AMDGPU::V_CMP_NE_U32_e64, + AMDGPU::V_CMP_NE_U64_e64); case CmpInst::ICMP_EQ: return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64, - AMDGPU::V_CMP_EQ_U32_e64, AMDGPU::V_CMP_EQ_U64_e64); + AMDGPU::V_CMP_EQ_U16_fake16_e64, AMDGPU::V_CMP_EQ_U32_e64, + AMDGPU::V_CMP_EQ_U64_e64); case CmpInst::ICMP_SGT: return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64, - AMDGPU::V_CMP_GT_I32_e64, AMDGPU::V_CMP_GT_I64_e64); + AMDGPU::V_CMP_GT_I16_fake16_e64, AMDGPU::V_CMP_GT_I32_e64, + AMDGPU::V_CMP_GT_I64_e64); case CmpInst::ICMP_SGE: return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64, - AMDGPU::V_CMP_GE_I32_e64, AMDGPU::V_CMP_GE_I64_e64); + AMDGPU::V_CMP_GE_I16_fake16_e64, AMDGPU::V_CMP_GE_I32_e64, + AMDGPU::V_CMP_GE_I64_e64); case CmpInst::ICMP_SLT: return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64, - AMDGPU::V_CMP_LT_I32_e64, AMDGPU::V_CMP_LT_I64_e64); + AMDGPU::V_CMP_LT_I16_fake16_e64, AMDGPU::V_CMP_LT_I32_e64, + AMDGPU::V_CMP_LT_I64_e64); case CmpInst::ICMP_SLE: return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64, - AMDGPU::V_CMP_LE_I32_e64, AMDGPU::V_CMP_LE_I64_e64); + AMDGPU::V_CMP_LE_I16_fake16_e64, AMDGPU::V_CMP_LE_I32_e64, + AMDGPU::V_CMP_LE_I64_e64); case CmpInst::ICMP_UGT: return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64, - AMDGPU::V_CMP_GT_U32_e64, AMDGPU::V_CMP_GT_U64_e64); + AMDGPU::V_CMP_GT_U16_fake16_e64, AMDGPU::V_CMP_GT_U32_e64, + AMDGPU::V_CMP_GT_U64_e64); case CmpInst::ICMP_UGE: return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64, - AMDGPU::V_CMP_GE_U32_e64, AMDGPU::V_CMP_GE_U64_e64); + AMDGPU::V_CMP_GE_U16_fake16_e64, AMDGPU::V_CMP_GE_U32_e64, + AMDGPU::V_CMP_GE_U64_e64); case CmpInst::ICMP_ULT: return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64, - AMDGPU::V_CMP_LT_U32_e64, AMDGPU::V_CMP_LT_U64_e64); + AMDGPU::V_CMP_LT_U16_fake16_e64, AMDGPU::V_CMP_LT_U32_e64, + AMDGPU::V_CMP_LT_U64_e64); case CmpInst::ICMP_ULE: return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64, - AMDGPU::V_CMP_LE_U32_e64, AMDGPU::V_CMP_LE_U64_e64); + AMDGPU::V_CMP_LE_U16_fake16_e64, AMDGPU::V_CMP_LE_U32_e64, + AMDGPU::V_CMP_LE_U64_e64); case CmpInst::FCMP_OEQ: return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64, - AMDGPU::V_CMP_EQ_F32_e64, AMDGPU::V_CMP_EQ_F64_e64); + AMDGPU::V_CMP_EQ_F16_fake16_e64, 
AMDGPU::V_CMP_EQ_F32_e64, + AMDGPU::V_CMP_EQ_F64_e64); case CmpInst::FCMP_OGT: return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64, - AMDGPU::V_CMP_GT_F32_e64, AMDGPU::V_CMP_GT_F64_e64); + AMDGPU::V_CMP_GT_F16_fake16_e64, AMDGPU::V_CMP_GT_F32_e64, + AMDGPU::V_CMP_GT_F64_e64); case CmpInst::FCMP_OGE: return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64, - AMDGPU::V_CMP_GE_F32_e64, AMDGPU::V_CMP_GE_F64_e64); + AMDGPU::V_CMP_GE_F16_fake16_e64, AMDGPU::V_CMP_GE_F32_e64, + AMDGPU::V_CMP_GE_F64_e64); case CmpInst::FCMP_OLT: return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64, - AMDGPU::V_CMP_LT_F32_e64, AMDGPU::V_CMP_LT_F64_e64); + AMDGPU::V_CMP_LT_F16_fake16_e64, AMDGPU::V_CMP_LT_F32_e64, + AMDGPU::V_CMP_LT_F64_e64); case CmpInst::FCMP_OLE: return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64, - AMDGPU::V_CMP_LE_F32_e64, AMDGPU::V_CMP_LE_F64_e64); + AMDGPU::V_CMP_LE_F16_fake16_e64, AMDGPU::V_CMP_LE_F32_e64, + AMDGPU::V_CMP_LE_F64_e64); case CmpInst::FCMP_ONE: return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64, - AMDGPU::V_CMP_NEQ_F32_e64, AMDGPU::V_CMP_NEQ_F64_e64); + AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64, + AMDGPU::V_CMP_NEQ_F64_e64); case CmpInst::FCMP_ORD: return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64, - AMDGPU::V_CMP_O_F32_e64, AMDGPU::V_CMP_O_F64_e64); + AMDGPU::V_CMP_O_F16_fake16_e64, AMDGPU::V_CMP_O_F32_e64, + AMDGPU::V_CMP_O_F64_e64); case CmpInst::FCMP_UNO: return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64, - AMDGPU::V_CMP_U_F32_e64, AMDGPU::V_CMP_U_F64_e64); + AMDGPU::V_CMP_U_F16_fake16_e64, AMDGPU::V_CMP_U_F32_e64, + AMDGPU::V_CMP_U_F64_e64); case CmpInst::FCMP_UEQ: return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64, - AMDGPU::V_CMP_NLG_F32_e64, AMDGPU::V_CMP_NLG_F64_e64); + AMDGPU::V_CMP_NLG_F16_fake16_e64, AMDGPU::V_CMP_NLG_F32_e64, + AMDGPU::V_CMP_NLG_F64_e64); case CmpInst::FCMP_UGT: return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64, - AMDGPU::V_CMP_NLE_F32_e64, AMDGPU::V_CMP_NLE_F64_e64); + AMDGPU::V_CMP_NLE_F16_fake16_e64, AMDGPU::V_CMP_NLE_F32_e64, + AMDGPU::V_CMP_NLE_F64_e64); case CmpInst::FCMP_UGE: return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64, - AMDGPU::V_CMP_NLT_F32_e64, AMDGPU::V_CMP_NLT_F64_e64); + AMDGPU::V_CMP_NLT_F16_fake16_e64, AMDGPU::V_CMP_NLT_F32_e64, + AMDGPU::V_CMP_NLT_F64_e64); case CmpInst::FCMP_ULT: return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64, - AMDGPU::V_CMP_NGE_F32_e64, AMDGPU::V_CMP_NGE_F64_e64); + AMDGPU::V_CMP_NGE_F16_fake16_e64, AMDGPU::V_CMP_NGE_F32_e64, + AMDGPU::V_CMP_NGE_F64_e64); case CmpInst::FCMP_ULE: return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64, - AMDGPU::V_CMP_NGT_F32_e64, AMDGPU::V_CMP_NGT_F64_e64); + AMDGPU::V_CMP_NGT_F16_fake16_e64, AMDGPU::V_CMP_NGT_F32_e64, + AMDGPU::V_CMP_NGT_F64_e64); case CmpInst::FCMP_UNE: return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64, - AMDGPU::V_CMP_NEQ_F32_e64, AMDGPU::V_CMP_NEQ_F64_e64); + AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64, + AMDGPU::V_CMP_NEQ_F64_e64); case CmpInst::FCMP_TRUE: return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64, - AMDGPU::V_CMP_TRU_F32_e64, AMDGPU::V_CMP_TRU_F64_e64); + AMDGPU::V_CMP_TRU_F16_fake16_e64, AMDGPU::V_CMP_TRU_F32_e64, + AMDGPU::V_CMP_TRU_F64_e64); case CmpInst::FCMP_FALSE: return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64, - 
AMDGPU::V_CMP_F_F32_e64, AMDGPU::V_CMP_F_F64_e64); + AMDGPU::V_CMP_F_F16_fake16_e64, AMDGPU::V_CMP_F_F32_e64, + AMDGPU::V_CMP_F_F64_e64); } } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index c864f03f1f0f9..b7c008235fb7a 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -5501,20 +5501,48 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const { case AMDGPU::S_CMP_NLE_F32: return AMDGPU::V_CMP_NLE_F32_e64; case AMDGPU::S_CMP_NEQ_F32: return AMDGPU::V_CMP_NEQ_F32_e64; case AMDGPU::S_CMP_NLT_F32: return AMDGPU::V_CMP_NLT_F32_e64; - case AMDGPU::S_CMP_LT_F16: return AMDGPU::V_CMP_LT_F16_t16_e64; - case AMDGPU::S_CMP_EQ_F16: return AMDGPU::V_CMP_EQ_F16_t16_e64; - case AMDGPU::S_CMP_LE_F16: return AMDGPU::V_CMP_LE_F16_t16_e64; - case AMDGPU::S_CMP_GT_F16: return AMDGPU::V_CMP_GT_F16_t16_e64; - case AMDGPU::S_CMP_LG_F16: return AMDGPU::V_CMP_LG_F16_t16_e64; - case AMDGPU::S_CMP_GE_F16: return AMDGPU::V_CMP_GE_F16_t16_e64; - case AMDGPU::S_CMP_O_F16: return AMDGPU::V_CMP_O_F16_t16_e64; - case AMDGPU::S_CMP_U_F16: return AMDGPU::V_CMP_U_F16_t16_e64; - case AMDGPU::S_CMP_NGE_F16: return AMDGPU::V_CMP_NGE_F16_t16_e64; - case AMDGPU::S_CMP_NLG_F16: return AMDGPU::V_CMP_NLG_F16_t16_e64; - case AMDGPU::S_CMP_NGT_F16: return AMDGPU::V_CMP_NGT_F16_t16_e64; - case AMDGPU::S_CMP_NLE_F16: return AMDGPU::V_CMP_NLE_F16_t16_e64; - case AMDGPU::S_CMP_NEQ_F16: return AMDGPU::V_CMP_NEQ_F16_t16_e64; - case AMDGPU::S_CMP_NLT_F16: return AMDGPU::V_CMP_NLT_F16_t16_e64; + case AMDGPU::S_CMP_LT_F16: + return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LT_F16_t16_e64 + : AMDGPU::V_CMP_LT_F16_fake16_e64; + case AMDGPU::S_CMP_EQ_F16: + return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_EQ_F16_t16_e64 + : AMDGPU::V_CMP_EQ_F16_fake16_e64; + case AMDGPU::S_CMP_LE_F16: + return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LE_F16_t16_e64 + : AMDGPU::V_CMP_LE_F16_fake16_e64; + case AMDGPU::S_CMP_GT_F16: + return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GT_F16_t16_e64 + : AMDGPU::V_CMP_GT_F16_fake16_e64; + case AMDGPU::S_CMP_LG_F16: + return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LG_F16_t16_e64 + : AMDGPU::V_CMP_LG_F16_fake16_e64; + case AMDGPU::S_CMP_GE_F16: + return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GE_F16_t16_e64 + : AMDGPU::V_CMP_GE_F16_fake16_e64; + case AMDGPU::S_CMP_O_F16: + return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_O_F16_t16_e64 + : AMDGPU::V_CMP_O_F16_fake16_e64; + case AMDGPU::S_CMP_U_F16: + return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_U_F16_t16_e64 + : AMDGPU::V_CMP_U_F16_fake16_e64; + case AMDGPU::S_CMP_NGE_F16: + return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGE_F16_t16_e64 + : AMDGPU::V_CMP_NGE_F16_fake16_e64; + case AMDGPU::S_CMP_NLG_F16: + return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLG_F16_t16_e64 + : AMDGPU::V_CMP_NLG_F16_fake16_e64; + case AMDGPU::S_CMP_NGT_F16: + return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGT_F16_t16_e64 + : AMDGPU::V_CMP_NGT_F16_fake16_e64; + case AMDGPU::S_CMP_NLE_F16: + return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLE_F16_t16_e64 + : AMDGPU::V_CMP_NLE_F16_fake16_e64; + case AMDGPU::S_CMP_NEQ_F16: + return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NEQ_F16_t16_e64 + : AMDGPU::V_CMP_NEQ_F16_fake16_e64; + case AMDGPU::S_CMP_NLT_F16: + return ST.useRealTrue16Insts() ? 
AMDGPU::V_CMP_NLT_F16_t16_e64 + : AMDGPU::V_CMP_NLT_F16_fake16_e64; case AMDGPU::V_S_EXP_F32_e64: return AMDGPU::V_EXP_F32_e64; case AMDGPU::V_S_EXP_F16_e64: return AMDGPU::V_EXP_F16_fake16_e64; case AMDGPU::V_S_LOG_F32_e64: return AMDGPU::V_LOG_F32_e64; @@ -7324,7 +7352,29 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist, case AMDGPU::S_CMP_NGT_F32: case AMDGPU::S_CMP_NLE_F32: case AMDGPU::S_CMP_NEQ_F32: - case AMDGPU::S_CMP_NLT_F32: + case AMDGPU::S_CMP_NLT_F32: { + Register CondReg = MRI.createVirtualRegister(RI.getWaveMaskRegClass()); + auto NewInstr = + BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(NewOpcode), CondReg) + .setMIFlags(Inst.getFlags()); + if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0_modifiers) >= + 0) { + NewInstr + .addImm(0) // src0_modifiers + .add(Inst.getOperand(0)) // src0 + .addImm(0) // src1_modifiers + .add(Inst.getOperand(1)) // src1 + .addImm(0); // clamp + } else { + NewInstr.add(Inst.getOperand(0)).add(Inst.getOperand(1)); + } + legalizeOperands(*NewInstr, MDT); + int SCCIdx = Inst.findRegisterDefOperandIdx(AMDGPU::SCC, /*TRI=*/nullptr); + MachineOperand SCCOp = Inst.getOperand(SCCIdx); + addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg); + Inst.eraseFromParent(); + return; + } case AMDGPU::S_CMP_LT_F16: case AMDGPU::S_CMP_EQ_F16: case AMDGPU::S_CMP_LE_F16: @@ -7343,14 +7393,15 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist, auto NewInstr = BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(NewOpcode), CondReg) .setMIFlags(Inst.getFlags()); - if (AMDGPU::getNamedOperandIdx(NewOpcode, - AMDGPU::OpName::src0_modifiers) >= 0) { + if (AMDGPU::hasNamedOperand(NewOpcode, AMDGPU::OpName::src0_modifiers)) { NewInstr .addImm(0) // src0_modifiers .add(Inst.getOperand(0)) // src0 .addImm(0) // src1_modifiers .add(Inst.getOperand(1)) // src1 .addImm(0); // clamp + if (AMDGPU::hasNamedOperand(NewOpcode, AMDGPU::OpName::op_sel)) + NewInstr.addImm(0); // op_sel0 } else { NewInstr .add(Inst.getOperand(0)) diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td index f4ccae1decb1d..0a4b51c4ac631 100644 --- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td @@ -192,6 +192,8 @@ class VOPC_Real .ret>, + def _e64 : VOP3_Pseudo.ret, /*IsVOP3P*/false, P.HasOpSel>, Commutable_REV, VCMPXNoSDstTable<1, opName#"_e64">, VCMPVCMPXTable { @@ -373,7 +375,7 @@ multiclass VOPCX_Pseudos , + def _nosdst_e64 : VOP3_Pseudo, Commutable_REV, VCMPXNoSDstTable<0, opName#"_e64">, VCMPVCMPXTable { @@ -799,11 +801,22 @@ defm V_CMPX_T_U64 : VOPCX_I64 <"v_cmpx_t_u64">; // Class instructions //===----------------------------------------------------------------------===// -class VOPC_Class_Profile sched, ValueType src0VT, ValueType src1VT = i32> : +class VOPC_Class_Profile_Base sched, ValueType src0VT, ValueType src1VT = i32> : VOPC_Profile { + let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, + Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1, + Clamp:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel); + + let AsmSDWA = " vcc, $src0_modifiers, $src1_modifiers$clamp $src0_sel $src1_sel"; + let HasClamp = 0; + let HasOMod = 0; +} + +class VOPC_Class_Profile sched, ValueType src0VT, ValueType src1VT = i32> : + VOPC_Class_Profile_Base { let AsmDPP = "$src0_modifiers, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; let AsmDPP16 = AsmDPP#"$fi"; - let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0, Src1DPP:$src1, dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask, 
DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl); + let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0, Src1DPP:$src1, dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask, DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl); let InsDPP16 = !con(InsDPP, (ins Dpp16FI:$fi)); // DPP8 forbids modifiers and can inherit from VOPC_Profile @@ -812,15 +825,7 @@ class VOPC_Class_Profile sched, ValueType src0VT, ValueType let InsVOP3Base = !con(InsPartVOP3DPP, !if(HasOpSel, (ins op_sel0:$op_sel), (ins))); let AsmVOP3Base = "$sdst, $src0_modifiers, $src1"; - - let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, - Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1, - Clamp:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel); - - let AsmSDWA = " vcc, $src0_modifiers, $src1_modifiers$clamp $src0_sel $src1_sel"; let HasSrc1Mods = 0; - let HasClamp = 0; - let HasOMod = 0; } multiclass VOPC_Class_Profile_t16 sched> { @@ -837,16 +842,25 @@ multiclass VOPC_Class_Profile_t16 sched> { let Src1ModDPP = getSrcModDPP_t16.ret; let Src2ModDPP = getSrcModDPP_t16.ret; } - def _fake16 : VOPC_Class_Profile { + def _fake16 : VOPC_Class_Profile_Base { let IsTrue16 = 1; + let DstRC = getVALUDstForVT_fake16.ret; + let DstRC64 = getVALUDstForVT.ret; + let Src0RC32 = getVOPSrc0ForVT.ret; let Src1RC32 = getVregSrcForVT.ret; let Src1RC64 = VSrc_b32; let Src0DPP = getVregSrcForVT.ret; let Src1DPP = getVregSrcForVT.ret; let Src2DPP = getVregSrcForVT.ret; - let Src0ModDPP = getSrcModDPP_t16.ret; - let Src1ModDPP = getSrcModDPP_t16.ret; - let Src2ModDPP = getSrcModDPP_t16.ret; + let Src0ModDPP = getSrcModDPP_t16.ret; + let Src1ModDPP = getSrcModDPP_t16.ret; + let Src2ModDPP = getSrcModDPP_t16.ret; + let Src0VOP3DPP = VGPRSrc_32; + let Src1VOP3DPP = getVOP3DPPSrcForVT.ret; + let Src2VOP3DPP = getVOP3DPPSrcForVT.ret; + let Src0ModVOP3DPP = getSrc0ModVOP3DPP.ret; + let Src1ModVOP3DPP = getSrcModVOP3DPP.ret; + let Src2ModVOP3DPP = getSrcModVOP3DPP.ret; } } @@ -889,17 +903,34 @@ multiclass VOPC_Class_NoSdst_Profile_t16 sched> { } } -class getVOPCClassPat64 { - list ret = - [(set i1:$sdst, +multiclass VOPCClassPat64 { + defvar inst = !cast(inst_name#"_e64"); + defvar P = inst.Pfl; + def : GCNPat < + (i1:$sdst (AMDGPUfp_class (P.Src0VT (VOP3ModsNonCanonicalizing P.Src0VT:$src0, i32:$src0_modifiers)), - i32:$src1))]; + P.Src1VT:$src1)), + (inst i32:$src0_modifiers, P.Src0VT:$src0, P.Src1VT:$src1) + >; } +multiclass VOPCClassPat64_fake16 { + defvar inst = !cast(inst_name#"_fake16_e64"); + defvar P = inst.Pfl; + def : GCNPat < + (i1:$sdst + (AMDGPUfp_class + (P.Src0VT (VOP3ModsNonCanonicalizing P.Src0VT:$src0, i32:$src0_modifiers)), + i32:$src1)), + (inst i32:$src0_modifiers, P.Src0VT:$src0, + 0 /*src1_modifiers*/, VGPR_32:$src1) + >; +} -// Special case for class instructions which only have modifiers on -// the 1st source operand. +// cmp_class ignores the FP mode and faithfully reports the unmodified +// source value. 
+let ReadsModeReg = 0, mayRaiseFPException = 0 in { multiclass VOPC_Class_Pseudos { def _e32 : VOPC_Pseudo , @@ -910,7 +941,7 @@ multiclass VOPC_Class_Pseudos .ret>, + def _e64 : VOP3_Pseudo, VCMPXNoSDstTable<1, opName#"_e64"> { let Defs = !if(DefExec, [EXEC], []); let SchedRW = p.Schedule; @@ -957,7 +988,7 @@ multiclass VOPCX_Class_Pseudos , + def _nosdst_e64 : VOP3_Pseudo, VCMPXNoSDstTable<0, opName#"_e64"> { let Defs = [EXEC]; let SchedRW = P_NoSDst.Schedule; @@ -990,6 +1021,7 @@ multiclass VOPCX_Class_Pseudos ; def VOPC_I1_F32_I32 : VOPC_Class_Profile<[Write32Bit], f32>; @@ -1002,12 +1034,14 @@ def VOPC_F64_I32 : VOPC_Class_NoSdst_Profile<[Write64Bit], f64>; multiclass VOPC_CLASS_F16 { let OtherPredicates = [Has16BitInsts], True16Predicate = NotHasTrue16BitInsts in { defm NAME : VOPC_Class_Pseudos ; + defm : VOPCClassPat64; } - let OtherPredicates = [UseRealTrue16Insts] in { + let True16Predicate = UseRealTrue16Insts in { defm _t16 : VOPC_Class_Pseudos ; } - let OtherPredicates = [UseFakeTrue16Insts] in { + let True16Predicate = UseFakeTrue16Insts in { defm _fake16 : VOPC_Class_Pseudos ; + defm : VOPCClassPat64_fake16; } } @@ -1023,21 +1057,22 @@ multiclass VOPCX_CLASS_F16 { } } -multiclass VOPC_CLASS_F32 : - VOPC_Class_Pseudos ; +multiclass VOPC_CLASS_F32 { + defm NAME : VOPC_Class_Pseudos ; + defm : VOPCClassPat64; +} multiclass VOPCX_CLASS_F32 : VOPCX_Class_Pseudos ; -multiclass VOPC_CLASS_F64 : - VOPC_Class_Pseudos ; +multiclass VOPC_CLASS_F64 { + defm NAME : VOPC_Class_Pseudos ; + defm : VOPCClassPat64; +} multiclass VOPCX_CLASS_F64 : VOPCX_Class_Pseudos ; -// cmp_class ignores the FP mode and faithfully reports the unmodified -// source value. -let ReadsModeReg = 0, mayRaiseFPException = 0 in { defm V_CMP_CLASS_F32 : VOPC_CLASS_F32 <"v_cmp_class_f32">; defm V_CMPX_CLASS_F32 : VOPCX_CLASS_F32 <"v_cmpx_class_f32">; defm V_CMP_CLASS_F64 : VOPC_CLASS_F64 <"v_cmp_class_f64">; @@ -1045,7 +1080,6 @@ defm V_CMPX_CLASS_F64 : VOPCX_CLASS_F64 <"v_cmpx_class_f64">; defm V_CMP_CLASS_F16 : VOPC_CLASS_F16 <"v_cmp_class_f16">; defm V_CMPX_CLASS_F16 : VOPCX_CLASS_F16 <"v_cmpx_class_f16">; -} // End ReadsModeReg = 0, mayRaiseFPException = 0 //===----------------------------------------------------------------------===// // V_ICMPIntrinsic Pattern. 
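For readers following the opcode choice above: a minimal standalone sketch (the Subtarget struct and feature flags here are invented stand-ins for GCNSubtarget, not code from this patch) of how the updated Select lambda in getV_CMPOpcode and the getVALUOp ternaries now pick between the legacy, _t16 and _fake16 compare opcodes.

#include <cstdio>

// Stand-ins for the GCNSubtarget queries the patch relies on.
struct Subtarget {
  bool Has16BitInsts;
  bool HasTrue16BitInsts;
  bool RealTrue16Enabled;
  bool hasTrue16BitInsts() const { return HasTrue16BitInsts; }
  bool useRealTrue16Insts() const { return HasTrue16BitInsts && RealTrue16Enabled; }
};

// Mirrors the updated Select lambda: the width picks the instruction family,
// and for 16-bit compares the subtarget decides between the legacy, _t16 and
// _fake16 forms.
const char *selectCmpOpcode(const Subtarget &ST, unsigned Size) {
  if (Size == 16 && !ST.Has16BitInsts)
    return "<none>";
  if (Size == 16)
    return ST.hasTrue16BitInsts()
               ? (ST.useRealTrue16Insts() ? "V_CMP_EQ_U16_t16_e64"
                                          : "V_CMP_EQ_U16_fake16_e64")
               : "V_CMP_EQ_U16_e64";
  return Size == 32 ? "V_CMP_EQ_U32_e64" : "V_CMP_EQ_U64_e64";
}

int main() {
  Subtarget Fake16{true, true, false}, True16{true, true, true};
  std::printf("%s\n", selectCmpOpcode(Fake16, 16)); // V_CMP_EQ_U16_fake16_e64
  std::printf("%s\n", selectCmpOpcode(True16, 16)); // V_CMP_EQ_U16_t16_e64
}

On a GFX11 subtarget running in fake16 mode this resolves to the _fake16_e64 opcodes, which is what the updated GlobalISel and MIR tests below now check.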
@@ -1283,6 +1317,7 @@ class VOPC_DPP16 op, VOP_DPP_Pseudo ps, string opName = ps.OpName> : VOPC_DPP_Base { let AssemblerPredicate = HasDPP16; let SubtargetPredicate = HasDPP16; + let True16Predicate = ps.True16Predicate; let hasSideEffects = ps.hasSideEffects; let Defs = ps.Defs; let SchedRW = ps.SchedRW; @@ -1303,6 +1338,7 @@ class VOPC_DPP8 op, VOPC_Pseudo ps, string opName = ps.OpName> let SchedRW = ps.SchedRW; let Uses = ps.Uses; let OtherPredicates = ps.OtherPredicates; + let True16Predicate = ps.True16Predicate; let Constraints = ""; } @@ -1333,6 +1369,7 @@ class VOPC64_DPP16 op, VOP_DPP_Pseudo ps, string opName = ps.OpName> : VOPC64_DPP_Base { let AssemblerPredicate = HasDPP16; let SubtargetPredicate = HasDPP16; + let True16Predicate = ps.True16Predicate; let hasSideEffects = ps.hasSideEffects; let Defs = ps.Defs; let SchedRW = ps.SchedRW; @@ -1375,6 +1412,7 @@ class VOPC64_DPP8 op, VOP_Pseudo ps, string opName = ps.OpName> let SchedRW = ps.SchedRW; let Uses = ps.Uses; let OtherPredicates = ps.OtherPredicates; + let True16Predicate = ps.True16Predicate; } class VOPC64_DPP8_Dst op, VOP_Pseudo ps, string opName = ps.OpName> diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index aa9758219db91..eb9d00972468c 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -1958,12 +1958,18 @@ class ClassPat : GCNPat < (inst i32:$src0_mods, vt:$src0, (V_MOV_B32_e32 timm:$mask)) >; +class ClassPat_t16 : GCNPat < + (is_fpclass (vt (VOP3ModsNonCanonicalizing vt:$src0, i32:$src0_mods)), (i32 timm:$mask)), + (inst i32:$src0_mods, vt:$src0, SRCMODS.NONE, (V_MOV_B32_e32 timm:$mask)) +>; + def : ClassPat { - let OtherPredicates = [NotHasTrue16BitInsts, Has16BitInsts]; + let OtherPredicates = [Has16BitInsts]; + let True16Predicate = NotHasTrue16BitInsts; } -def : ClassPat { - let OtherPredicates = [HasTrue16BitInsts]; +def : ClassPat_t16 { + let True16Predicate = UseFakeTrue16Insts; } def : ClassPat; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir index b5f91b6b86083..55015c6d13d8a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir @@ -30,8 +30,8 @@ body: | ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX11-FAKE16-NEXT: [[V_CMP_F_F16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_F_F16_t16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec - ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_t16_e64_]] + ; GFX11-FAKE16-NEXT: [[V_CMP_F_F16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_F_F16_fake16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec + ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_fake16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_FPTRUNC %0 @@ -68,8 +68,8 @@ body: | ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-FAKE16-NEXT: 
[[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX11-FAKE16-NEXT: [[V_CMP_TRU_F16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_TRU_F16_t16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec - ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_t16_e64_]] + ; GFX11-FAKE16-NEXT: [[V_CMP_TRU_F16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_TRU_F16_fake16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec + ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_fake16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_FPTRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir index a67a0b6455fac..4241f945a87d5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir @@ -30,8 +30,8 @@ body: | ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX11-FAKE16-NEXT: [[V_CMP_F_F16_t16_e64_:%[0-9]+]]:sreg_64 = V_CMP_F_F16_t16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec - ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_t16_e64_]] + ; GFX11-FAKE16-NEXT: [[V_CMP_F_F16_fake16_e64_:%[0-9]+]]:sreg_64 = V_CMP_F_F16_fake16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec + ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_fake16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_FPTRUNC %0 @@ -68,8 +68,8 @@ body: | ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX11-FAKE16-NEXT: [[V_CMP_TRU_F16_t16_e64_:%[0-9]+]]:sreg_64 = V_CMP_TRU_F16_t16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec - ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_t16_e64_]] + ; GFX11-FAKE16-NEXT: [[V_CMP_TRU_F16_fake16_e64_:%[0-9]+]]:sreg_64 = V_CMP_TRU_F16_fake16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec + ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_fake16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_FPTRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir index e994f42e0d60d..d45bc31a12729 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir @@ -20,6 +20,7 @@ body: | ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]] + ; ; WAVE32-LABEL: name: icmp_eq_s16_sv ; WAVE32: liveins: $sgpr0, $vgpr0 ; WAVE32-NEXT: {{ $}} @@ -27,13 +28,14 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]] + ; ; GFX11-LABEL: name: icmp_eq_s16_sv ; GFX11: liveins: $sgpr0, $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]] + ; GFX11-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:sgpr(s16) = G_TRUNC %0 @@ -59,6 +61,7 @@ body: | ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]] + ; ; WAVE32-LABEL: name: icmp_eq_s16_vs ; WAVE32: liveins: $sgpr0, $vgpr0 ; WAVE32-NEXT: {{ $}} @@ -66,13 +69,14 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]] + ; ; GFX11-LABEL: name: icmp_eq_s16_vs ; GFX11: liveins: $sgpr0, $vgpr0 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]] + ; GFX11-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 %2:vgpr(s16) = G_TRUNC %0 @@ -98,6 +102,7 @@ body: | ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE64-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]] + ; ; WAVE32-LABEL: name: icmp_eq_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} @@ -105,13 +110,14 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]] + ; ; GFX11-LABEL: name: icmp_eq_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 
0, implicit [[V_CMP_EQ_U16_t16_e64_]] + ; GFX11-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -137,6 +143,7 @@ body: | ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE64-NEXT: [[V_CMP_NE_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U16_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_e64_]] + ; ; WAVE32-LABEL: name: icmp_ne_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} @@ -144,13 +151,14 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE32-NEXT: [[V_CMP_NE_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_e64_]] + ; ; GFX11-LABEL: name: icmp_ne_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_CMP_NE_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_t16_e64_]] + ; GFX11-NEXT: [[V_CMP_NE_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_fake16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -176,6 +184,7 @@ body: | ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE64-NEXT: [[V_CMP_LT_I16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I16_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_e64_]] + ; ; WAVE32-LABEL: name: icmp_slt_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} @@ -183,13 +192,14 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE32-NEXT: [[V_CMP_LT_I16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_e64_]] + ; ; GFX11-LABEL: name: icmp_slt_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_CMP_LT_I16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_t16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_t16_e64_]] + ; GFX11-NEXT: [[V_CMP_LT_I16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_fake16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -215,6 +225,7 @@ body: | ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE64-NEXT: [[V_CMP_LE_I16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LE_I16_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_e64_]] + ; ; WAVE32-LABEL: name: icmp_sle_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} @@ -222,13 +233,14 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE32-NEXT: [[V_CMP_LE_I16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_e64_]] + ; ; GFX11-LABEL: name: icmp_sle_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; 
GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_CMP_LE_I16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_t16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_t16_e64_]] + ; GFX11-NEXT: [[V_CMP_LE_I16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_fake16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -254,6 +266,7 @@ body: | ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE64-NEXT: [[V_CMP_LT_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_U16_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_e64_]] + ; ; WAVE32-LABEL: name: icmp_ult_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} @@ -261,13 +274,14 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE32-NEXT: [[V_CMP_LT_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_e64_]] + ; ; GFX11-LABEL: name: icmp_ult_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_CMP_LT_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_t16_e64_]] + ; GFX11-NEXT: [[V_CMP_LT_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_fake16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -293,6 +307,7 @@ body: | ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE64-NEXT: [[V_CMP_LE_U16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LE_U16_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_e64_]] + ; ; WAVE32-LABEL: name: icmp_ule_s16_vv ; WAVE32: liveins: $vgpr0, $vgpr1 ; WAVE32-NEXT: {{ $}} @@ -300,13 +315,14 @@ body: | ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; WAVE32-NEXT: [[V_CMP_LE_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U16_e64 [[COPY]], [[COPY1]], implicit $exec ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_e64_]] + ; ; GFX11-LABEL: name: icmp_ule_s16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[V_CMP_LE_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U16_t16_e64 [[COPY]], [[COPY1]], implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_t16_e64_]] + ; GFX11-NEXT: [[V_CMP_LE_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_fake16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir index 30a24c675a76b..5d90bab1384eb 100644 --- a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir +++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir @@ -12,8 +12,8 @@ body: | ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; GCN-NEXT: [[V_CVT_F16_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F16_U16_fake16_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - 
; GCN-NEXT: [[V_CMP_LT_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F16_t16_e64 0, [[V_CVT_F16_U16_fake16_e64_]], 0, [[DEF1]], 0, implicit $mode, implicit $exec - ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed [[V_CMP_LT_F16_t16_e64_]], implicit $exec + ; GCN-NEXT: [[V_CMP_LT_F16_fake16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F16_fake16_e64 0, [[V_CVT_F16_U16_fake16_e64_]], 0, [[DEF1]], 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed [[V_CMP_LT_F16_fake16_e64_]], implicit $exec %0:vgpr_32 = IMPLICIT_DEF %1:sreg_32 = IMPLICIT_DEF %2:vgpr_32 = V_CVT_F16_U16_fake16_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/shrink-true16.mir b/llvm/test/CodeGen/AMDGPU/shrink-true16.mir index be759049bc3a7..245c5e1005d08 100644 --- a/llvm/test/CodeGen/AMDGPU/shrink-true16.mir +++ b/llvm/test/CodeGen/AMDGPU/shrink-true16.mir @@ -11,8 +11,8 @@ body: | ; GFX1100-LABEL: name: 16bit_lo128_shrink ; GFX1100: liveins: $vgpr127 ; GFX1100-NEXT: {{ $}} - ; GFX1100-NEXT: V_CMP_EQ_U16_t16_e32 0, $vgpr127, implicit-def $vcc_lo, implicit $exec, implicit $exec - $vcc_lo = V_CMP_EQ_U16_t16_e64 0, $vgpr127, implicit-def $vcc, implicit $exec + ; GFX1100-NEXT: V_CMP_EQ_U16_fake16_e32 0, $vgpr127, implicit-def $vcc_lo, implicit $exec, implicit $exec + $vcc_lo = V_CMP_EQ_U16_fake16_e64 0, $vgpr127, implicit-def $vcc, implicit $exec ... --- @@ -24,6 +24,6 @@ body: | ; GFX1100-LABEL: name: 16bit_lo128_no_shrink ; GFX1100: liveins: $vgpr128 ; GFX1100-NEXT: {{ $}} - ; GFX1100-NEXT: $vcc_lo = V_CMP_EQ_U16_t16_e64 0, $vgpr128, implicit-def $vcc_lo, implicit $exec - $vcc_lo = V_CMP_EQ_U16_t16_e64 0, $vgpr128, implicit-def $vcc, implicit $exec + ; GFX1100-NEXT: $vcc_lo = V_CMP_EQ_U16_fake16_e64 0, $vgpr128, implicit-def $vcc_lo, implicit $exec + $vcc_lo = V_CMP_EQ_U16_fake16_e64 0, $vgpr128, implicit-def $vcc, implicit $exec ... 
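The CHECK-line updates above all feed through the same operand-building change in SIInstrInfo::moveToVALUImpl. A small standalone sketch (OpcodeInfo and buildCmpOperands are invented for illustration; the real code queries AMDGPU::hasNamedOperand on the selected opcode) of the operand list the rewritten S_CMP receives:

#include <cstdio>
#include <string>
#include <vector>

// Only the two properties the rewrite cares about are modeled here.
struct OpcodeInfo {
  bool HasSrcModifiers;
  bool HasOpSel;
};

// VOP3 compares with source modifiers get explicit src0_modifiers,
// src1_modifiers and clamp immediates, plus a trailing op_sel immediate when
// the (t16) opcode defines one; plain integer compares keep the short form.
std::vector<std::string> buildCmpOperands(const OpcodeInfo &Info,
                                          const std::string &Src0,
                                          const std::string &Src1) {
  if (!Info.HasSrcModifiers)
    return {Src0, Src1};
  std::vector<std::string> Ops = {"0", Src0, "0", Src1, "0"};
  if (Info.HasOpSel)
    Ops.push_back("0"); // op_sel
  return Ops;
}

int main() {
  for (const std::string &Op : buildCmpOperands({true, false}, "%a", "%b"))
    std::printf("%s ", Op.c_str()); // prints: 0 %a 0 %b 0
  std::printf("\n");
}

This is why the fake16 F16 compares in the tests read like V_CMP_F_F16_fake16_e64 0, %a, 0, %b, 0, while the U16/I16 compares keep the two-operand form.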
diff --git a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir index 123893674ff5e..656c849bbd56b 100644 --- a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir +++ b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir @@ -18,15 +18,15 @@ body: | ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: V_CMP_LT_F32_e32_dpp 0, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $mode, implicit $exec ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec - ; GCN-NEXT: V_CMPX_EQ_I16_t16_nosdst_e64 [[V_MOV_B32_dpp]], [[COPY]], implicit-def $exec, implicit-def $vcc_lo, implicit $mode, implicit $exec - ; GCN-NEXT: [[V_CMP_CLASS_F16_t16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_CLASS_F16_t16_e64_dpp 0, [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit $exec - ; GCN-NEXT: [[V_CMP_GE_F16_t16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_GE_F16_t16_e64_dpp 1, [[COPY1]], 0, [[COPY]], 1, 1, 15, 15, 1, implicit $mode, implicit $exec + ; GCN-NEXT: V_CMPX_EQ_I16_fake16_nosdst_e64 [[V_MOV_B32_dpp]], [[COPY]], implicit-def $exec, implicit-def $vcc_lo, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CMP_CLASS_F16_fake16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_CLASS_F16_fake16_e64_dpp 0, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit $exec + ; GCN-NEXT: [[V_CMP_GE_F16_fake16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_GE_F16_fake16_e64_dpp 1, [[COPY1]], 0, [[COPY]], 1, 1, 15, 15, 1, implicit $mode, implicit $exec ; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec ; GCN-NEXT: V_CMPX_GT_U32_nosdst_e64 [[V_MOV_B32_dpp1]], [[COPY]], implicit-def $exec, implicit $mode, implicit $exec ; GCN-NEXT: V_CMP_CLASS_F32_e32_dpp 2, [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $exec ; GCN-NEXT: V_CMP_NGE_F32_e32_dpp 0, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $mode, implicit $exec ; GCN-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec - ; GCN-NEXT: [[V_CMP_NGE_F16_t16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_NGE_F16_t16_e64 0, [[V_CMP_NGE_F16_t16_e64_]], 0, [[COPY]], 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CMP_NGE_F16_fake16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_NGE_F16_fake16_e64 0, [[V_CMP_NGE_F16_fake16_e64_]], 0, [[COPY]], 0, implicit $mode, implicit $exec ; GCN-NEXT: [[V_CMP_NGE_F32_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_NGE_F32_e64_dpp 0, [[COPY1]], 0, [[COPY]], 0, 1, 15, 15, 1, implicit $mode, implicit $exec ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sgpr_32 = S_AND_B32 [[V_CMP_NGE_F32_e64_dpp]], 10101, implicit-def $scc ; GCN-NEXT: V_CMP_GT_I32_e32_dpp [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $exec @@ -40,13 +40,13 @@ body: | ; unsafe to combine cmpx %5:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec - V_CMPX_EQ_I16_t16_nosdst_e64 %5, %0, implicit-def $exec, implicit-def $vcc, implicit $mode, implicit $exec + V_CMPX_EQ_I16_fake16_nosdst_e64 %5, %0, implicit-def $exec, implicit-def $vcc, implicit $mode, implicit $exec %6:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec - %7:sgpr_32 = V_CMP_CLASS_F16_t16_e64 0, %6, %0, implicit-def $vcc, implicit $mode, implicit $exec + %7:sgpr_32 = V_CMP_CLASS_F16_fake16_e64 0, %6, 0, %0, implicit-def $vcc, implicit $mode, implicit $exec %8:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec - %9:sgpr_32 = V_CMP_GE_F16_t16_e64 1, %8, 0, %0, 1, implicit $mode, implicit $exec + %9:sgpr_32 = V_CMP_GE_F16_fake16_e64 1, %8, 0, %0, 1, implicit $mode, 
implicit $exec ; unsafe to combine cmpx %10:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec @@ -61,7 +61,7 @@ body: | ; do not shrink True16 instructions %15:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec - %16:sgpr_32 = V_CMP_NGE_F16_t16_e64 0, %16, 0, %0, 0, implicit $mode, implicit $exec + %16:sgpr_32 = V_CMP_NGE_F16_fake16_e64 0, %16, 0, %0, 0, implicit $mode, implicit $exec ; do not shrink, sdst used %17:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec @@ -89,7 +89,7 @@ body: | ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 14, 1, implicit $exec - ; GCN-NEXT: [[V_CMP_CLASS_F16_t16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_CLASS_F16_t16_e64 0, [[V_MOV_B32_dpp]], [[COPY]], implicit-def $vcc_lo, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CMP_CLASS_F16_fake16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_CLASS_F16_fake16_e64 0, [[V_MOV_B32_dpp]], 0, [[COPY]], implicit-def $vcc_lo, implicit $mode, implicit $exec ; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[COPY1]], 1, 13, 15, 1, implicit $exec ; GCN-NEXT: [[V_CMP_GE_F32_e64_:%[0-9]+]]:sgpr_32 = V_CMP_GE_F32_e64 1, [[V_MOV_B32_dpp1]], 0, [[COPY]], 1, implicit $mode, implicit $exec %0:vgpr_32 = COPY $vgpr0 @@ -100,7 +100,7 @@ body: | ; Do not combine VOPC when row_mask or bank_mask is not 0xf ; All cases are covered by generic rules for creating DPP instructions %4:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 15, 14, 1, implicit $exec - %99:sgpr_32 = V_CMP_CLASS_F16_t16_e64 0, %4, %0, implicit-def $vcc, implicit $mode, implicit $exec + %99:sgpr_32 = V_CMP_CLASS_F16_fake16_e64 0, %4, 0, %0, implicit-def $vcc, implicit $mode, implicit $exec %5:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 13, 15, 1, implicit $exec %6:sgpr_32 = V_CMP_GE_F32_e64 1, %5, 0, %0, 1, implicit $mode, implicit $exec
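One further detail the vopc_dpp.mir changes rely on: the fake16 class-compare profile now derives from VOPC_Class_Profile_Base and keeps src1 source modifiers, so its e64 form carries one more immediate than the form without src1 modifiers. A toy sketch (printClassCmp is invented; the operand layouts are taken from the updated tests and selection patterns):

#include <cstdio>

// Contrasts the e64 operand layouts seen in the MIR tests above.
void printClassCmp(bool Fake16, const char *Src0, const char *Mask) {
  if (Fake16) // src0_modifiers, src0, src1_modifiers, mask
    std::printf("V_CMP_CLASS_F16_fake16_e64 0, %s, 0, %s\n", Src0, Mask);
  else        // src0_modifiers, src0, mask
    std::printf("V_CMP_CLASS_F16_e64 0, %s, %s\n", Src0, Mask);
}

int main() {
  printClassCmp(false, "%src", "%mask");
  printClassCmp(true, "%src", "%mask");
}

Accordingly, ClassPat_t16 and VOPCClassPat64_fake16 fill that slot with SRCMODS.NONE / 0, and the V_CMP_CLASS_F16_fake16_e64 lines in vopc_dpp.mir gained the extra 0 before the class mask.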