@@ -3480,7 +3480,7 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
34803480 Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
34813481 Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
34823482 Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
3483- Opc == AMDGPU::V_FMAC_F16_t16_e64 ) {
3483+ Opc == AMDGPU::V_FMAC_F16_fake16_e64 ) {
34843484 // Don't fold if we are using source or output modifiers. The new VOP2
34853485 // instructions don't have them.
34863486 if (hasAnyModifiersSet (UseMI))
@@ -3500,7 +3500,7 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
35003500 bool IsFMA =
35013501 Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
35023502 Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
3503- Opc == AMDGPU::V_FMAC_F16_t16_e64 ;
3503+ Opc == AMDGPU::V_FMAC_F16_fake16_e64 ;
35043504 MachineOperand *Src1 = getNamedOperand (UseMI, AMDGPU::OpName::src1);
35053505 MachineOperand *Src2 = getNamedOperand (UseMI, AMDGPU::OpName::src2);
35063506
@@ -3533,16 +3533,16 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
35333533
35343534 unsigned NewOpc =
35353535 IsFMA ? (IsF32 ? AMDGPU::V_FMAMK_F32
3536- : ST.hasTrue16BitInsts () ? AMDGPU::V_FMAMK_F16_t16
3536+ : ST.hasTrue16BitInsts () ? AMDGPU::V_FMAMK_F16_fake16
35373537 : AMDGPU::V_FMAMK_F16)
35383538 : (IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16);
35393539 if (pseudoToMCOpcode (NewOpc) == -1 )
35403540 return false ;
35413541
3542- // V_FMAMK_F16_t16 takes VGPR_32_Lo128 operands, so the rewrite
3542+ // V_FMAMK_F16_fake16 takes VGPR_32_Lo128 operands, so the rewrite
35433543 // would also require restricting their register classes. For now
35443544 // just bail out.
3545- if (NewOpc == AMDGPU::V_FMAMK_F16_t16 )
3545+ if (NewOpc == AMDGPU::V_FMAMK_F16_fake16 )
35463546 return false ;
35473547
35483548 const int64_t Imm = getImmFor (RegSrc == Src1 ? *Src0 : *Src1);
@@ -3557,8 +3557,8 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
35573557 Src0->setIsKill (RegSrc->isKill ());
35583558
35593559 if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
3560- Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3561- Opc == AMDGPU::V_FMAC_F16_e64)
3560+ Opc == AMDGPU::V_FMAC_F32_e64 ||
3561+ Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU:: V_FMAC_F16_e64)
35623562 UseMI.untieRegOperand (
35633563 AMDGPU::getNamedOperandIdx (Opc, AMDGPU::OpName::src2));
35643564
@@ -3612,24 +3612,24 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
36123612
36133613 unsigned NewOpc =
36143614 IsFMA ? (IsF32 ? AMDGPU::V_FMAAK_F32
3615- : ST.hasTrue16BitInsts () ? AMDGPU::V_FMAAK_F16_t16
3615+ : ST.hasTrue16BitInsts () ? AMDGPU::V_FMAAK_F16_fake16
36163616 : AMDGPU::V_FMAAK_F16)
36173617 : (IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16);
36183618 if (pseudoToMCOpcode (NewOpc) == -1 )
36193619 return false ;
36203620
3621- // V_FMAAK_F16_t16 takes VGPR_32_Lo128 operands, so the rewrite
3621+ // V_FMAAK_F16_fake16 takes VGPR_32_Lo128 operands, so the rewrite
36223622 // would also require restricting their register classes. For now
36233623 // just bail out.
3624- if (NewOpc == AMDGPU::V_FMAAK_F16_t16 )
3624+ if (NewOpc == AMDGPU::V_FMAAK_F16_fake16 )
36253625 return false ;
36263626
36273627 // FIXME: This would be a lot easier if we could return a new instruction
36283628 // instead of having to modify in place.
36293629
36303630 if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
3631- Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3632- Opc == AMDGPU::V_FMAC_F16_e64)
3631+ Opc == AMDGPU::V_FMAC_F32_e64 ||
3632+ Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU:: V_FMAC_F16_e64)
36333633 UseMI.untieRegOperand (
36343634 AMDGPU::getNamedOperandIdx (Opc, AMDGPU::OpName::src2));
36353635
@@ -3852,19 +3852,22 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
38523852 return MIB;
38533853 }
38543854
3855+ // FIXME-TRUE16: re-enable this assert once V_FMAC_F16_t16 is enabled.
3856+ #if 0
38553857 assert(Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
38563858 "V_FMAC_F16_t16_e32 is not supported and not expected to be present "
38573859 "pre-RA");
3860+ #endif
38583861
38593862 // Handle MAC/FMAC.
38603863 bool IsF16 = Opc == AMDGPU::V_MAC_F16_e32 || Opc == AMDGPU::V_MAC_F16_e64 ||
38613864 Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
3862- Opc == AMDGPU::V_FMAC_F16_t16_e64 ;
3865+ Opc == AMDGPU::V_FMAC_F16_fake16_e64 ;
38633866 bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 ||
38643867 Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
38653868 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64 ||
38663869 Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
3867- Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3870+ Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
38683871 Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
38693872 bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
38703873 bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
@@ -3878,7 +3881,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
38783881 return nullptr ;
38793882 case AMDGPU::V_MAC_F16_e64:
38803883 case AMDGPU::V_FMAC_F16_e64:
3881- case AMDGPU::V_FMAC_F16_t16_e64 :
3884+ case AMDGPU::V_FMAC_F16_fake16_e64 :
38823885 case AMDGPU::V_MAC_F32_e64:
38833886 case AMDGPU::V_MAC_LEGACY_F32_e64:
38843887 case AMDGPU::V_FMAC_F32_e64:
@@ -3963,7 +3966,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
39633966 int64_t Imm;
39643967 if (!Src0Literal && getFoldableImm (Src2, Imm, &DefMI)) {
39653968 unsigned NewOpc =
3966- IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts () ? AMDGPU::V_FMAAK_F16_t16
3969+ IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts () ? AMDGPU::V_FMAAK_F16_fake16
39673970 : AMDGPU::V_FMAAK_F16)
39683971 : AMDGPU::V_FMAAK_F32)
39693972 : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);
@@ -3982,7 +3985,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
39823985 }
39833986 }
39843987 unsigned NewOpc =
3985- IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts () ? AMDGPU::V_FMAMK_F16_t16
3988+ IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts () ? AMDGPU::V_FMAMK_F16_fake16
39863989 : AMDGPU::V_FMAMK_F16)
39873990 : AMDGPU::V_FMAMK_F32)
39883991 : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
@@ -4437,7 +4440,7 @@ bool SIInstrInfo::canShrink(const MachineInstr &MI,
44374440 case AMDGPU::V_MAC_F32_e64:
44384441 case AMDGPU::V_MAC_LEGACY_F32_e64:
44394442 case AMDGPU::V_FMAC_F16_e64:
4440- case AMDGPU::V_FMAC_F16_t16_e64 :
4443+ case AMDGPU::V_FMAC_F16_fake16_e64 :
44414444 case AMDGPU::V_FMAC_F32_e64:
44424445 case AMDGPU::V_FMAC_F64_e64:
44434446 case AMDGPU::V_FMAC_LEGACY_F32_e64:
@@ -5484,7 +5487,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
54845487 case AMDGPU::S_MUL_F16: return AMDGPU::V_MUL_F16_fake16_e64;
54855488 case AMDGPU::S_CVT_PK_RTZ_F16_F32: return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
54865489 case AMDGPU::S_FMAC_F32: return AMDGPU::V_FMAC_F32_e64;
5487- case AMDGPU::S_FMAC_F16: return AMDGPU::V_FMAC_F16_t16_e64 ;
5490+ case AMDGPU::S_FMAC_F16: return AMDGPU::V_FMAC_F16_fake16_e64 ;
54885491 case AMDGPU::S_FMAMK_F32: return AMDGPU::V_FMAMK_F32;
54895492 case AMDGPU::S_FMAAK_F32: return AMDGPU::V_FMAAK_F32;
54905493 case AMDGPU::S_CMP_LT_F32: return AMDGPU::V_CMP_LT_F32_e64;
0 commit comments