@@ -3479,7 +3479,7 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
       Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
       Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
       Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
-      Opc == AMDGPU::V_FMAC_F16_t16_e64) {
+      Opc == AMDGPU::V_FMAC_F16_fake16_e64) {
     // Don't fold if we are using source or output modifiers. The new VOP2
     // instructions don't have them.
     if (hasAnyModifiersSet(UseMI))
@@ -3499,7 +3499,7 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
     bool IsFMA =
         Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
         Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
-        Opc == AMDGPU::V_FMAC_F16_t16_e64;
+        Opc == AMDGPU::V_FMAC_F16_fake16_e64;
     MachineOperand *Src1 = getNamedOperand(UseMI, AMDGPU::OpName::src1);
     MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2);
 
@@ -3532,16 +3532,16 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
 
       unsigned NewOpc =
           IsFMA ? (IsF32 ? AMDGPU::V_FMAMK_F32
-                   : ST.hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_t16
+                   : ST.hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_fake16
                                             : AMDGPU::V_FMAMK_F16)
                 : (IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16);
       if (pseudoToMCOpcode(NewOpc) == -1)
         return false;
 
-      // V_FMAMK_F16_t16 takes VGPR_32_Lo128 operands, so the rewrite
+      // V_FMAMK_F16_fake16 takes VGPR_32_Lo128 operands, so the rewrite
       // would also require restricting their register classes. For now
       // just bail out.
-      if (NewOpc == AMDGPU::V_FMAMK_F16_t16)
+      if (NewOpc == AMDGPU::V_FMAMK_F16_fake16)
         return false;
 
       const int64_t Imm = getImmFor(RegSrc == Src1 ? *Src0 : *Src1);
@@ -3556,8 +3556,8 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
       Src0->setIsKill(RegSrc->isKill());
 
       if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
-          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
-          Opc == AMDGPU::V_FMAC_F16_e64)
+          Opc == AMDGPU::V_FMAC_F32_e64 ||
+          Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64)
         UseMI.untieRegOperand(
             AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
 
@@ -3611,24 +3611,24 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
 
       unsigned NewOpc =
           IsFMA ? (IsF32 ? AMDGPU::V_FMAAK_F32
-                   : ST.hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_t16
+                   : ST.hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_fake16
                                             : AMDGPU::V_FMAAK_F16)
                 : (IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16);
       if (pseudoToMCOpcode(NewOpc) == -1)
         return false;
 
-      // V_FMAAK_F16_t16 takes VGPR_32_Lo128 operands, so the rewrite
+      // V_FMAAK_F16_fake16 takes VGPR_32_Lo128 operands, so the rewrite
       // would also require restricting their register classes. For now
       // just bail out.
-      if (NewOpc == AMDGPU::V_FMAAK_F16_t16)
+      if (NewOpc == AMDGPU::V_FMAAK_F16_fake16)
         return false;
 
       // FIXME: This would be a lot easier if we could return a new instruction
       // instead of having to modify in place.
 
       if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
-          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
-          Opc == AMDGPU::V_FMAC_F16_e64)
+          Opc == AMDGPU::V_FMAC_F32_e64 ||
+          Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64)
         UseMI.untieRegOperand(
             AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
 
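The two foldImmediate hunks above follow one pattern: pick the fmamk/fmaak opcode from the operand width and ST.hasTrue16BitInsts(), then bail out when the fake16 form is selected because it takes VGPR_32_Lo128 operands. Below is a minimal, self-contained sketch of that selection logic; the Opcode enum, the selectMadMkOpcode helper, and the plain booleans standing in for subtarget state are illustrative assumptions, not code from the patch.

```cpp
// Minimal sketch (NOT the LLVM code) of the fold's opcode selection: with
// true16 instructions the fake16 fmamk form is chosen, and the fold is then
// rejected because that form takes VGPR_32_Lo128 operands.
#include <cstdio>
#include <optional>

enum class Opcode {
  V_FMAMK_F32,
  V_FMAMK_F16,
  V_FMAMK_F16_fake16,
  V_MADMK_F32,
  V_MADMK_F16,
};

// Mirrors the NewOpc ternary plus the fake16 bail-out shown in the hunks.
std::optional<Opcode> selectMadMkOpcode(bool IsFMA, bool IsF32,
                                        bool HasTrue16BitInsts) {
  Opcode NewOpc =
      IsFMA ? (IsF32               ? Opcode::V_FMAMK_F32
               : HasTrue16BitInsts ? Opcode::V_FMAMK_F16_fake16
                                   : Opcode::V_FMAMK_F16)
            : (IsF32 ? Opcode::V_MADMK_F32 : Opcode::V_MADMK_F16);
  // The fake16 form would require restricting the operands to VGPR_32_Lo128,
  // so the fold is skipped; the patch keeps this bail-out, only renamed.
  if (NewOpc == Opcode::V_FMAMK_F16_fake16)
    return std::nullopt;
  return NewOpc;
}

int main() {
  // f16 FMA on a true16 subtarget: the fold is rejected.
  std::printf("%s\n",
              selectMadMkOpcode(true, false, true) ? "fold" : "bail out");
  // f32 FMA: folds to V_FMAMK_F32.
  std::printf("%s\n",
              selectMadMkOpcode(true, true, true) ? "fold" : "bail out");
  return 0;
}
```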
@@ -3851,19 +3851,20 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
     return MIB;
   }
 
-  assert(Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
-         "V_FMAC_F16_t16_e32 is not supported and not expected to be present "
-         "pre-RA");
+  assert(
+      Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
+      "V_FMAC_F16_fake16_e32 is not supported and not expected to be present "
+      "pre-RA");
 
   // Handle MAC/FMAC.
   bool IsF16 = Opc == AMDGPU::V_MAC_F16_e32 || Opc == AMDGPU::V_MAC_F16_e64 ||
                Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
-               Opc == AMDGPU::V_FMAC_F16_t16_e64;
+               Opc == AMDGPU::V_FMAC_F16_fake16_e64;
   bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 ||
                Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
                Opc == AMDGPU::V_FMAC_LEGACY_F32_e64 ||
                Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
-               Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
+               Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
                Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
   bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
   bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
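The IsF16/IsFMA classification in the hunk above simply adds V_FMAC_F16_fake16_e64 to both opcode sets, alongside the plain f16 FMAC forms. A standalone sketch of that classification follows; the Opc enum and the isAnyOf helper are hypothetical stand-ins used only for illustration, not part of the patch.

```cpp
// Sketch (illustrative names only) of the MAC/FMAC classification:
// V_FMAC_F16_fake16_e64 counts as both an f16 opcode and an FMA opcode.
#include <cstdio>
#include <initializer_list>

enum class Opc {
  V_MAC_F16_e64,
  V_FMAC_F16_e64,
  V_FMAC_F16_fake16_e64,
  V_FMAC_F32_e64,
  V_FMAC_F64_e64,
};

// Small helper standing in for the chained == comparisons in the real code.
static bool isAnyOf(Opc O, std::initializer_list<Opc> Candidates) {
  for (Opc C : Candidates)
    if (O == C)
      return true;
  return false;
}

int main() {
  Opc O = Opc::V_FMAC_F16_fake16_e64;
  bool IsF16 = isAnyOf(O, {Opc::V_MAC_F16_e64, Opc::V_FMAC_F16_e64,
                           Opc::V_FMAC_F16_fake16_e64});
  bool IsFMA = isAnyOf(O, {Opc::V_FMAC_F16_e64, Opc::V_FMAC_F16_fake16_e64,
                           Opc::V_FMAC_F32_e64, Opc::V_FMAC_F64_e64});
  std::printf("IsF16=%d IsFMA=%d\n", IsF16, IsFMA); // prints IsF16=1 IsFMA=1
  return 0;
}
```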
@@ -3877,7 +3878,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
     return nullptr;
   case AMDGPU::V_MAC_F16_e64:
   case AMDGPU::V_FMAC_F16_e64:
-  case AMDGPU::V_FMAC_F16_t16_e64:
+  case AMDGPU::V_FMAC_F16_fake16_e64:
   case AMDGPU::V_MAC_F32_e64:
   case AMDGPU::V_MAC_LEGACY_F32_e64:
   case AMDGPU::V_FMAC_F32_e64:
@@ -3962,7 +3963,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
     int64_t Imm;
     if (!Src0Literal && getFoldableImm(Src2, Imm, &DefMI)) {
       unsigned NewOpc =
-          IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_t16
+          IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_fake16
                                                    : AMDGPU::V_FMAAK_F16)
                          : AMDGPU::V_FMAAK_F32)
                 : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);
@@ -3981,7 +3982,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
       }
     }
     unsigned NewOpc =
-        IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_t16
+        IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_fake16
                                                  : AMDGPU::V_FMAMK_F16)
                        : AMDGPU::V_FMAMK_F32)
              : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
@@ -4436,7 +4437,7 @@ bool SIInstrInfo::canShrink(const MachineInstr &MI,
   case AMDGPU::V_MAC_F32_e64:
   case AMDGPU::V_MAC_LEGACY_F32_e64:
   case AMDGPU::V_FMAC_F16_e64:
-  case AMDGPU::V_FMAC_F16_t16_e64:
+  case AMDGPU::V_FMAC_F16_fake16_e64:
   case AMDGPU::V_FMAC_F32_e64:
   case AMDGPU::V_FMAC_F64_e64:
   case AMDGPU::V_FMAC_LEGACY_F32_e64:
@@ -5483,7 +5484,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
   case AMDGPU::S_MUL_F16: return AMDGPU::V_MUL_F16_fake16_e64;
   case AMDGPU::S_CVT_PK_RTZ_F16_F32: return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
   case AMDGPU::S_FMAC_F32: return AMDGPU::V_FMAC_F32_e64;
-  case AMDGPU::S_FMAC_F16: return AMDGPU::V_FMAC_F16_t16_e64;
+  case AMDGPU::S_FMAC_F16: return AMDGPU::V_FMAC_F16_fake16_e64;
   case AMDGPU::S_FMAMK_F32: return AMDGPU::V_FMAMK_F32;
   case AMDGPU::S_FMAAK_F32: return AMDGPU::V_FMAAK_F32;
   case AMDGPU::S_CMP_LT_F32: return AMDGPU::V_CMP_LT_F32_e64;