@@ -814,7 +814,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
814814 if (Fix16BitCopies) {
815815 if (((Size == 16 ) != (SrcSize == 16 ))) {
816816 // Non-VGPR Src and Dst will later be expanded back to 32 bits.
817- assert (ST.hasTrue16BitInsts ());
817+ assert (ST.useRealTrue16Insts ());
818818 MCRegister &RegToFix = (Size == 32 ) ? DestReg : SrcReg;
819819 MCRegister SubReg = RI.getSubReg (RegToFix, AMDGPU::lo16);
820820 RegToFix = SubReg;
@@ -988,7 +988,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
988988 return ;
989989 }
990990
991- if (ST.hasTrue16BitInsts ()) {
991+ if (ST.useRealTrue16Insts ()) {
992992 if (IsSGPRSrc) {
993993 assert (SrcLow);
994994 SrcReg = NewSrcReg;
@@ -5559,30 +5559,44 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
55595559 return ST.useRealTrue16Insts () ? AMDGPU::V_FLOOR_F16_t16_e64
55605560 : AMDGPU::V_FLOOR_F16_fake16_e64;
55615561 case AMDGPU::S_TRUNC_F16:
5562- return AMDGPU::V_TRUNC_F16_fake16_e64;
5562+ return ST.useRealTrue16Insts () ? AMDGPU::V_TRUNC_F16_t16_e64
5563+ : AMDGPU::V_TRUNC_F16_fake16_e64;
55635564 case AMDGPU::S_RNDNE_F16:
5564- return AMDGPU::V_RNDNE_F16_fake16_e64;
5565+ return ST.useRealTrue16Insts () ? AMDGPU::V_RNDNE_F16_t16_e64
5566+ : AMDGPU::V_RNDNE_F16_fake16_e64;
55655567 case AMDGPU::S_ADD_F32: return AMDGPU::V_ADD_F32_e64;
55665568 case AMDGPU::S_SUB_F32: return AMDGPU::V_SUB_F32_e64;
55675569 case AMDGPU::S_MIN_F32: return AMDGPU::V_MIN_F32_e64;
55685570 case AMDGPU::S_MAX_F32: return AMDGPU::V_MAX_F32_e64;
55695571 case AMDGPU::S_MINIMUM_F32: return AMDGPU::V_MINIMUM_F32_e64;
55705572 case AMDGPU::S_MAXIMUM_F32: return AMDGPU::V_MAXIMUM_F32_e64;
55715573 case AMDGPU::S_MUL_F32: return AMDGPU::V_MUL_F32_e64;
5572- case AMDGPU::S_ADD_F16: return AMDGPU::V_ADD_F16_fake16_e64;
5573- case AMDGPU::S_SUB_F16: return AMDGPU::V_SUB_F16_fake16_e64;
5574- case AMDGPU::S_MIN_F16: return AMDGPU::V_MIN_F16_fake16_e64;
5575- case AMDGPU::S_MAX_F16: return AMDGPU::V_MAX_F16_fake16_e64;
5574+ case AMDGPU::S_ADD_F16:
5575+ return ST.useRealTrue16Insts () ? AMDGPU::V_ADD_F16_t16_e64
5576+ : AMDGPU::V_ADD_F16_fake16_e64;
5577+ case AMDGPU::S_SUB_F16:
5578+ return ST.useRealTrue16Insts () ? AMDGPU::V_SUB_F16_t16_e64
5579+ : AMDGPU::V_SUB_F16_fake16_e64;
5580+ case AMDGPU::S_MIN_F16:
5581+ return ST.useRealTrue16Insts () ? AMDGPU::V_MIN_F16_t16_e64
5582+ : AMDGPU::V_MIN_F16_fake16_e64;
5583+ case AMDGPU::S_MAX_F16:
5584+ return ST.useRealTrue16Insts () ? AMDGPU::V_MAX_F16_t16_e64
5585+ : AMDGPU::V_MAX_F16_fake16_e64;
55765586 case AMDGPU::S_MINIMUM_F16:
55775587 return ST.useRealTrue16Insts () ? AMDGPU::V_MINIMUM_F16_t16_e64
55785588 : AMDGPU::V_MINIMUM_F16_fake16_e64;
55795589 case AMDGPU::S_MAXIMUM_F16:
55805590 return ST.useRealTrue16Insts () ? AMDGPU::V_MAXIMUM_F16_t16_e64
55815591 : AMDGPU::V_MAXIMUM_F16_fake16_e64;
5582- case AMDGPU::S_MUL_F16: return AMDGPU::V_MUL_F16_fake16_e64;
5592+ case AMDGPU::S_MUL_F16:
5593+ return ST.useRealTrue16Insts () ? AMDGPU::V_MUL_F16_t16_e64
5594+ : AMDGPU::V_MUL_F16_fake16_e64;
55835595 case AMDGPU::S_CVT_PK_RTZ_F16_F32: return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
55845596 case AMDGPU::S_FMAC_F32: return AMDGPU::V_FMAC_F32_e64;
5585- case AMDGPU::S_FMAC_F16: return AMDGPU::V_FMAC_F16_fake16_e64;
5597+ case AMDGPU::S_FMAC_F16:
5598+ return ST.useRealTrue16Insts () ? AMDGPU::V_FMAC_F16_t16_e64
5599+ : AMDGPU::V_FMAC_F16_fake16_e64;
55865600 case AMDGPU::S_FMAMK_F32: return AMDGPU::V_FMAMK_F32;
55875601 case AMDGPU::S_FMAAK_F32: return AMDGPU::V_FMAAK_F32;
55885602 case AMDGPU::S_CMP_LT_F32: return AMDGPU::V_CMP_LT_F32_e64;
@@ -5642,15 +5656,25 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
56425656 return ST.useRealTrue16Insts () ? AMDGPU::V_CMP_NLT_F16_t16_e64
56435657 : AMDGPU::V_CMP_NLT_F16_fake16_e64;
56445658 case AMDGPU::V_S_EXP_F32_e64: return AMDGPU::V_EXP_F32_e64;
5645- case AMDGPU::V_S_EXP_F16_e64: return AMDGPU::V_EXP_F16_fake16_e64;
5659+ case AMDGPU::V_S_EXP_F16_e64:
5660+ return ST.useRealTrue16Insts () ? AMDGPU::V_EXP_F16_t16_e64
5661+ : AMDGPU::V_EXP_F16_fake16_e64;
56465662 case AMDGPU::V_S_LOG_F32_e64: return AMDGPU::V_LOG_F32_e64;
5647- case AMDGPU::V_S_LOG_F16_e64: return AMDGPU::V_LOG_F16_fake16_e64;
5663+ case AMDGPU::V_S_LOG_F16_e64:
5664+ return ST.useRealTrue16Insts () ? AMDGPU::V_LOG_F16_t16_e64
5665+ : AMDGPU::V_LOG_F16_fake16_e64;
56485666 case AMDGPU::V_S_RCP_F32_e64: return AMDGPU::V_RCP_F32_e64;
5649- case AMDGPU::V_S_RCP_F16_e64: return AMDGPU::V_RCP_F16_fake16_e64;
5667+ case AMDGPU::V_S_RCP_F16_e64:
5668+ return ST.useRealTrue16Insts () ? AMDGPU::V_RCP_F16_t16_e64
5669+ : AMDGPU::V_RCP_F16_fake16_e64;
56505670 case AMDGPU::V_S_RSQ_F32_e64: return AMDGPU::V_RSQ_F32_e64;
5651- case AMDGPU::V_S_RSQ_F16_e64: return AMDGPU::V_RSQ_F16_fake16_e64;
5671+ case AMDGPU::V_S_RSQ_F16_e64:
5672+ return ST.useRealTrue16Insts () ? AMDGPU::V_RSQ_F16_t16_e64
5673+ : AMDGPU::V_RSQ_F16_fake16_e64;
56525674 case AMDGPU::V_S_SQRT_F32_e64: return AMDGPU::V_SQRT_F32_e64;
5653- case AMDGPU::V_S_SQRT_F16_e64: return AMDGPU::V_SQRT_F16_fake16_e64;
5675+ case AMDGPU::V_S_SQRT_F16_e64:
5676+ return ST.useRealTrue16Insts () ? AMDGPU::V_SQRT_F16_t16_e64
5677+ : AMDGPU::V_SQRT_F16_fake16_e64;
56545678 }
56555679 llvm_unreachable (
56565680 " Unexpected scalar opcode without corresponding vector one!" );
0 commit comments