@@ -815,7 +815,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
815815 if (Fix16BitCopies) {
816816 if (((Size == 16 ) != (SrcSize == 16 ))) {
817817 // Non-VGPR Src and Dst will later be expanded back to 32 bits.
818- assert (ST.hasTrue16BitInsts ());
818+ assert (ST.useRealTrue16Insts ());
819819 Register &RegToFix = (Size == 32 ) ? DestReg : SrcReg;
820820 MCRegister SubReg = RI.getSubReg (RegToFix, AMDGPU::lo16);
821821 RegToFix = SubReg;
@@ -989,7 +989,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
989989 return ;
990990 }
991991
992- if (ST.hasTrue16BitInsts ()) {
992+ if (ST.useRealTrue16Insts ()) {
993993 if (IsSGPRSrc) {
994994 assert (SrcLow);
995995 SrcReg = NewSrcReg;
@@ -5579,27 +5579,39 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
55795579 return ST.useRealTrue16Insts () ? AMDGPU::V_FLOOR_F16_t16_e64
55805580 : AMDGPU::V_FLOOR_F16_fake16_e64;
55815581 case AMDGPU::S_TRUNC_F16:
5582- return AMDGPU::V_TRUNC_F16_fake16_e64;
5582+ return ST.useRealTrue16Insts () ? AMDGPU::V_TRUNC_F16_t16_e64
5583+ : AMDGPU::V_TRUNC_F16_fake16_e64;
55835584 case AMDGPU::S_RNDNE_F16:
5584- return AMDGPU::V_RNDNE_F16_fake16_e64;
5585+ return ST.useRealTrue16Insts () ? AMDGPU::V_RNDNE_F16_t16_e64
5586+ : AMDGPU::V_RNDNE_F16_fake16_e64;
55855587 case AMDGPU::S_ADD_F32: return AMDGPU::V_ADD_F32_e64;
55865588 case AMDGPU::S_SUB_F32: return AMDGPU::V_SUB_F32_e64;
55875589 case AMDGPU::S_MIN_F32: return AMDGPU::V_MIN_F32_e64;
55885590 case AMDGPU::S_MAX_F32: return AMDGPU::V_MAX_F32_e64;
55895591 case AMDGPU::S_MINIMUM_F32: return AMDGPU::V_MINIMUM_F32_e64;
55905592 case AMDGPU::S_MAXIMUM_F32: return AMDGPU::V_MAXIMUM_F32_e64;
55915593 case AMDGPU::S_MUL_F32: return AMDGPU::V_MUL_F32_e64;
5592- case AMDGPU::S_ADD_F16: return AMDGPU::V_ADD_F16_fake16_e64;
5593- case AMDGPU::S_SUB_F16: return AMDGPU::V_SUB_F16_fake16_e64;
5594- case AMDGPU::S_MIN_F16: return AMDGPU::V_MIN_F16_fake16_e64;
5595- case AMDGPU::S_MAX_F16: return AMDGPU::V_MAX_F16_fake16_e64;
5594+ case AMDGPU::S_ADD_F16:
5595+ return ST.useRealTrue16Insts () ? AMDGPU::V_ADD_F16_t16_e64
5596+ : AMDGPU::V_ADD_F16_fake16_e64;
5597+ case AMDGPU::S_SUB_F16:
5598+ return ST.useRealTrue16Insts () ? AMDGPU::V_SUB_F16_t16_e64
5599+ : AMDGPU::V_SUB_F16_fake16_e64;
5600+ case AMDGPU::S_MIN_F16:
5601+ return ST.useRealTrue16Insts () ? AMDGPU::V_MIN_F16_t16_e64
5602+ : AMDGPU::V_MIN_F16_fake16_e64;
5603+ case AMDGPU::S_MAX_F16:
5604+ return ST.useRealTrue16Insts () ? AMDGPU::V_MAX_F16_t16_e64
5605+ : AMDGPU::V_MAX_F16_fake16_e64;
55965606 case AMDGPU::S_MINIMUM_F16:
55975607 return ST.useRealTrue16Insts () ? AMDGPU::V_MINIMUM_F16_t16_e64
55985608 : AMDGPU::V_MINIMUM_F16_fake16_e64;
55995609 case AMDGPU::S_MAXIMUM_F16:
56005610 return ST.useRealTrue16Insts () ? AMDGPU::V_MAXIMUM_F16_t16_e64
56015611 : AMDGPU::V_MAXIMUM_F16_fake16_e64;
5602- case AMDGPU::S_MUL_F16: return AMDGPU::V_MUL_F16_fake16_e64;
5612+ case AMDGPU::S_MUL_F16:
5613+ return ST.useRealTrue16Insts () ? AMDGPU::V_MUL_F16_t16_e64
5614+ : AMDGPU::V_MUL_F16_fake16_e64;
56035615 case AMDGPU::S_CVT_PK_RTZ_F16_F32: return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
56045616 case AMDGPU::S_FMAC_F32: return AMDGPU::V_FMAC_F32_e64;
56055617 case AMDGPU::S_FMAC_F16:
@@ -5664,15 +5676,25 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
56645676 return ST.useRealTrue16Insts () ? AMDGPU::V_CMP_NLT_F16_t16_e64
56655677 : AMDGPU::V_CMP_NLT_F16_fake16_e64;
56665678 case AMDGPU::V_S_EXP_F32_e64: return AMDGPU::V_EXP_F32_e64;
5667- case AMDGPU::V_S_EXP_F16_e64: return AMDGPU::V_EXP_F16_fake16_e64;
5679+ case AMDGPU::V_S_EXP_F16_e64:
5680+ return ST.useRealTrue16Insts () ? AMDGPU::V_EXP_F16_t16_e64
5681+ : AMDGPU::V_EXP_F16_fake16_e64;
56685682 case AMDGPU::V_S_LOG_F32_e64: return AMDGPU::V_LOG_F32_e64;
5669- case AMDGPU::V_S_LOG_F16_e64: return AMDGPU::V_LOG_F16_fake16_e64;
5683+ case AMDGPU::V_S_LOG_F16_e64:
5684+ return ST.useRealTrue16Insts () ? AMDGPU::V_LOG_F16_t16_e64
5685+ : AMDGPU::V_LOG_F16_fake16_e64;
56705686 case AMDGPU::V_S_RCP_F32_e64: return AMDGPU::V_RCP_F32_e64;
5671- case AMDGPU::V_S_RCP_F16_e64: return AMDGPU::V_RCP_F16_fake16_e64;
5687+ case AMDGPU::V_S_RCP_F16_e64:
5688+ return ST.useRealTrue16Insts () ? AMDGPU::V_RCP_F16_t16_e64
5689+ : AMDGPU::V_RCP_F16_fake16_e64;
56725690 case AMDGPU::V_S_RSQ_F32_e64: return AMDGPU::V_RSQ_F32_e64;
5673- case AMDGPU::V_S_RSQ_F16_e64: return AMDGPU::V_RSQ_F16_fake16_e64;
5691+ case AMDGPU::V_S_RSQ_F16_e64:
5692+ return ST.useRealTrue16Insts () ? AMDGPU::V_RSQ_F16_t16_e64
5693+ : AMDGPU::V_RSQ_F16_fake16_e64;
56745694 case AMDGPU::V_S_SQRT_F32_e64: return AMDGPU::V_SQRT_F32_e64;
5675- case AMDGPU::V_S_SQRT_F16_e64: return AMDGPU::V_SQRT_F16_fake16_e64;
5695+ case AMDGPU::V_S_SQRT_F16_e64:
5696+ return ST.useRealTrue16Insts () ? AMDGPU::V_SQRT_F16_t16_e64
5697+ : AMDGPU::V_SQRT_F16_fake16_e64;
56765698 }
56775699 llvm_unreachable (
56785700 " Unexpected scalar opcode without corresponding vector one!" );
0 commit comments