@@ -1054,9 +1054,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
10541054
10551055 auto &FPTruncActions = getActionDefinitionsBuilder (G_FPTRUNC);
10561056 if (ST.hasCvtPkF16F32Inst ()) {
1057- FPTruncActions.legalFor ({{S32, S64}, {S16, S32}, {V2S16, V2S32}});
1058- if (TM.Options .UnsafeFPMath )
1059- FPTruncActions.legalFor ({V2S16, V2S64});
1057+ FPTruncActions.legalFor ({{S32, S64}, {S16, S32}, {V2S16, V2S32}})
1058+ .customFor ({V2S16, V2S64});
10601059 } else
10611060 FPTruncActions.legalFor ({{S32, S64}, {S16, S32}});
10621061 FPTruncActions.scalarize (0 ).lower ();
@@ -2156,6 +2155,8 @@ bool AMDGPULegalizerInfo::legalizeCustom(
21562155 case TargetOpcode::G_FMINNUM_IEEE:
21572156 case TargetOpcode::G_FMAXNUM_IEEE:
21582157 return legalizeMinNumMaxNum (Helper, MI);
2158+ case TargetOpcode::G_FPTRUNC:
2159+ return legalizeFPTrunc (Helper, MI, MRI);
21592160 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
21602161 return legalizeExtractVectorElt (MI, MRI, B);
21612162 case TargetOpcode::G_INSERT_VECTOR_ELT:
@@ -2742,6 +2743,29 @@ bool AMDGPULegalizerInfo::legalizeMinNumMaxNum(LegalizerHelper &Helper,
27422743 return Helper.lowerFMinNumMaxNum (MI) == LegalizerHelper::Legalized;
27432744}
27442745
2746+ bool AMDGPULegalizerInfo::legalizeFPTrunc (LegalizerHelper &Helper,
2747+ MachineInstr &MI,
2748+ MachineRegisterInfo &MRI) const {
2749+ // TODO: Decide using only the instruction's fast-math flag. The global
2750+ // UnsafeFPMath option is still honored for consistency, since the flag
2751+ // does not yet work for FP_ROUND on the SelectionDAG path.
2752+ MachineFunction &MF = Helper.MIRBuilder .getMF ();
2753+ bool AllowInaccurateFPTRUNC = MI.getFlag (MachineInstr::FmAfn) ||
2754+ MF.getTarget ().Options .UnsafeFPMath ;
2755+
2756+ if (AllowInaccurateFPTRUNC) {
2757+ // Leave MI unmodified; the tablegen pattern selects native instructions.
2758+ return true ;
2759+ }
2760+
2761+ Register DstReg = MI.getOperand (0 ).getReg ();
2762+ LLT DstTy = MRI.getType (DstReg);
2763+
2764+ // Narrow to the element type; scalar f64 -> f16 is presumably lowered later.
2765+ return Helper.fewerElementsVector (MI, 0 , DstTy.getElementType ()) ==
2766+ LegalizerHelper::Legalized;
2767+ }
2768+
27452769bool AMDGPULegalizerInfo::legalizeExtractVectorElt (
27462770 MachineInstr &MI, MachineRegisterInfo &MRI,
27472771 MachineIRBuilder &B) const {
0 commit comments