@@ -1880,6 +1880,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
18801880 }
18811881 case TargetOpcode::G_FPTOUI:
18821882 case TargetOpcode::G_FPTOSI:
1883+ case TargetOpcode::G_FPTOUI_SAT:
1884+ case TargetOpcode::G_FPTOSI_SAT:
18831885 return narrowScalarFPTOI (MI, TypeIdx, NarrowTy);
18841886 case TargetOpcode::G_FPEXT:
18851887 if (TypeIdx != 0 )
@@ -2872,6 +2874,47 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
28722874 else
28732875 widenScalarSrc (MI, WideTy, 1 , TargetOpcode::G_ZEXT);
28742876
2877+ Observer.changedInstr (MI);
2878+ return Legalized;
2879+ case TargetOpcode::G_FPTOSI_SAT:
2880+ case TargetOpcode::G_FPTOUI_SAT:
2881+ Observer.changingInstr (MI);
2882+
2883+ if (TypeIdx == 0 ) {
2884+ Register OldDst = MI.getOperand (0 ).getReg ();
2885+ LLT Ty = MRI.getType (OldDst);
2886+ Register ExtReg = MRI.createGenericVirtualRegister (WideTy);
2887+ Register NewDst;
2888+ MI.getOperand (0 ).setReg (ExtReg);
2889+ uint64_t ShortBits = Ty.getScalarSizeInBits ();
2890+ uint64_t WideBits = WideTy.getScalarSizeInBits ();
2891+ MIRBuilder.setInsertPt (MIRBuilder.getMBB (), ++MIRBuilder.getInsertPt ());
2892+ if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
2893+ // z = i16 fptosi_sat(a)
2894+ // ->
2895+ // x = i32 fptosi_sat(a)
2896+ // y = smin(x, 32767)
2897+ // z = smax(y, -32768)
2898+ auto MaxVal = MIRBuilder.buildConstant (
2899+ WideTy, APInt::getSignedMaxValue (ShortBits).sext (WideBits));
2900+ auto MinVal = MIRBuilder.buildConstant (
2901+ WideTy, APInt::getSignedMinValue (ShortBits).sext (WideBits));
2902+ Register MidReg =
2903+ MIRBuilder.buildSMin (WideTy, ExtReg, MaxVal).getReg (0 );
2904+ NewDst = MIRBuilder.buildSMax (WideTy, MidReg, MinVal).getReg (0 );
2905+ } else {
2906+ // z = i16 fptoui_sat(a)
2907+ // ->
2908+ // x = i32 fptoui_sat(a)
2909+ // y = umin(x, 65535)
2910+ auto MaxVal = MIRBuilder.buildConstant (
2911+ WideTy, APInt::getAllOnes (ShortBits).zext (WideBits));
2912+ NewDst = MIRBuilder.buildUMin (WideTy, ExtReg, MaxVal).getReg (0 );
2913+ }
2914+ MIRBuilder.buildTrunc (OldDst, NewDst);
2915+ } else
2916+ widenScalarSrc (MI, WideTy, 1 , TargetOpcode::G_FPEXT);
2917+
28752918 Observer.changedInstr (MI);
28762919 return Legalized;
28772920 case TargetOpcode::G_LOAD:
@@ -4170,6 +4213,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
41704213 return lowerFPTOUI (MI);
41714214 case G_FPTOSI:
41724215 return lowerFPTOSI (MI);
4216+ case G_FPTOUI_SAT:
4217+ case G_FPTOSI_SAT:
4218+ return lowerFPTOINT_SAT (MI);
41734219 case G_FPTRUNC:
41744220 return lowerFPTRUNC (MI);
41754221 case G_FPOWI:
@@ -4986,6 +5032,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
49865032 case G_UITOFP:
49875033 case G_FPTOSI:
49885034 case G_FPTOUI:
5035+ case G_FPTOSI_SAT:
5036+ case G_FPTOUI_SAT:
49895037 case G_INTTOPTR:
49905038 case G_PTRTOINT:
49915039 case G_ADDRSPACE_CAST:
@@ -5777,6 +5825,8 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
57775825 case TargetOpcode::G_FPEXT:
57785826 case TargetOpcode::G_FPTOSI:
57795827 case TargetOpcode::G_FPTOUI:
5828+ case TargetOpcode::G_FPTOSI_SAT:
5829+ case TargetOpcode::G_FPTOUI_SAT:
57805830 case TargetOpcode::G_SITOFP:
57815831 case TargetOpcode::G_UITOFP: {
57825832 Observer.changingInstr (MI);
@@ -7285,6 +7335,106 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
72857335 return Legalized;
72867336}
72877337
7338+ LegalizerHelper::LegalizeResult
7339+ LegalizerHelper::lowerFPTOINT_SAT (MachineInstr &MI) {
7340+ auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs ();
7341+
7342+ bool IsSigned = MI.getOpcode () == TargetOpcode::G_FPTOSI_SAT;
7343+ unsigned SatWidth = DstTy.getScalarSizeInBits ();
7344+
7345+ // Determine minimum and maximum integer values and their corresponding
7346+ // floating-point values.
7347+ APInt MinInt, MaxInt;
7348+ if (IsSigned) {
7349+ MinInt = APInt::getSignedMinValue (SatWidth);
7350+ MaxInt = APInt::getSignedMaxValue (SatWidth);
7351+ } else {
7352+ MinInt = APInt::getMinValue (SatWidth);
7353+ MaxInt = APInt::getMaxValue (SatWidth);
7354+ }
7355+
7356+ const fltSemantics &Semantics = getFltSemanticForLLT (SrcTy.getScalarType ());
7357+ APFloat MinFloat (Semantics);
7358+ APFloat MaxFloat (Semantics);
7359+
7360+ APFloat::opStatus MinStatus =
7361+ MinFloat.convertFromAPInt (MinInt, IsSigned, APFloat::rmTowardZero);
7362+ APFloat::opStatus MaxStatus =
7363+ MaxFloat.convertFromAPInt (MaxInt, IsSigned, APFloat::rmTowardZero);
7364+ bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
7365+ !(MaxStatus & APFloat::opStatus::opInexact);
7366+
7367+ // If the integer bounds are exactly representable as floats, emit a
7368+ // min+max+fptoi sequence. Otherwise we have to use a sequence of comparisons
7369+ // and selects.
7370+ if (AreExactFloatBounds) {
7371+ // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
7372+ auto MaxC = MIRBuilder.buildFConstant (SrcTy, MinFloat);
7373+ auto MaxP = MIRBuilder.buildFCmp (CmpInst::FCMP_ULT,
7374+ SrcTy.changeElementSize (1 ), Src, MaxC);
7375+ auto Max = MIRBuilder.buildSelect (SrcTy, MaxP, Src, MaxC);
7376+ // Clamp by MaxFloat from above. NaN cannot occur.
7377+ auto MinC = MIRBuilder.buildFConstant (SrcTy, MaxFloat);
7378+ auto MinP =
7379+ MIRBuilder.buildFCmp (CmpInst::FCMP_OGT, SrcTy.changeElementSize (1 ), Max,
7380+ MinC, MachineInstr::FmNoNans);
7381+ auto Min =
7382+ MIRBuilder.buildSelect (SrcTy, MinP, Max, MinC, MachineInstr::FmNoNans);
7383+ // Convert clamped value to integer. In the unsigned case we're done,
7384+ // because we mapped NaN to MinFloat, which will cast to zero.
7385+ if (!IsSigned) {
7386+ MIRBuilder.buildFPTOUI (Dst, Min);
7387+ MI.eraseFromParent ();
7388+ return Legalized;
7389+ }
7390+
7391+ // Otherwise, select 0 if Src is NaN.
7392+ auto FpToInt = MIRBuilder.buildFPTOSI (DstTy, Min);
7393+ auto IsZero = MIRBuilder.buildFCmp (CmpInst::FCMP_UNO,
7394+ DstTy.changeElementSize (1 ), Src, Src);
7395+ MIRBuilder.buildSelect (Dst, IsZero, MIRBuilder.buildConstant (DstTy, 0 ),
7396+ FpToInt);
7397+ MI.eraseFromParent ();
7398+ return Legalized;
7399+ }
7400+
7401+ // Result of direct conversion. The assumption here is that the operation is
7402+ // non-trapping and it's fine to apply it to an out-of-range value if we
7403+ // select it away later.
7404+ auto FpToInt = IsSigned ? MIRBuilder.buildFPTOSI (DstTy, Src)
7405+ : MIRBuilder.buildFPTOUI (DstTy, Src);
7406+
7407+ // If Src ULT MinFloat, select MinInt. In particular, this also selects
7408+ // MinInt if Src is NaN.
7409+ auto ULT =
7410+ MIRBuilder.buildFCmp (CmpInst::FCMP_ULT, SrcTy.changeElementSize (1 ), Src,
7411+ MIRBuilder.buildFConstant (SrcTy, MinFloat));
7412+ auto Max = MIRBuilder.buildSelect (
7413+ DstTy, ULT, MIRBuilder.buildConstant (DstTy, MinInt), FpToInt);
7414+ // If Src OGT MaxFloat, select MaxInt.
7415+ auto OGT =
7416+ MIRBuilder.buildFCmp (CmpInst::FCMP_OGT, SrcTy.changeElementSize (1 ), Src,
7417+ MIRBuilder.buildFConstant (SrcTy, MaxFloat));
7418+
7419+ // In the unsigned case we are done, because we mapped NaN to MinInt, which
7420+ // is already zero.
7421+ if (!IsSigned) {
7422+ MIRBuilder.buildSelect (Dst, OGT, MIRBuilder.buildConstant (DstTy, MaxInt),
7423+ Max);
7424+ MI.eraseFromParent ();
7425+ return Legalized;
7426+ }
7427+
7428+ // Otherwise, select 0 if Src is NaN.
7429+ auto Min = MIRBuilder.buildSelect (
7430+ DstTy, OGT, MIRBuilder.buildConstant (DstTy, MaxInt), Max);
7431+ auto IsZero = MIRBuilder.buildFCmp (CmpInst::FCMP_UNO,
7432+ DstTy.changeElementSize (1 ), Src, Src);
7433+ MIRBuilder.buildSelect (Dst, IsZero, MIRBuilder.buildConstant (DstTy, 0 ), Min);
7434+ MI.eraseFromParent ();
7435+ return Legalized;
7436+ }
7437+
72887438// f64 -> f16 conversion using round-to-nearest-even rounding mode.
72897439LegalizerHelper::LegalizeResult
72907440LegalizerHelper::lowerFPTRUNC_F64_TO_F16 (MachineInstr &MI) {
0 commit comments