@@ -1805,24 +1805,13 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
18051805 case Intrinsic::nvvm_ceil_f:
18061806 case Intrinsic::nvvm_ceil_ftz_f:
18071807
1808- case Intrinsic::nvvm_cos_approx_f:
1809- case Intrinsic::nvvm_cos_approx_ftz_f:
1810-
1811- case Intrinsic::nvvm_ex2_approx_d:
1812- case Intrinsic::nvvm_ex2_approx_f:
1813- case Intrinsic::nvvm_ex2_approx_ftz_f:
1814-
18151808 case Intrinsic::nvvm_fabs:
18161809 case Intrinsic::nvvm_fabs_ftz:
18171810
18181811 case Intrinsic::nvvm_floor_d:
18191812 case Intrinsic::nvvm_floor_f:
18201813 case Intrinsic::nvvm_floor_ftz_f:
18211814
1822- case Intrinsic::nvvm_lg2_approx_d:
1823- case Intrinsic::nvvm_lg2_approx_f:
1824- case Intrinsic::nvvm_lg2_approx_ftz_f:
1825-
18261815 case Intrinsic::nvvm_rcp_rm_d:
18271816 case Intrinsic::nvvm_rcp_rm_f:
18281817 case Intrinsic::nvvm_rcp_rm_ftz_f:
@@ -1835,31 +1824,19 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
18351824 case Intrinsic::nvvm_rcp_rz_d:
18361825 case Intrinsic::nvvm_rcp_rz_f:
18371826 case Intrinsic::nvvm_rcp_rz_ftz_f:
1838- case Intrinsic::nvvm_rcp_approx_ftz_d:
1839- case Intrinsic::nvvm_rcp_approx_ftz_f:
18401827
18411828 case Intrinsic::nvvm_round_d:
18421829 case Intrinsic::nvvm_round_f:
18431830 case Intrinsic::nvvm_round_ftz_f:
18441831
1845- case Intrinsic::nvvm_rsqrt_approx_d:
1846- case Intrinsic::nvvm_rsqrt_approx_f:
1847- case Intrinsic::nvvm_rsqrt_approx_ftz_d:
1848- case Intrinsic::nvvm_rsqrt_approx_ftz_f:
1849-
18501832 case Intrinsic::nvvm_saturate_d:
18511833 case Intrinsic::nvvm_saturate_f:
18521834 case Intrinsic::nvvm_saturate_ftz_f:
18531835
1854- case Intrinsic::nvvm_sin_approx_f:
1855- case Intrinsic::nvvm_sin_approx_ftz_f:
1856-
18571836 case Intrinsic::nvvm_sqrt_f:
18581837 case Intrinsic::nvvm_sqrt_rn_d:
18591838 case Intrinsic::nvvm_sqrt_rn_f:
18601839 case Intrinsic::nvvm_sqrt_rn_ftz_f:
1861- case Intrinsic::nvvm_sqrt_approx_f:
1862- case Intrinsic::nvvm_sqrt_approx_ftz_f:
18631840 return !Call->isStrictFP ();
18641841
18651842 // Sign operations are actually bitwise operations, they do not raise
@@ -2025,15 +2002,6 @@ inline bool llvm_fenv_testexcept() {
20252002 return false ;
20262003}
20272004
2028- // Keep only the upper 32 bits of a 64-bit input: the sign bit, the 11 exponent
2029- // bits and the top 20 mantissa bits (1.11.20); the low 32 mantissa bits become 0.
2030- static const APFloat ZeroLower32Bits (const APFloat &V) {
2031- assert (V.getSizeInBits (V.getSemantics ()) == 64 ); // only valid for 64-bit semantics
2032- uint64_t DoubleBits = V.bitcastToAPInt ().getZExtValue ();
2033- DoubleBits &= 0xffffffff00000000 ; // clear the low 32 bits of the raw encoding
2034- return APFloat (V.getSemantics (), APInt (64 , DoubleBits, false , false ));
2035- }
2036-
20372005static const APFloat FTZPreserveSign (const APFloat &V) {
20382006 if (V.isDenormal ())
20392007 return APFloat::getZero (V.getSemantics (), V.isNegative ());
@@ -2663,21 +2631,6 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
26632631 nvvm::GetNVVMDenromMode (
26642632 nvvm::UnaryMathIntrinsicShouldFTZ (IntrinsicID)));
26652633
2666- case Intrinsic::nvvm_cos_approx_ftz_f:
2667- case Intrinsic::nvvm_cos_approx_f:
2668- return ConstantFoldFP (
2669- cos, APF, Ty,
2670- nvvm::GetNVVMDenromMode (
2671- nvvm::UnaryMathIntrinsicShouldFTZ (IntrinsicID)));
2672-
2673- case Intrinsic::nvvm_ex2_approx_ftz_f:
2674- case Intrinsic::nvvm_ex2_approx_d:
2675- case Intrinsic::nvvm_ex2_approx_f:
2676- return ConstantFoldFP (
2677- exp2, APF, Ty,
2678- nvvm::GetNVVMDenromMode (
2679- (nvvm::UnaryMathIntrinsicShouldFTZ (IntrinsicID))));
2680-
26812634 case Intrinsic::nvvm_fabs_ftz:
26822635 case Intrinsic::nvvm_fabs:
26832636 return ConstantFoldFP (
@@ -2693,23 +2646,10 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
26932646 nvvm::GetNVVMDenromMode (
26942647 nvvm::UnaryMathIntrinsicShouldFTZ (IntrinsicID)));
26952648
2696- case Intrinsic::nvvm_lg2_approx_ftz_f:
2697- case Intrinsic::nvvm_lg2_approx_d:
2698- case Intrinsic::nvvm_lg2_approx_f: {
2699- if (APF.isNegative () || APF.isZero ())
2700- return nullptr ;
2701- return ConstantFoldFP (
2702- log2, APF, Ty,
2703- nvvm::GetNVVMDenromMode (
2704- nvvm::UnaryMathIntrinsicShouldFTZ (IntrinsicID)));
2705- }
2706-
27072649 case Intrinsic::nvvm_rcp_rm_ftz_f:
27082650 case Intrinsic::nvvm_rcp_rn_ftz_f:
27092651 case Intrinsic::nvvm_rcp_rp_ftz_f:
27102652 case Intrinsic::nvvm_rcp_rz_ftz_f:
2711- case Intrinsic::nvvm_rcp_approx_ftz_f:
2712- case Intrinsic::nvvm_rcp_approx_ftz_d:
27132653 case Intrinsic::nvvm_rcp_rm_d:
27142654 case Intrinsic::nvvm_rcp_rm_f:
27152655 case Intrinsic::nvvm_rcp_rn_d:
@@ -2719,26 +2659,15 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
27192659 case Intrinsic::nvvm_rcp_rz_d:
27202660 case Intrinsic::nvvm_rcp_rz_f: {
27212661 APFloat::roundingMode RoundMode = nvvm::GetRCPRoundingMode (IntrinsicID);
2722- bool IsApprox = nvvm::RCPIsApprox (IntrinsicID);
27232662 bool IsFTZ = nvvm::RCPShouldFTZ (IntrinsicID);
27242663
27252664 auto Denominator = IsFTZ ? FTZPreserveSign (APF) : APF;
2726- if (IntrinsicID == Intrinsic::nvvm_rcp_approx_ftz_d)
2727- Denominator = ZeroLower32Bits (Denominator);
2728- if (IsApprox && Denominator.isZero ()) {
2729- // According to the PTX spec, approximate rcp should return infinity
2730- // with the same sign as the denominator when dividing by 0.
2731- APFloat Inf = APFloat::getInf (APF.getSemantics (), APF.isNegative ());
2732- return ConstantFP::get (Ty->getContext (), Inf);
2733- }
27342665 APFloat Res = APFloat::getOne (APF.getSemantics ());
27352666 APFloat::opStatus Status = Res.divide (Denominator, RoundMode);
27362667
27372668 if (Status == APFloat::opOK || Status == APFloat::opInexact) {
27382669 if (IsFTZ)
27392670 Res = FTZPreserveSign (Res);
2740- if (IntrinsicID == Intrinsic::nvvm_rcp_approx_ftz_d)
2741- Res = ZeroLower32Bits (Res);
27422671 return ConstantFP::get (Ty->getContext (), Res);
27432672 }
27442673 return nullptr ;
@@ -2752,37 +2681,6 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
27522681 nvvm::GetNVVMDenromMode (
27532682 nvvm::UnaryMathIntrinsicShouldFTZ (IntrinsicID)));
27542683
2755- case Intrinsic::nvvm_rsqrt_approx_ftz_d:
2756- case Intrinsic::nvvm_rsqrt_approx_ftz_f:
2757- case Intrinsic::nvvm_rsqrt_approx_d:
2758- case Intrinsic::nvvm_rsqrt_approx_f: {
2759- bool IsFTZ = nvvm::UnaryMathIntrinsicShouldFTZ (IntrinsicID);
2760- auto V = IsFTZ ? FTZPreserveSign (APF) : APF;
2761-
2762- if (IntrinsicID == Intrinsic::nvvm_rsqrt_approx_ftz_d)
2763- V = ZeroLower32Bits (V);
2764-
2765- APFloat SqrtV (sqrt (V.convertToDouble ()));
2766-
2767- if (Ty->isFloatTy ()) {
2768- bool lost;
2769- SqrtV.convert (APF.getSemantics (), APFloat::rmNearestTiesToEven,
2770- &lost);
2771- }
2772-
2773- APFloat Res = APFloat::getOne (APF.getSemantics ());
2774- Res.divide (SqrtV, APFloat::rmNearestTiesToEven);
2775-
2776- if (IntrinsicID == Intrinsic::nvvm_rsqrt_approx_ftz_d)
2777- Res = ZeroLower32Bits (Res);
2778-
2779- // We do not need to flush the output for ftz because it is impossible
2780- // for 1/sqrt(x) to be a denormal value. If x is the largest fp value,
2781- // sqrt(x) will be a number with the exponent approximately halved and
2782- // the reciprocal of that number can't be small enough to be denormal.
2783- return ConstantFP::get (Ty->getContext (), Res);
2784- }
2785-
27862684 case Intrinsic::nvvm_saturate_ftz_f:
27872685 case Intrinsic::nvvm_saturate_d:
27882686 case Intrinsic::nvvm_saturate_f: {
@@ -2796,19 +2694,10 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
27962694 return ConstantFP::get (Ty->getContext (), APF);
27972695 }
27982696
2799- case Intrinsic::nvvm_sin_approx_ftz_f:
2800- case Intrinsic::nvvm_sin_approx_f:
2801- return ConstantFoldFP (
2802- sin, APF, Ty,
2803- nvvm::GetNVVMDenromMode (
2804- nvvm::UnaryMathIntrinsicShouldFTZ (IntrinsicID)));
2805-
28062697 case Intrinsic::nvvm_sqrt_rn_ftz_f:
2807- case Intrinsic::nvvm_sqrt_approx_ftz_f:
28082698 case Intrinsic::nvvm_sqrt_f:
28092699 case Intrinsic::nvvm_sqrt_rn_d:
28102700 case Intrinsic::nvvm_sqrt_rn_f:
2811- case Intrinsic::nvvm_sqrt_approx_f:
28122701 if (APF.isNegative ())
28132702 return nullptr ;
28142703 return ConstantFoldFP (
0 commit comments