ENH: special: use boost in nctdtr (scipy#21728)

dschmitz89 · steppi · web-flow · commit c75f58812691 · 2024-11-22T08:50:07.000-05:00
* MAINT: migrate nctdtr to boost

* MAINT: simplify unlikely case that neither float nor double input is passed to nctdtr

* DOC: update nctdtr according to boost capabilities

* TST: add nctdtr tests

* Fix behaviour for -inf [skip ci]

Co-authored-by: Albert Steppi <albert.steppi@gmail.com>

* TST: add more parameter combinations for nctdtr

* MAINT: remove tests against unused old Fortran code

* TST: bump tolerance to pass on some platforms

* TST: refactor nctdtr tests into their own class and re-add gh19896 tests

* MAINT: remove errorstate

* TST: fix mpmath reference values and ignore edge case

* TST: use pytest parametrization and xfail test case where boost returns negative value

* Apply suggestions from code review

[skip ci]

---------

Co-authored-by: Albert Steppi <albert.steppi@gmail.com>
diff --git a/scipy/special/_add_newdocs.py b/scipy/special/_add_newdocs.py
@@ -7076,7 +7076,7 @@ def add_newdoc(name, doc):
     df : array_like
         Degrees of freedom of the distribution. Should be in range (0, inf).
     nc : array_like
-        Noncentrality parameter. Should be in range (-1e6, 1e6).
+        Noncentrality parameter.
     t : array_like
         Quantiles, i.e., the upper limit of integration.
     out : ndarray, optional
@@ -7094,6 +7094,19 @@ def add_newdoc(name, doc):
     nctdtridf : Calculate degrees of freedom, given CDF and iCDF values.
     nctdtrinc : Calculate non-centrality parameter, given CDF iCDF values.
 
+    Notes
+    -----
+    This function calculates the CDF of the non-central t distribution using
+    the Boost Math C++ library [1]_.
+
+    Note that the argument order of `nctdtr` is different from that of the
+    similar ``cdf`` method of `scipy.stats.nct`: `t` is the last
+    parameter of `nctdtr` but the first parameter of ``scipy.stats.nct.cdf``.
+
+    References
+    ----------
+    .. [1] The Boost Developers. "Boost C++ Libraries". https://www.boost.org/.
+
     Examples
     --------
     >>> import numpy as np
diff --git a/scipy/special/boost_special_functions.h b/scipy/special/boost_special_functions.h
@@ -951,26 +951,46 @@ ncf_kurtosis_excess_double(double v1, double v2, double l)
 
 template<typename Real>
 Real
-nct_cdf_wrap(const Real x, const Real v, const Real l)
+nct_cdf_wrap(const Real v, const Real l, const Real x)
 {
-    if (std::isfinite(x)) {
-        return boost::math::cdf(
-            boost::math::non_central_t_distribution<Real, StatsPolicy>(v, l), x);
+    if (std::isnan(x) || std::isnan(v) || std::isnan(l)) {
+	return NAN;
     }
-    // -inf => 0, inf => 1
-    return 1.0 - std::signbit(x);
+    if (v <= 0) {
+	sf_error("nctdtr", SF_ERROR_DOMAIN, NULL);
+	return NAN;
+    }
+    if (std::isinf(x)) {
+	return  (x > 0) ? 1.0 : 0.0;
+    }
+    Real y;
+    try {
+	y = boost::math::cdf(
+                boost::math::non_central_t_distribution<Real, SpecialPolicy>(v, l), x);
+    } catch (...) {
+	/* Boost was unable to produce a result. */
+        sf_error("nctdtr", SF_ERROR_NO_RESULT, NULL);
+        y = NAN;
+    }
+    if ((y < 0) || (y > 1)) {
+	/* Result must be between 0 and 1 to be a valid CDF value.
+       Return NAN if the result is out of bounds because the answer cannot be trusted. */
+	    sf_error("nctdtr", SF_ERROR_NO_RESULT, NULL);
+        y = NAN;
+    }
+    return y;
 }
 
 float
-nct_cdf_float(float x, float v, float l)
+nct_cdf_float(float v, float l, float x)
 {
-    return nct_cdf_wrap(x, v, l);
+    return nct_cdf_wrap(v, l, x);
 }
 
 double
-nct_cdf_double(double x, double v, double l)
+nct_cdf_double(double v, double l, double x)
 {
-    return nct_cdf_wrap(x, v, l);
+    return nct_cdf_wrap(v, l, x);
 }
 
 template<typename Real>
diff --git a/scipy/special/cython_special.pxd b/scipy/special/cython_special.pxd
@@ -193,7 +193,7 @@ cpdef df_number_t ncfdtri(df_number_t x0, df_number_t x1, df_number_t x2, df_num
 cpdef double ncfdtridfd(double x0, double x1, double x2, double x3) noexcept nogil
 cpdef double ncfdtridfn(double x0, double x1, double x2, double x3) noexcept nogil
 cpdef double ncfdtrinc(double x0, double x1, double x2, double x3) noexcept nogil
-cpdef double nctdtr(double x0, double x1, double x2) noexcept nogil
+cpdef df_number_t nctdtr(df_number_t x0, df_number_t x1, df_number_t x2) noexcept nogil
 cpdef double nctdtridf(double x0, double x1, double x2) noexcept nogil
 cpdef double nctdtrinc(double x0, double x1, double x2) noexcept nogil
 cpdef double nctdtrit(double x0, double x1, double x2) noexcept nogil
diff --git a/scipy/special/cython_special.pyx b/scipy/special/cython_special.pyx
@@ -1713,10 +1713,6 @@ from ._cdflib_wrappers cimport ncfdtrinc as _func_ncfdtrinc
 ctypedef double _proto_ncfdtrinc_t(double, double, double, double) noexcept nogil
 cdef _proto_ncfdtrinc_t *_proto_ncfdtrinc_t_var = &_func_ncfdtrinc
 
-from ._cdflib_wrappers cimport nctdtr as _func_nctdtr
-ctypedef double _proto_nctdtr_t(double, double, double) noexcept nogil
-cdef _proto_nctdtr_t *_proto_nctdtr_t_var = &_func_nctdtr
-
 from ._cdflib_wrappers cimport nctdtridf as _func_nctdtridf
 ctypedef double _proto_nctdtridf_t(double, double, double) noexcept nogil
 cdef _proto_nctdtridf_t *_proto_nctdtridf_t_var = &_func_nctdtridf
@@ -3172,9 +3168,14 @@ cpdef double ncfdtrinc(double x0, double x1, double x2, double x3) noexcept nogi
     """See the documentation for scipy.special.ncfdtrinc"""
     return _func_ncfdtrinc(x0, x1, x2, x3)
 
-cpdef double nctdtr(double x0, double x1, double x2) noexcept nogil:
+cpdef df_number_t nctdtr(df_number_t x0, df_number_t x1, df_number_t x2) noexcept nogil:
     """See the documentation for scipy.special.nctdtr"""
-    return _func_nctdtr(x0, x1, x2)
+    if df_number_t is float:
+        return (<float(*)(float, float, float) noexcept nogil>scipy.special._ufuncs_cxx._export_nct_cdf_float)(x0, x1, x2)
+    elif df_number_t is double:
+        return (<double(*)(double, double, double) noexcept nogil>scipy.special._ufuncs_cxx._export_nct_cdf_double)(x0, x1, x2)
+    else:
+        return NAN
 
 cpdef double nctdtridf(double x0, double x1, double x2) noexcept nogil:
     """See the documentation for scipy.special.nctdtridf"""
diff --git a/scipy/special/functions.json b/scipy/special/functions.json
@@ -615,8 +615,9 @@
             "ncfdtrinc": "dddd->d"        }
     },
     "nctdtr": {
-        "_cdflib_wrappers.pxd": {
-            "nctdtr": "ddd->d"        }
+        "boost_special_functions.h++": {
+            "nct_cdf_float": "fff->f",
+            "nct_cdf_double": "ddd->d"    }
     },
     "nctdtridf": {
         "_cdflib_wrappers.pxd": {
@@ -936,12 +937,6 @@
             "ncf_kurtosis_excess_double": "ddd->d"
         }
     },
-    "_nct_cdf": {
-        "boost_special_functions.h++": {
-            "nct_cdf_float": "fff->f",
-            "nct_cdf_double": "ddd->d"
-        }
-    },
     "_nct_pdf": {
         "boost_special_functions.h++": {
             "nct_pdf_float": "fff->f",
diff --git a/scipy/special/tests/test_cdflib.py b/scipy/special/tests/test_cdflib.py
@@ -10,7 +10,6 @@
 - nbdtrik
 - nbdtrin
 - pdtrik
-- nctdtr
 - nctdtrit
 - nctdtridf
 - nctdtrinc
@@ -461,40 +460,6 @@ def test_chndtrix_gh2158():
                82.35640899964173, 84.45263768373256]
     assert_allclose(res, res_exp)
 
-@pytest.mark.xfail_on_32bit("32bit fails due to algorithm threshold")
-def test_nctdtr_gh19896():
-    # test that gh-19896 is resolved.
-    # Compared to SciPy 1.11 results from Fortran code.
-    dfarr = [0.98, 9.8, 98, 980]
-    pnoncarr = [-3.8, 0.38, 3.8, 38]
-    tarr = [0.0015, 0.15, 1.5, 15]
-    resarr = [0.9999276519560749, 0.9999276519560749, 0.9999908831755221,
-              0.9999990265452424, 0.3524153312279712, 0.39749697267251416,
-              0.7168629634895805, 0.9656246449259646, 7.234804392512006e-05,
-              7.234804392512006e-05, 0.03538804607509127, 0.795482701508521,
-              0.0, 0.0, 0.0,
-              0.011927908523093889, 0.9999276519560749, 0.9999276519560749,
-              0.9999997441133123, 1.0, 0.3525155979118013,
-              0.4076312014048369, 0.8476794017035086, 0.9999999297116268,
-              7.234804392512006e-05, 7.234804392512006e-05, 0.013477443099785824,
-              0.9998501512331494, 0.0, 0.0,
-              0.0, 6.561112613212572e-07, 0.9999276519560749,
-              0.9999276519560749, 0.9999999313496014, 1.0,
-              0.3525281784865706, 0.40890253001898014, 0.8664672830017024,
-              1.0, 7.234804392512006e-05, 7.234804392512006e-05,
-              0.010990889489704836, 1.0, 0.0,
-              0.0, 0.0, 0.0,
-              0.9999276519560749, 0.9999276519560749, 0.9999999418789304,
-              1.0, 0.35252945487817355, 0.40903153246690993,
-              0.8684247068528264, 1.0, 7.234804392512006e-05,
-              7.234804392512006e-05, 0.01075068918582911, 1.0,
-              0.0, 0.0, 0.0, 0.0]
-    actarr = []
-    for df, p, t in itertools.product(dfarr, pnoncarr, tarr):
-        actarr += [sp.nctdtr(df, p, t)]
-    # The rtol is kept high on purpose to make it pass on 32bit systems
-    assert_allclose(actarr, resarr, rtol=1e-6, atol=0.0)
-
 
 def test_nctdtrinc_gh19896():
     # test that gh-19896 is resolved.
@@ -585,3 +550,139 @@ def test_ncfdtr(dfn, dfd, nc, f, expected):
     # sample_idx = rng.choice(len(re), replace=False, size=12)
     # cases = np.array(cases)[sample_idx].tolist()
     assert_allclose(sp.ncfdtr(dfn, dfd, nc, f), expected, rtol=1e-13, atol=0)
+
+
+class TestNctdtr:
+
+    # Reference values computed with mpmath with the following script
+    # Formula from:
+    # Lenth, Russell V (1989). "Algorithm AS 243: Cumulative Distribution Function
+    # of the Non-central t Distribution". Journal of the Royal Statistical Society,
+    # Series C. 38 (1): 185-189
+    #
+    # Warning: may take a long time to run
+    #
+    # from mpmath import mp
+    # mp.dps = 400
+
+    # def nct_cdf(df, nc, x):
+    #     df, nc, x = map(mp.mpf, (df, nc, x))
+        
+    #     def f(df, nc, x):
+    #         phi = mp.ncdf(-nc)
+    #         y = x * x / (x * x + df)
+    #         constant = mp.exp(-nc * nc / 2.)
+    #         def term(j):
+    #             intermediate = constant * (nc *nc / 2.)**j
+    #             p = intermediate/mp.factorial(j)
+    #             q = nc / (mp.sqrt(2.) * mp.gamma(j + 1.5)) * intermediate
+    #             first_beta_term = mp.betainc(j + 0.5, df/2., x2=y,
+    #                                          regularized=True)
+    #             second_beta_term = mp.betainc(j + mp.one, df/2., x2=y,
+    #                                           regularized=True)
+    #             return p * first_beta_term + q * second_beta_term
+
+    #         sum_term = mp.nsum(term, [0, mp.inf])
+    #         f = phi + 0.5 * sum_term
+    #         return f
+
+    #     if x >= 0:
+    #         result = f(df, nc, x)
+    #     else:
+    #         result = mp.one - f(df, -nc, x)
+    #     return float(result)
+
+    @pytest.mark.parametrize("df, nc, x, expected", [
+        (0.98, -3.8, 0.0015, 0.9999279987514815),
+        (0.98, -3.8, 0.15, 0.9999528361700505),
+        (0.98, -3.8, 1.5, 0.9999908823016942),
+        (0.98, -3.8, 15, 0.9999990264591945),
+        (0.98, 0.38, 0.0015, 0.35241533122693),
+        (0.98, 0.38, 0.15, 0.39749697267146983),
+        (0.98, 0.38, 1.5, 0.716862963488558),
+        (0.98, 0.38, 15, 0.9656246449257494),
+        (0.98, 3.8, 0.0015, 7.26973354942293e-05),
+        (0.98, 3.8, 0.15, 0.00012416481147589105),
+        (0.98, 3.8, 1.5, 0.035388035775454095),
+        (0.98, 3.8, 15, 0.7954826975430583),
+        (0.98, 38, 0.0015, 3.02106943e-316),
+        (0.98, 38, 0.15, 6.069970616996603e-309),
+        (0.98, 38, 1.5, 2.591995360483094e-97),
+        (0.98, 38, 15, 0.011927265886910935),
+        (9.8, -3.8, 0.0015, 0.9999280776192786),
+        (9.8, -3.8, 0.15, 0.9999599410685442),
+        (9.8, -3.8, 1.5, 0.9999997432394788),
+        (9.8, -3.8, 15, 0.9999999999999984),
+        (9.8, 0.38, 0.0015, 0.3525155979107491),
+        (9.8, 0.38, 0.15, 0.40763120140379194),
+        (9.8, 0.38, 1.5, 0.8476794017024651),
+        (9.8, 0.38, 15, 0.9999999297116268),
+        (9.8, 3.8, 0.0015, 7.277620328149153e-05),
+        (9.8, 3.8, 0.15, 0.00013024802220900652),
+        (9.8, 3.8, 1.5, 0.013477432800072933),
+        (9.8, 3.8, 15, 0.999850151230648),
+        (9.8, 38, 0.0015, 3.05066095e-316),
+        (9.8, 38, 0.15, 1.79065514676e-313),
+        (9.8, 38, 1.5, 2.0935940165900746e-249),
+        (9.8, 38, 15, 2.252076291604796e-09),
+        (98, -3.8, 0.0015, 0.9999280875149109),
+        (98, -3.8, 0.15, 0.9999608250170452),
+        (98, -3.8, 1.5, 0.9999999304757682),
+        (98, -3.8, 15, 1.0),
+        (98, 0.38, 0.0015, 0.35252817848596313),
+        (98, 0.38, 0.15, 0.40890253001794846),
+        (98, 0.38, 1.5, 0.8664672830006552),
+        (98, 0.38, 15, 1.0),
+        (98, 3.8, 0.0015, 7.278609891281275e-05),
+        (98, 3.8, 0.15, 0.0001310318674827004),
+        (98, 3.8, 1.5, 0.010990879189991727),
+        (98, 3.8, 15, 0.9999999999999989),
+        (98, 38, 0.0015, 3.05437385e-316),
+        (98, 38, 0.15, 9.1668336166e-314),
+        (98, 38, 1.5, 1.8085884236563926e-288),
+        (98, 38, 15, 2.7740532792035907e-50),
+        (980, -3.8, 0.0015, 0.9999280885188965),
+        (980, -3.8, 0.15, 0.9999609144559273),
+        (980, -3.8, 1.5, 0.9999999410050979),
+        (980, -3.8, 15, 1.0),
+        (980, 0.38, 0.0015, 0.3525294548792812),
+        (980, 0.38, 0.15, 0.4090315324657382),
+        (980, 0.38, 1.5, 0.8684247068517293),
+        (980, 0.38, 15, 1.0),
+        (980, 3.8, 0.0015, 7.278710289828983e-05),
+        (980, 3.8, 0.15, 0.00013111131667906573),
+        (980, 3.8, 1.5, 0.010750678886113882),
+        (980, 3.8, 15, 1.0),
+        (980, 38, 0.0015, 3.0547506e-316),
+        (980, 38, 0.15, 8.6191646313e-314),
+        pytest.param(980, 38, 1.5, 1.1824454111413493e-291,
+                     marks=pytest.mark.xfail(
+                        reason="Bug in underlying Boost math implementation")),
+        (980, 38, 15, 5.407535300713606e-105)
+    ])
+    def test_gh19896(self, df, nc, x, expected):
+        # test that gh-19896 is resolved.
+        # Originally this was a regression test that used the old Fortran results
+        # as a reference. The Fortran results were not accurate, so the reference
+        # values were recomputed with mpmath.
+        result = sp.nctdtr(df, nc, x)
+        assert_allclose(result, expected, rtol=1e-13, atol=1e-303)
+
+    def test_nctdtr_gh8344(self):
+        # test that gh-8344 is resolved.
+        df, nc, x = 3000, 3, 0.1
+        expected = 0.0018657780826323328
+        assert_allclose(sp.nctdtr(df, nc, x), expected, rtol=1e-14)
+
+    @pytest.mark.parametrize(
+        "df, nc, x, expected, rtol",
+        [[3., 5., -2., 1.5645373999149622e-09, 5e-9],
+         [1000., 10., 1., 1.1493552133826623e-19, 1e-13],
+         [1e-5, -6., 2., 0.9999999990135003, 1e-13],
+         [10., 20., 0.15, 6.426530505957303e-88, 1e-13],
+         [1., 1., np.inf, 1.0, 0.0],
+         [1., 1., -np.inf, 0.0, 0.0]
+        ]
+    )
+    def test_accuracy(self, df, nc, x, expected, rtol):
+        assert_allclose(sp.nctdtr(df, nc, x), expected, rtol=rtol)
diff --git a/scipy/stats/_continuous_distns.py b/scipy/stats/_continuous_distns.py
@@ -8102,8 +8102,7 @@ def _pdf(self, x, df, nc):
         return scu._nct_pdf(x, df, nc)
 
     def _cdf(self, x, df, nc):
-        with np.errstate(over='ignore'):  # see gh-17432
-            return np.clip(scu._nct_cdf(x, df, nc), 0, 1)
+        return sc.nctdtr(df, nc, x)
 
     def _ppf(self, q, df, nc):
         with np.errstate(over='ignore'):  # see gh-17432