Skip to content

Commit c75f588

Browse files
dschmitz89steppi
andauthored
ENH: special: use boost in nctdtr (scipy#21728)
* MAINT: migrate nctdtr to boost * MAINT: simplify unlikely case that neither float or single input is passed to nctdtr * DOC: update nctdtr acc. to boost capabilities * TST: add nctdtr tests * Fix behaviour for -inf [skip ci] Co-authored-by: Albert Steppi <[email protected]> * TST: add more paramter combinations for nctdtr * MAINT: remove tests against unused old Fortran code * TST: bump tolerance to pass on some platforms * TST: refactor nctdtr tests in own class and readded gh19896 tests * MAINT: remove errorstate * TST: fix mpmath reference values and ignore edge case * TST: use pytest parametrization and xfail test case where boost returns negative value * Apply suggestions from code review [skip ci] --------- Co-authored-by: Albert Steppi <[email protected]>
1 parent 26696e2 commit c75f588

File tree

7 files changed

+192
-63
lines changed

7 files changed

+192
-63
lines changed

scipy/special/_add_newdocs.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7076,7 +7076,7 @@ def add_newdoc(name, doc):
70767076
df : array_like
70777077
Degrees of freedom of the distribution. Should be in range (0, inf).
70787078
nc : array_like
7079-
Noncentrality parameter. Should be in range (-1e6, 1e6).
7079+
Noncentrality parameter.
70807080
t : array_like
70817081
Quantiles, i.e., the upper limit of integration.
70827082
out : ndarray, optional
@@ -7094,6 +7094,19 @@ def add_newdoc(name, doc):
70947094
nctdtridf : Calculate degrees of freedom, given CDF and iCDF values.
70957095
nctdtrinc : Calculate non-centrality parameter, given CDF iCDF values.
70967096
7097+
Notes
7098+
-----
7099+
This function calculates the CDF of the non-central t distribution using
7100+
the Boost Math C++ library [1]_.
7101+
7102+
Note that the argument order of `nctdtr` is different from that of the
7103+
similar ``cdf`` method of `scipy.stats.nct`: `t` is the last
7104+
parameter of `nctdtr` but the first parameter of ``scipy.stats.nct.cdf``.
7105+
7106+
References
7107+
----------
7108+
.. [1] The Boost Developers. "Boost C++ Libraries". https://www.boost.org/.
7109+
70977110
Examples
70987111
--------
70997112
>>> import numpy as np

scipy/special/boost_special_functions.h

Lines changed: 30 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -951,26 +951,46 @@ ncf_kurtosis_excess_double(double v1, double v2, double l)
951951

952952
template<typename Real>
953953
Real
954-
nct_cdf_wrap(const Real x, const Real v, const Real l)
954+
nct_cdf_wrap(const Real v, const Real l, const Real x)
955955
{
956-
if (std::isfinite(x)) {
957-
return boost::math::cdf(
958-
boost::math::non_central_t_distribution<Real, StatsPolicy>(v, l), x);
956+
if (std::isnan(x) || std::isnan(v) || std::isnan(l)) {
957+
return NAN;
959958
}
960-
// -inf => 0, inf => 1
961-
return 1.0 - std::signbit(x);
959+
if (v <= 0) {
960+
sf_error("nctdtr", SF_ERROR_DOMAIN, NULL);
961+
return NAN;
962+
}
963+
if (std::isinf(x)) {
964+
return (x > 0) ? 1.0 : 0.0;
965+
}
966+
Real y;
967+
try {
968+
y = boost::math::cdf(
969+
boost::math::non_central_t_distribution<Real, SpecialPolicy>(v, l), x);
970+
} catch (...) {
971+
/* Boost was unable to produce a result. */
972+
sf_error("nctdtr", SF_ERROR_NO_RESULT, NULL);
973+
y = NAN;
974+
}
975+
if ((y < 0) || (y > 1)) {
976+
/* Result must be between 0 and 1 to be a valid CDF value.
977+
Return NAN if the result is out of bounds because the answer cannot be trusted. */
978+
sf_error("nctdtr", SF_ERROR_NO_RESULT, NULL);
979+
y = NAN;
980+
}
981+
return y;
962982
}
963983

964984
float
965-
nct_cdf_float(float x, float v, float l)
985+
nct_cdf_float(float v, float l, float x)
966986
{
967-
return nct_cdf_wrap(x, v, l);
987+
return nct_cdf_wrap(v, l, x);
968988
}
969989

970990
double
971-
nct_cdf_double(double x, double v, double l)
991+
nct_cdf_double(double v, double l, double x)
972992
{
973-
return nct_cdf_wrap(x, v, l);
993+
return nct_cdf_wrap(v, l, x);
974994
}
975995

976996
template<typename Real>

scipy/special/cython_special.pxd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ cpdef df_number_t ncfdtri(df_number_t x0, df_number_t x1, df_number_t x2, df_num
193193
cpdef double ncfdtridfd(double x0, double x1, double x2, double x3) noexcept nogil
194194
cpdef double ncfdtridfn(double x0, double x1, double x2, double x3) noexcept nogil
195195
cpdef double ncfdtrinc(double x0, double x1, double x2, double x3) noexcept nogil
196-
cpdef double nctdtr(double x0, double x1, double x2) noexcept nogil
196+
cpdef df_number_t nctdtr(df_number_t x0, df_number_t x1, df_number_t x2) noexcept nogil
197197
cpdef double nctdtridf(double x0, double x1, double x2) noexcept nogil
198198
cpdef double nctdtrinc(double x0, double x1, double x2) noexcept nogil
199199
cpdef double nctdtrit(double x0, double x1, double x2) noexcept nogil

scipy/special/cython_special.pyx

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1713,10 +1713,6 @@ from ._cdflib_wrappers cimport ncfdtrinc as _func_ncfdtrinc
17131713
ctypedef double _proto_ncfdtrinc_t(double, double, double, double) noexcept nogil
17141714
cdef _proto_ncfdtrinc_t *_proto_ncfdtrinc_t_var = &_func_ncfdtrinc
17151715

1716-
from ._cdflib_wrappers cimport nctdtr as _func_nctdtr
1717-
ctypedef double _proto_nctdtr_t(double, double, double) noexcept nogil
1718-
cdef _proto_nctdtr_t *_proto_nctdtr_t_var = &_func_nctdtr
1719-
17201716
from ._cdflib_wrappers cimport nctdtridf as _func_nctdtridf
17211717
ctypedef double _proto_nctdtridf_t(double, double, double) noexcept nogil
17221718
cdef _proto_nctdtridf_t *_proto_nctdtridf_t_var = &_func_nctdtridf
@@ -3172,9 +3168,14 @@ cpdef double ncfdtrinc(double x0, double x1, double x2, double x3) noexcept nogi
31723168
"""See the documentation for scipy.special.ncfdtrinc"""
31733169
return _func_ncfdtrinc(x0, x1, x2, x3)
31743170

3175-
cpdef double nctdtr(double x0, double x1, double x2) noexcept nogil:
3171+
cpdef df_number_t nctdtr(df_number_t x0, df_number_t x1, df_number_t x2) noexcept nogil:
31763172
"""See the documentation for scipy.special.nctdtr"""
3177-
return _func_nctdtr(x0, x1, x2)
3173+
if df_number_t is float:
3174+
return (<float(*)(float, float, float) noexcept nogil>scipy.special._ufuncs_cxx._export_nct_cdf_float)(x0, x1, x2)
3175+
elif df_number_t is double:
3176+
return (<double(*)(double, double, double) noexcept nogil>scipy.special._ufuncs_cxx._export_nct_cdf_double)(x0, x1, x2)
3177+
else:
3178+
return NAN
31783179

31793180
cpdef double nctdtridf(double x0, double x1, double x2) noexcept nogil:
31803181
"""See the documentation for scipy.special.nctdtridf"""

scipy/special/functions.json

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -615,8 +615,9 @@
615615
"ncfdtrinc": "dddd->d" }
616616
},
617617
"nctdtr": {
618-
"_cdflib_wrappers.pxd": {
619-
"nctdtr": "ddd->d" }
618+
"boost_special_functions.h++": {
619+
"nct_cdf_float": "fff->f",
620+
"nct_cdf_double": "ddd->d" }
620621
},
621622
"nctdtridf": {
622623
"_cdflib_wrappers.pxd": {
@@ -936,12 +937,6 @@
936937
"ncf_kurtosis_excess_double": "ddd->d"
937938
}
938939
},
939-
"_nct_cdf": {
940-
"boost_special_functions.h++": {
941-
"nct_cdf_float": "fff->f",
942-
"nct_cdf_double": "ddd->d"
943-
}
944-
},
945940
"_nct_pdf": {
946941
"boost_special_functions.h++": {
947942
"nct_pdf_float": "fff->f",

scipy/special/tests/test_cdflib.py

Lines changed: 136 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
- nbdtrik
1111
- nbdtrin
1212
- pdtrik
13-
- nctdtr
1413
- nctdtrit
1514
- nctdtridf
1615
- nctdtrinc
@@ -461,40 +460,6 @@ def test_chndtrix_gh2158():
461460
82.35640899964173, 84.45263768373256]
462461
assert_allclose(res, res_exp)
463462

464-
@pytest.mark.xfail_on_32bit("32bit fails due to algorithm threshold")
465-
def test_nctdtr_gh19896():
466-
# test that gh-19896 is resolved.
467-
# Compared to SciPy 1.11 results from Fortran code.
468-
dfarr = [0.98, 9.8, 98, 980]
469-
pnoncarr = [-3.8, 0.38, 3.8, 38]
470-
tarr = [0.0015, 0.15, 1.5, 15]
471-
resarr = [0.9999276519560749, 0.9999276519560749, 0.9999908831755221,
472-
0.9999990265452424, 0.3524153312279712, 0.39749697267251416,
473-
0.7168629634895805, 0.9656246449259646, 7.234804392512006e-05,
474-
7.234804392512006e-05, 0.03538804607509127, 0.795482701508521,
475-
0.0, 0.0, 0.0,
476-
0.011927908523093889, 0.9999276519560749, 0.9999276519560749,
477-
0.9999997441133123, 1.0, 0.3525155979118013,
478-
0.4076312014048369, 0.8476794017035086, 0.9999999297116268,
479-
7.234804392512006e-05, 7.234804392512006e-05, 0.013477443099785824,
480-
0.9998501512331494, 0.0, 0.0,
481-
0.0, 6.561112613212572e-07, 0.9999276519560749,
482-
0.9999276519560749, 0.9999999313496014, 1.0,
483-
0.3525281784865706, 0.40890253001898014, 0.8664672830017024,
484-
1.0, 7.234804392512006e-05, 7.234804392512006e-05,
485-
0.010990889489704836, 1.0, 0.0,
486-
0.0, 0.0, 0.0,
487-
0.9999276519560749, 0.9999276519560749, 0.9999999418789304,
488-
1.0, 0.35252945487817355, 0.40903153246690993,
489-
0.8684247068528264, 1.0, 7.234804392512006e-05,
490-
7.234804392512006e-05, 0.01075068918582911, 1.0,
491-
0.0, 0.0, 0.0, 0.0]
492-
actarr = []
493-
for df, p, t in itertools.product(dfarr, pnoncarr, tarr):
494-
actarr += [sp.nctdtr(df, p, t)]
495-
# The rtol is kept high on purpose to make it pass on 32bit systems
496-
assert_allclose(actarr, resarr, rtol=1e-6, atol=0.0)
497-
498463

499464
def test_nctdtrinc_gh19896():
500465
# test that gh-19896 is resolved.
@@ -585,3 +550,139 @@ def test_ncfdtr(dfn, dfd, nc, f, expected):
585550
# sample_idx = rng.choice(len(re), replace=False, size=12)
586551
# cases = np.array(cases)[sample_idx].tolist()
587552
assert_allclose(sp.ncfdtr(dfn, dfd, nc, f), expected, rtol=1e-13, atol=0)
553+
554+
555+
class TestNctdtr:
556+
557+
# Reference values computed with mpmath with the following script
558+
# Formula from:
559+
# Lenth, Russell V (1989). "Algorithm AS 243: Cumulative Distribution Function
560+
# of the Non-central t Distribution". Journal of the Royal Statistical Society,
561+
# Series C. 38 (1): 185-189
562+
#
563+
# Warning: may take a long time to run
564+
#
565+
# from mpmath import mp
566+
# mp.dps = 400
567+
568+
# def nct_cdf(df, nc, x):
569+
# df, nc, x = map(mp.mpf, (df, nc, x))
570+
571+
# def f(df, nc, x):
572+
# phi = mp.ncdf(-nc)
573+
# y = x * x / (x * x + df)
574+
# constant = mp.exp(-nc * nc / 2.)
575+
# def term(j):
576+
# intermediate = constant * (nc *nc / 2.)**j
577+
# p = intermediate/mp.factorial(j)
578+
# q = nc / (mp.sqrt(2.) * mp.gamma(j + 1.5)) * intermediate
579+
# first_beta_term = mp.betainc(j + 0.5, df/2., x2=y,
580+
# regularized=True)
581+
# second_beta_term = mp.betainc(j + mp.one, df/2., x2=y,
582+
# regularized=True)
583+
# return p * first_beta_term + q * second_beta_term
584+
585+
# sum_term = mp.nsum(term, [0, mp.inf])
586+
# f = phi + 0.5 * sum_term
587+
# return f
588+
589+
# if x >= 0:
590+
# result = f(df, nc, x)
591+
# else:
592+
# result = mp.one - f(df, -nc, x)
593+
# return float(result)
594+
595+
@pytest.mark.parametrize("df, nc, x, expected", [
596+
(0.98, -3.8, 0.0015, 0.9999279987514815),
597+
(0.98, -3.8, 0.15, 0.9999528361700505),
598+
(0.98, -3.8, 1.5, 0.9999908823016942),
599+
(0.98, -3.8, 15, 0.9999990264591945),
600+
(0.98, 0.38, 0.0015, 0.35241533122693),
601+
(0.98, 0.38, 0.15, 0.39749697267146983),
602+
(0.98, 0.38, 1.5, 0.716862963488558),
603+
(0.98, 0.38, 15, 0.9656246449257494),
604+
(0.98, 3.8, 0.0015, 7.26973354942293e-05),
605+
(0.98, 3.8, 0.15, 0.00012416481147589105),
606+
(0.98, 3.8, 1.5, 0.035388035775454095),
607+
(0.98, 3.8, 15, 0.7954826975430583),
608+
(0.98, 38, 0.0015, 3.02106943e-316),
609+
(0.98, 38, 0.15, 6.069970616996603e-309),
610+
(0.98, 38, 1.5, 2.591995360483094e-97),
611+
(0.98, 38, 15, 0.011927265886910935),
612+
(9.8, -3.8, 0.0015, 0.9999280776192786),
613+
(9.8, -3.8, 0.15, 0.9999599410685442),
614+
(9.8, -3.8, 1.5, 0.9999997432394788),
615+
(9.8, -3.8, 15, 0.9999999999999984),
616+
(9.8, 0.38, 0.0015, 0.3525155979107491),
617+
(9.8, 0.38, 0.15, 0.40763120140379194),
618+
(9.8, 0.38, 1.5, 0.8476794017024651),
619+
(9.8, 0.38, 15, 0.9999999297116268),
620+
(9.8, 3.8, 0.0015, 7.277620328149153e-05),
621+
(9.8, 3.8, 0.15, 0.00013024802220900652),
622+
(9.8, 3.8, 1.5, 0.013477432800072933),
623+
(9.8, 3.8, 15, 0.999850151230648),
624+
(9.8, 38, 0.0015, 3.05066095e-316),
625+
(9.8, 38, 0.15, 1.79065514676e-313),
626+
(9.8, 38, 1.5, 2.0935940165900746e-249),
627+
(9.8, 38, 15, 2.252076291604796e-09),
628+
(98, -3.8, 0.0015, 0.9999280875149109),
629+
(98, -3.8, 0.15, 0.9999608250170452),
630+
(98, -3.8, 1.5, 0.9999999304757682),
631+
(98, -3.8, 15, 1.0),
632+
(98, 0.38, 0.0015, 0.35252817848596313),
633+
(98, 0.38, 0.15, 0.40890253001794846),
634+
(98, 0.38, 1.5, 0.8664672830006552),
635+
(98, 0.38, 15, 1.0),
636+
(98, 3.8, 0.0015, 7.278609891281275e-05),
637+
(98, 3.8, 0.15, 0.0001310318674827004),
638+
(98, 3.8, 1.5, 0.010990879189991727),
639+
(98, 3.8, 15, 0.9999999999999989),
640+
(98, 38, 0.0015, 3.05437385e-316),
641+
(98, 38, 0.15, 9.1668336166e-314),
642+
(98, 38, 1.5, 1.8085884236563926e-288),
643+
(98, 38, 15, 2.7740532792035907e-50),
644+
(980, -3.8, 0.0015, 0.9999280885188965),
645+
(980, -3.8, 0.15, 0.9999609144559273),
646+
(980, -3.8, 1.5, 0.9999999410050979),
647+
(980, -3.8, 15, 1.0),
648+
(980, 0.38, 0.0015, 0.3525294548792812),
649+
(980, 0.38, 0.15, 0.4090315324657382),
650+
(980, 0.38, 1.5, 0.8684247068517293),
651+
(980, 0.38, 15, 1.0),
652+
(980, 3.8, 0.0015, 7.278710289828983e-05),
653+
(980, 3.8, 0.15, 0.00013111131667906573),
654+
(980, 3.8, 1.5, 0.010750678886113882),
655+
(980, 3.8, 15, 1.0),
656+
(980, 38, 0.0015, 3.0547506e-316),
657+
(980, 38, 0.15, 8.6191646313e-314),
658+
pytest.param(980, 38, 1.5, 1.1824454111413493e-291,
659+
marks=pytest.mark.xfail(
660+
reason="Bug in underlying Boost math implementation")),
661+
(980, 38, 15, 5.407535300713606e-105)
662+
])
663+
def test_gh19896(self, df, nc, x, expected):
664+
# test that gh-19896 is resolved.
665+
# Originally this was a regression test that used the old Fortran results
666+
# as a reference. The Fortran results were not accurate, so the reference
667+
# values were recomputed with mpmath.
668+
result = sp.nctdtr(df, nc, x)
669+
assert_allclose(result, expected, rtol=1e-13, atol=1e-303)
670+
671+
def test_nctdtr_gh8344(self):
672+
# test that gh-8344 is resolved.
673+
df, nc, x = 3000, 3, 0.1
674+
expected = 0.0018657780826323328
675+
assert_allclose(sp.nctdtr(df, nc, x), expected, rtol=1e-14)
676+
677+
@pytest.mark.parametrize(
678+
"df, nc, x, expected, rtol",
679+
[[3., 5., -2., 1.5645373999149622e-09, 5e-9],
680+
[1000., 10., 1., 1.1493552133826623e-19, 1e-13],
681+
[1e-5, -6., 2., 0.9999999990135003, 1e-13],
682+
[10., 20., 0.15, 6.426530505957303e-88, 1e-13],
683+
[1., 1., np.inf, 1.0, 0.0],
684+
[1., 1., -np.inf, 0.0, 0.0]
685+
]
686+
)
687+
def test_accuracy(self, df, nc, x, expected, rtol):
688+
assert_allclose(sp.nctdtr(df, nc, x), expected, rtol=rtol)

scipy/stats/_continuous_distns.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8102,8 +8102,7 @@ def _pdf(self, x, df, nc):
81028102
return scu._nct_pdf(x, df, nc)
81038103

81048104
def _cdf(self, x, df, nc):
8105-
with np.errstate(over='ignore'): # see gh-17432
8106-
return np.clip(scu._nct_cdf(x, df, nc), 0, 1)
8105+
return sc.nctdtr(df, nc, x)
81078106

81088107
def _ppf(self, q, df, nc):
81098108
with np.errstate(over='ignore'): # see gh-17432

0 commit comments

Comments
 (0)