Skip to content

Commit a803070

Browse files
authored
MAINT: stats.ContinuousDistribution: improve doc generation; fix broken doctests by making stats.Uniform public again (scipy#22027)
* MAINT: stats.ContinuousDistribution: revert to dynamic doc generation * ENH: stats.Uniform: add uniform distribution * BUILD: stats: remove reference to eliminated _new_distribution_docs.json * MAINT: stats.Uniform: improvements * MAINT: integrate.tanhsinh: fix regression * DOC: stats._distribution_infrastructure: fix doctests [skip ci]
1 parent 0ec0fb3 commit a803070

File tree

8 files changed

+113
-81
lines changed

8 files changed

+113
-81
lines changed

scipy/integrate/_tanhsinh.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -382,8 +382,8 @@ def tanhsinh(f, a, b, *, args=(), log=False, maxlevel=None, minlevel=2,
382382
n=minlevel, nit=nit, nfev=nfev, status=status, # iter/eval counts
383383
xr0=xr0, fr0=fr0, wr0=wr0, xl0=xl0, fl0=fl0, wl0=wl0, d4=d4, # err est
384384
ainf=ainf, binf=binf, abinf=abinf, a0=xp.reshape(a0, (-1, 1)), # transforms
385-
# Store the weights in an object so they can't get compressed
386-
# Using RichResult to allow dot notation, but a dict would work
385+
# Store the xjc/wj pair cache in an object so they can't get compressed
386+
# Using RichResult to allow dot notation, but a dictionary would suffice
387387
pair_cache=_RichResult(xjc=None, wj=None, indices=[0], h0=None)) # pair cache
388388

389389
# Constant scalars don't need to be put in `work` unless they need to be

scipy/stats/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -470,6 +470,7 @@
470470
471471
make_distribution
472472
Normal
473+
Uniform
473474
Mixture
474475
order_statistic
475476
truncate
@@ -648,7 +649,7 @@
648649
from ._distribution_infrastructure import (
649650
make_distribution, Mixture, order_statistic, truncate, exp, log, abs
650651
)
651-
from ._new_distributions import Normal
652+
from ._new_distributions import Normal, Uniform
652653
from ._mgc import multiscale_graphcorr
653654
from ._correlation import chatterjeexi
654655

scipy/stats/_distribution_infrastructure.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1174,11 +1174,11 @@ def _log1mexp(x):
11741174
Examples
11751175
--------
11761176
>>> import numpy as np
1177-
>>> from scipy.special import log1m
1177+
>>> from scipy.stats._distribution_infrastructure import _log1mexp
11781178
>>> x = 1e-300 # log of a number very close to 1
11791179
>>> _log1mexp(x) # log of the complement of a number very close to 1
11801180
-690.7755278982137
1181-
>>> # p.log(1 - np.exp(x)) # -inf; emits warning
1181+
>>> # np.log1p(-np.exp(x)) # -inf; emits warning
11821182
11831183
"""
11841184
def f1(x):
@@ -3195,7 +3195,16 @@ def _logmoment(self, order=1, *, logcenter=None, standardized=False):
31953195
def _logmoment_quad(self, order, logcenter, **params):
31963196
def logintegrand(x, order, logcenter, **params):
31973197
logpdf = self._logpdf_dispatch(x, **params)
3198-
return logpdf + order*_logexpxmexpy(np.log(x+0j), logcenter)
3198+
return logpdf + order * _logexpxmexpy(np.log(x + 0j), logcenter)
3199+
## if logx == logcenter, `_logexpxmexpy` returns (-inf + 0j)
3200+
## multiplying by order produces (-inf + nan j) - bad
3201+
## We're skipping logmoment tests, so we might not need to fix
3202+
## now, but if we ever do run them, this might help:
3203+
# logx = np.log(x+0j)
3204+
# out = np.asarray(logpdf + order*_logexpxmexpy(logx, logcenter))
3205+
# i = (logx == logcenter)
3206+
# out[i] = logpdf[i]
3207+
# return out
31993208
return self._quadrature(logintegrand, args=(order, logcenter),
32003209
params=params, log=True)
32013210

@@ -4208,11 +4217,12 @@ class OrderStatisticDistribution(TransformedDistribution):
42084217
>>> import numpy as np
42094218
>>> import matplotlib.pyplot as plt
42104219
>>> from scipy import stats
4220+
>>> from scipy.stats._distribution_infrastructure import OrderStatisticDistribution
42114221
>>>
42124222
>>> X = stats.Normal()
42134223
>>> data = X.sample(shape=(10000, 5))
42144224
>>> ranks = np.sort(data, axis=1)
4215-
>>> Y = stats.OrderStatisticDistribution(X, r=4, n=5)
4225+
>>> Y = OrderStatisticDistribution(X, r=4, n=5)
42164226
>>>
42174227
>>> ax = plt.gca()
42184228
>>> Y.plot(ax=ax)

scipy/stats/_new_distribution_docs.json

Lines changed: 0 additions & 3 deletions
This file was deleted.

scipy/stats/_new_distributions.py

Lines changed: 50 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
11
import sys
2-
import json
3-
import os
42

53
import numpy as np
64
from numpy import inf
@@ -10,7 +8,7 @@
108
ContinuousDistribution, _RealDomain, _RealParameter, _Parameterization,
119
_combine_docs)
1210

13-
__all__ = ['Normal']
11+
__all__ = ['Normal', 'Uniform']
1412

1513

1614
class Normal(ContinuousDistribution):
@@ -269,8 +267,7 @@ def _moment_raw_formula(self, order, log_a, log_b, **kwargs):
269267
return t1 * t2
270268

271269

272-
# currently for testing only
273-
class _Uniform(ContinuousDistribution):
270+
class Uniform(ContinuousDistribution):
274271
r"""Uniform distribution.
275272
276273
The probability density function of the uniform distribution is:
@@ -284,7 +281,7 @@ class _Uniform(ContinuousDistribution):
284281

285282
_a_domain = _RealDomain(endpoints=(-inf, inf))
286283
_b_domain = _RealDomain(endpoints=('a', inf))
287-
_x_support = _RealDomain(endpoints=('a', 'b'), inclusive=(False, False))
284+
_x_support = _RealDomain(endpoints=('a', 'b'), inclusive=(True, True))
288285

289286
_a_param = _RealParameter('a', domain=_a_domain, typical=(1e-3, 0.9))
290287
_b_param = _RealParameter('b', domain=_b_domain, typical=(1.1, 1e3))
@@ -304,15 +301,56 @@ def _process_parameters(self, a=None, b=None, ab=None, **kwargs):
304301
kwargs.update(dict(a=a, b=b, ab=ab))
305302
return kwargs
306303

304+
def _logpdf_formula(self, x, *, ab, **kwargs):
305+
return np.where(np.isnan(x), np.nan, -np.log(ab))
306+
307307
def _pdf_formula(self, x, *, ab, **kwargs):
308-
return np.full(x.shape, 1/ab)
308+
return np.where(np.isnan(x), np.nan, 1/ab)
309+
310+
def _logcdf_formula(self, x, *, a, ab, **kwargs):
311+
with np.errstate(divide='ignore'):
312+
return np.log(x - a) - np.log(ab)
313+
314+
def _cdf_formula(self, x, *, a, ab, **kwargs):
315+
return (x - a) / ab
316+
317+
def _logccdf_formula(self, x, *, b, ab, **kwargs):
318+
with np.errstate(divide='ignore'):
319+
return np.log(b - x) - np.log(ab)
320+
321+
def _ccdf_formula(self, x, *, b, ab, **kwargs):
322+
return (b - x) / ab
309323

310-
def _icdf_formula(self, x, a, b, ab, **kwargs):
311-
return a + ab*x
324+
def _icdf_formula(self, p, *, a, ab, **kwargs):
325+
return a + ab*p
326+
327+
def _iccdf_formula(self, p, *, b, ab, **kwargs):
328+
return b - ab*p
329+
330+
def _entropy_formula(self, *, ab, **kwargs):
331+
return np.log(ab)
312332

313333
def _mode_formula(self, *, a, b, ab, **kwargs):
314334
return a + 0.5*ab
315335

336+
def _median_formula(self, *, a, b, ab, **kwargs):
337+
return a + 0.5*ab
338+
339+
def _moment_raw_formula(self, order, a, b, ab, **kwargs):
340+
np1 = order + 1
341+
return (b**np1 - a**np1) / (np1 * ab)
342+
343+
def _moment_central_formula(self, order, ab, **kwargs):
344+
return ab**2/12 if order == 2 else None
345+
346+
_moment_central_formula.orders = [2] # type: ignore[attr-defined]
347+
348+
def _sample_formula(self, sample_shape, full_shape, rng, a, b, ab, **kwargs):
349+
try:
350+
return rng.uniform(a, b, size=full_shape)[()]
351+
except OverflowError: # happens when there are NaNs
352+
return rng.uniform(0, 1, size=full_shape)*ab + a
353+
316354

317355
class _Gamma(ContinuousDistribution):
318356
# Gamma distribution for testing only
@@ -331,28 +369,7 @@ def _pdf_formula(self, x, *, a, **kwargs):
331369

332370
# Distribution classes need only define the summary and beginning of the extended
333371
# summary portion of the class documentation. All other documentation, including
334-
# examples, is generated automatically. This may be time-consuming for distributions
335-
# with slow methods, so we generate the documentation offline and store it as a static
336-
# `_new_distributions_docs.json` file. After making updates to the documentation of
337-
# a class, execute this file as a script to re-generate `_new_distribution_docs.json`.
338-
# Improvements to this system are welcome.
339-
_docfile = "_new_distribution_docs.json"
340-
_docdir = os.path.dirname(__file__)
341-
_docpath = os.path.abspath(os.path.join(_docdir, _docfile))
372+
# examples, is generated automatically.
342373
_module = sys.modules[__name__].__dict__
343-
344-
if __name__ == "__main__":
345-
# When executed as a script, generate the complete docstring for each distribution
346-
# class (`_combine_docs`), store them in a dictionary, and write to a file.
347-
docs = {}
348-
for dist_name in __all__:
349-
docs[dist_name] = _combine_docs(_module[dist_name])
350-
with open(_docpath, 'w') as f:
351-
json.dump(docs, f, indent=" ")
352-
353-
# When imported, load the dictionary from the file, and assign to each distribution
354-
# class's `__doc__` attribute the corresponding docstring.
355-
with open(_docpath) as f:
356-
docs = json.load(f)
357-
for dist_name in __all__:
358-
_module[dist_name].__doc__ = docs[dist_name]
374+
for dist_name in __all__:
375+
_module[dist_name].__doc__ = _combine_docs(_module[dist_name])

scipy/stats/_probability_distribution.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -415,7 +415,7 @@ def median(self, *, method):
415415
Compute the median:
416416
417417
>>> X.median()
418-
5
418+
np.float64(5.0)
419419
>>> X.median() == X.icdf(0.5) == X.iccdf(0.5)
420420
True
421421

scipy/stats/meson.build

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,6 @@ py3.install_sources([
134134
'_multicomp.py',
135135
'_multivariate.py',
136136
'_new_distributions.py',
137-
'_new_distribution_docs.json',
138137
'_odds_ratio.py',
139138
'_page_trend_test.py',
140139
'_probability_distribution.py',

0 commit comments

Comments
 (0)