MAINT: stats.ContinuousDistribution: improve doc generation; fix broken doctests by making stats.Uniform public again (scipy#22027)

mdhaber · web-flow · commit a8030703a24f · 2024-12-11T18:09:35.000-05:00
* MAINT: stats.ContinuousDistribution: revert to dynamic doc generation

* ENH: stats.Uniform: add uniform distribution

* BUILD: stats: remove reference to eliminated _new_distribution_docs.json

* MAINT: stats.Uniform: improvements

* MAINT: integrate.tanhsinh: fix regression

* DOC: stats._distribution_infrastructure: fix doctests

[skip ci]
diff --git a/scipy/integrate/_tanhsinh.py b/scipy/integrate/_tanhsinh.py
@@ -382,8 +382,8 @@ def tanhsinh(f, a, b, *, args=(), log=False, maxlevel=None, minlevel=2,
         n=minlevel, nit=nit, nfev=nfev, status=status,  # iter/eval counts
         xr0=xr0, fr0=fr0, wr0=wr0, xl0=xl0, fl0=fl0, wl0=wl0, d4=d4,  # err est
         ainf=ainf, binf=binf, abinf=abinf, a0=xp.reshape(a0, (-1, 1)),  # transforms
-        # Store the weights in an object so they can't get compressed
-        # Using RichResult to allow dot notation, but a dict would work
+        # Store the xjc/wj pair cache in an object so they can't get compressed
+        # Using RichResult to allow dot notation, but a dictionary would suffice
         pair_cache=_RichResult(xjc=None, wj=None, indices=[0], h0=None))  # pair cache
 
     # Constant scalars don't need to be put in `work` unless they need to be
diff --git a/scipy/stats/__init__.py b/scipy/stats/__init__.py
@@ -470,6 +470,7 @@
 
    make_distribution
    Normal
+   Uniform
    Mixture
    order_statistic
    truncate
@@ -648,7 +649,7 @@
 from ._distribution_infrastructure import (
     make_distribution, Mixture, order_statistic, truncate, exp, log, abs
 )
-from ._new_distributions import Normal
+from ._new_distributions import Normal, Uniform
 from ._mgc import multiscale_graphcorr
 from ._correlation import chatterjeexi
 
diff --git a/scipy/stats/_distribution_infrastructure.py b/scipy/stats/_distribution_infrastructure.py
@@ -1174,11 +1174,11 @@ def _log1mexp(x):
     Examples
     --------
     >>> import numpy as np
-    >>> from scipy.special import log1m
+    >>> from scipy.stats._distribution_infrastructure import _log1mexp
     >>> x = 1e-300  # log of a number very close to 1
     >>> _log1mexp(x)  # log of the complement of a number very close to 1
     -690.7755278982137
-    >>> # p.log(1 - np.exp(x))  # -inf; emits warning
+    >>> # np.log1p(-np.exp(x))  # -inf; emits warning
 
     """
     def f1(x):
@@ -3195,7 +3195,16 @@ def _logmoment(self, order=1, *, logcenter=None, standardized=False):
     def _logmoment_quad(self, order, logcenter, **params):
         def logintegrand(x, order, logcenter, **params):
             logpdf = self._logpdf_dispatch(x, **params)
-            return logpdf + order*_logexpxmexpy(np.log(x+0j), logcenter)
+            return logpdf + order * _logexpxmexpy(np.log(x + 0j), logcenter)
+            ## if logx == logcenter, `_logexpxmexpy` returns (-inf + 0j)
+            ## multiplying by order produces (-inf + nan j) - bad
+            ## We're skipping logmoment tests, so we might not need to fix this
+            ## now, but if we ever do run them, this might help:
+            # logx = np.log(x+0j)
+            # out = np.asarray(logpdf + order*_logexpxmexpy(logx, logcenter))
+            # i = (logx == logcenter)
+            # out[i] = logpdf[i]
+            # return out
         return self._quadrature(logintegrand, args=(order, logcenter),
                                 params=params, log=True)
 
@@ -4208,11 +4217,12 @@ class OrderStatisticDistribution(TransformedDistribution):
     >>> import numpy as np
     >>> import matplotlib.pyplot as plt
     >>> from scipy import stats
+    >>> from scipy.stats._distribution_infrastructure import OrderStatisticDistribution
     >>>
     >>> X = stats.Normal()
     >>> data = X.sample(shape=(10000, 5))
     >>> ranks = np.sort(data, axis=1)
-    >>> Y = stats.OrderStatisticDistribution(X, r=4, n=5)
+    >>> Y = OrderStatisticDistribution(X, r=4, n=5)
     >>>
     >>> ax = plt.gca()
     >>> Y.plot(ax=ax)
diff --git a/scipy/stats/_new_distribution_docs.json b/scipy/stats/_new_distribution_docs.json
diff --git a/scipy/stats/_new_distributions.py b/scipy/stats/_new_distributions.py
@@ -1,6 +1,4 @@
 import sys
-import json
-import os
 
 import numpy as np
 from numpy import inf
@@ -10,7 +8,7 @@
     ContinuousDistribution, _RealDomain, _RealParameter, _Parameterization,
     _combine_docs)
 
-__all__ = ['Normal']
+__all__ = ['Normal', 'Uniform']
 
 
 class Normal(ContinuousDistribution):
@@ -269,8 +267,7 @@ def _moment_raw_formula(self, order, log_a, log_b, **kwargs):
         return t1 * t2
 
 
-# currently for testing only
-class _Uniform(ContinuousDistribution):
+class Uniform(ContinuousDistribution):
     r"""Uniform distribution.
 
     The probability density function of the uniform distribution is:
@@ -284,7 +281,7 @@ class _Uniform(ContinuousDistribution):
 
     _a_domain = _RealDomain(endpoints=(-inf, inf))
     _b_domain = _RealDomain(endpoints=('a', inf))
-    _x_support = _RealDomain(endpoints=('a', 'b'), inclusive=(False, False))
+    _x_support = _RealDomain(endpoints=('a', 'b'), inclusive=(True, True))
 
     _a_param = _RealParameter('a', domain=_a_domain, typical=(1e-3, 0.9))
     _b_param = _RealParameter('b', domain=_b_domain, typical=(1.1, 1e3))
@@ -304,15 +301,56 @@ def _process_parameters(self, a=None, b=None, ab=None, **kwargs):
         kwargs.update(dict(a=a, b=b, ab=ab))
         return kwargs
 
+    def _logpdf_formula(self, x, *, ab, **kwargs):
+        return np.where(np.isnan(x), np.nan, -np.log(ab))
+
     def _pdf_formula(self, x, *, ab, **kwargs):
-        return np.full(x.shape, 1/ab)
+        return np.where(np.isnan(x), np.nan, 1/ab)
+
+    def _logcdf_formula(self, x, *, a, ab, **kwargs):
+        with np.errstate(divide='ignore'):
+            return np.log(x - a) - np.log(ab)
+
+    def _cdf_formula(self, x, *, a, ab, **kwargs):
+        return (x - a) / ab
+
+    def _logccdf_formula(self, x, *, b, ab, **kwargs):
+        with np.errstate(divide='ignore'):
+            return np.log(b - x) - np.log(ab)
+
+    def _ccdf_formula(self, x, *, b, ab, **kwargs):
+        return (b - x) / ab
 
-    def _icdf_formula(self, x, a, b, ab, **kwargs):
-        return a + ab*x
+    def _icdf_formula(self, p, *, a, ab, **kwargs):
+        return a + ab*p
+
+    def _iccdf_formula(self, p, *, b, ab, **kwargs):
+        return b - ab*p
+
+    def _entropy_formula(self, *, ab, **kwargs):
+        return np.log(ab)
 
     def _mode_formula(self, *, a, b, ab, **kwargs):
         return a + 0.5*ab
 
+    def _median_formula(self, *, a, b, ab, **kwargs):
+        return a + 0.5*ab
+
+    def _moment_raw_formula(self, order, a, b, ab, **kwargs):
+        np1 = order + 1
+        return (b**np1 - a**np1) / (np1 * ab)
+
+    def _moment_central_formula(self, order, ab, **kwargs):
+        return ab**2/12 if order == 2 else None
+
+    _moment_central_formula.orders = [2]  # type: ignore[attr-defined]
+
+    def _sample_formula(self, sample_shape, full_shape, rng, a, b, ab, **kwargs):
+        try:
+            return rng.uniform(a, b, size=full_shape)[()]
+        except OverflowError:  # happens when there are NaNs
+            return rng.uniform(0, 1, size=full_shape)*ab + a
+
 
 class _Gamma(ContinuousDistribution):
     # Gamma distribution for testing only
@@ -331,28 +369,7 @@ def _pdf_formula(self, x, *, a, **kwargs):
 
 # Distribution classes need only define the summary and beginning of the extended
 # summary portion of the class documentation. All other documentation, including
-# examples, is generated automatically. This may be time-consuming for distributions
-# with slow methods, so we generate the documentation offline and store it as a static
-# `_new_distributions_docs.json` file. After making updates to the documentation of
-# a class, execute this file as a script to re-generate `_new_distribution_docs.json`.
-# Improvements to this system are welcome.
-_docfile = "_new_distribution_docs.json"
-_docdir = os.path.dirname(__file__)
-_docpath = os.path.abspath(os.path.join(_docdir, _docfile))
+# examples, is generated automatically.
 _module = sys.modules[__name__].__dict__
-
-if __name__ == "__main__":
-    # When executed as a script, generate the complete docstring for each distribution
-    # class (`_combine_docs`), store them in a dictionary, and write to a file.
-    docs = {}
-    for dist_name in __all__:
-        docs[dist_name] = _combine_docs(_module[dist_name])
-    with open(_docpath, 'w') as f:
-        json.dump(docs, f, indent="    ")
-
-# When imported, load the dictionary from the file, and assign to each distribution
-# class's `__doc__` attribute the corresponding docstring.
-with open(_docpath) as f:
-    docs = json.load(f)
-    for dist_name in __all__:
-        _module[dist_name].__doc__ = docs[dist_name]
+for dist_name in __all__:
+    _module[dist_name].__doc__ = _combine_docs(_module[dist_name])
diff --git a/scipy/stats/_probability_distribution.py b/scipy/stats/_probability_distribution.py
@@ -415,7 +415,7 @@ def median(self, *, method):
         Compute the median:
 
         >>> X.median()
-        5
+        np.float64(5.0)
         >>> X.median() == X.icdf(0.5) == X.iccdf(0.5)
         True
 
diff --git a/scipy/stats/meson.build b/scipy/stats/meson.build
@@ -134,7 +134,6 @@ py3.install_sources([
     '_multicomp.py',
     '_multivariate.py',
     '_new_distributions.py',
-    '_new_distribution_docs.json',
     '_odds_ratio.py',
     '_page_trend_test.py',
     '_probability_distribution.py',
diff --git a/scipy/stats/tests/test_continuous.py b/scipy/stats/tests/test_continuous.py