Skip to content

Commit 6e6ccc7

Browse files
authored
API: stats.{PermutationMethod,BootstrapMethod}: transition to rng (SPEC 7) (scipy#21886)
* MAINT: stats.PermutationMethod: transition to rng (SPEC 7) * Apply suggestions from code review * STY: stats.PermutationMethod: fix mypy and lint complaints * MAINT: stats.PermutationMethod: adjustments per review * MAINT: stats.BootstrapMethod: transition to rng (SPEC 7)
1 parent 553bd3e commit 6e6ccc7

File tree

5 files changed

+151
-37
lines changed

5 files changed

+151
-37
lines changed

scipy/stats/_resampling.py

Lines changed: 134 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import numpy as np
55
from itertools import combinations, permutations, product
66
from collections.abc import Sequence
7-
from dataclasses import dataclass
7+
from dataclasses import dataclass, field
88
import inspect
99

1010
from scipy._lib._util import (check_random_state, _rename_parameter, rng_integers,
@@ -2167,6 +2167,20 @@ def _asdict(self):
21672167
rvs=self.rvs)
21682168

21692169

2170+
_rs_deprecation = ("Use of attribute `random_state` is deprecated and replaced by "
2171+
"`rng`. Support for `random_state` will be removed in SciPy 1.19.0. "
2172+
"To silence this warning and ensure consistent behavior in SciPy "
2173+
"1.19.0, control the RNG using attribute `rng`. Values set using "
2174+
"attribute `rng` will be validated by `np.random.default_rng`, so "
2175+
"the behavior corresponding with a given value may change compared "
2176+
"to use of `random_state`. For example, 1) `None` will result in "
2177+
"unpredictable random numbers, 2) an integer will result in a "
2178+
"different stream of random numbers, (with the same distribution), "
2179+
"and 3) `np.random` or `RandomState` instances will result in an "
2180+
"error. See the documentation of `default_rng` for more "
2181+
"information.")
2182+
2183+
21702184
@dataclass
21712185
class PermutationMethod(ResamplingMethod):
21722186
"""Configuration information for a permutation hypothesis test.
@@ -2184,24 +2198,69 @@ class PermutationMethod(ResamplingMethod):
21842198
the statistic. Batch sizes >>1 tend to be faster when the statistic
21852199
is vectorized, but memory usage scales linearly with the batch size.
21862200
Default is ``None``, which processes all resamples in a single batch.
2187-
random_state : {None, int, `numpy.random.Generator`,
2188-
`numpy.random.RandomState`}, optional
2189-
2190-
Pseudorandom number generator state used to generate resamples.
2201+
rng : `numpy.random.Generator`, optional
2202+
Pseudorandom number generator used to perform resampling.
2203+
2204+
If `rng` is passed by keyword to the initializer or the `rng` attribute is used
2205+
directly, types other than `numpy.random.Generator` are passed to
2206+
`numpy.random.default_rng` to instantiate a ``Generator``.
2207+
If `rng` is already a ``Generator`` instance, then the provided instance is
2208+
used. Specify `rng` for repeatable behavior.
2209+
2210+
If this argument is passed by position, if `random_state` is passed by keyword
2211+
into the initializer, or if the `random_state` attribute is used directly,
2212+
legacy behavior for `random_state` applies:
2213+
2214+
- If `random_state` is None (or `numpy.random`), the `numpy.random.RandomState`
2215+
singleton is used.
2216+
- If `random_state` is an int, a new ``RandomState`` instance is used,
2217+
seeded with `random_state`.
2218+
- If `random_state` is already a ``Generator`` or ``RandomState`` instance then
2219+
that instance is used.
2220+
2221+
.. versionchanged:: 1.15.0
2222+
2223+
As part of the `SPEC-007 <https://scientific-python.org/specs/spec-0007/>`_
2224+
transition from use of `numpy.random.RandomState` to
2225+
`numpy.random.Generator`, this attribute name was changed from
2226+
`random_state` to `rng`. For an interim period, both names will continue to
2227+
work, although only one may be specified at a time. After the interim
2228+
period, uses of `random_state` will emit warnings. The behaviors of both
2229+
`random_state` and `rng` are outlined above, but only `rng` should be used
2230+
in new code.
21912231
2192-
If `random_state` is already a ``Generator`` or ``RandomState``
2193-
instance, then that instance is used.
2194-
If `random_state` is an int, a new ``RandomState`` instance is used,
2195-
seeded with `random_state`.
2196-
If `random_state` is ``None`` (default), the
2197-
`numpy.random.RandomState` singleton is used.
21982232
"""
2199-
random_state: object = None
2233+
rng: object # type: ignore[misc]
2234+
_rng: object = field(init=False, repr=False, default=None) # type: ignore[assignment]
2235+
2236+
@property
2237+
def random_state(self):
2238+
# Uncomment in SciPy 1.17.0
2239+
# warnings.warn(_rs_deprecation, DeprecationWarning, stacklevel=2)
2240+
return self._rng
2241+
2242+
@random_state.setter
2243+
def random_state(self, val):
2244+
# Uncomment in SciPy 1.17.0
2245+
# warnings.warn(_rs_deprecation, DeprecationWarning, stacklevel=2)
2246+
self._rng = val
2247+
2248+
@property # type: ignore[no-redef]
2249+
def rng(self): # noqa: F811
2250+
return self._rng
2251+
2252+
@random_state.setter
2253+
def rng(self, val): # noqa: F811
2254+
self._rng = np.random.default_rng(val)
2255+
2256+
@_transition_to_rng('random_state', position_num=3, replace_doc=False)
2257+
def __init__(self, n_resamples=9999, batch=None, rng=None):
2258+
self._rng = rng # don't validate with `default_rng` during SPEC 7 transition
2259+
super().__init__(n_resamples=n_resamples, batch=batch)
22002260

22012261
def _asdict(self):
22022262
# `dataclasses.asdict` deepcopies; we don't want that.
2203-
return dict(n_resamples=self.n_resamples, batch=self.batch,
2204-
random_state=self.random_state)
2263+
return dict(n_resamples=self.n_resamples, batch=self.batch, rng=self.rng)
22052264

22062265

22072266
@dataclass
@@ -2220,26 +2279,73 @@ class BootstrapMethod(ResamplingMethod):
22202279
the statistic. Batch sizes >>1 tend to be faster when the statistic
22212280
is vectorized, but memory usage scales linearly with the batch size.
22222281
Default is ``None``, which processes all resamples in a single batch.
2223-
random_state : {None, int, `numpy.random.Generator`,
2224-
`numpy.random.RandomState`}, optional
2225-
2226-
Pseudorandom number generator state used to generate resamples.
2227-
2228-
If `random_state` is already a ``Generator`` or ``RandomState``
2229-
instance, then that instance is used.
2230-
If `random_state` is an int, a new ``RandomState`` instance is used,
2231-
seeded with `random_state`.
2232-
If `random_state` is ``None`` (default), the
2233-
`numpy.random.RandomState` singleton is used.
2234-
2235-
method : {'bca', 'percentile', 'basic'}
2282+
rng : `numpy.random.Generator`, optional
2283+
Pseudorandom number generator used to perform resampling.
2284+
2285+
If `rng` is passed by keyword to the initializer or the `rng` attribute is used
2286+
directly, types other than `numpy.random.Generator` are passed to
2287+
`numpy.random.default_rng` to instantiate a ``Generator``.
2288+
If `rng` is already a ``Generator`` instance, then the provided instance is
2289+
used. Specify `rng` for repeatable behavior.
2290+
2291+
If this argument is passed by position, if `random_state` is passed by keyword
2292+
into the initializer, or if the `random_state` attribute is used directly,
2293+
legacy behavior for `random_state` applies:
2294+
2295+
- If `random_state` is None (or `numpy.random`), the `numpy.random.RandomState`
2296+
singleton is used.
2297+
- If `random_state` is an int, a new ``RandomState`` instance is used,
2298+
seeded with `random_state`.
2299+
- If `random_state` is already a ``Generator`` or ``RandomState`` instance then
2300+
that instance is used.
2301+
2302+
.. versionchanged:: 1.15.0
2303+
2304+
As part of the `SPEC-007 <https://scientific-python.org/specs/spec-0007/>`_
2305+
transition from use of `numpy.random.RandomState` to
2306+
`numpy.random.Generator`, this attribute name was changed from
2307+
`random_state` to `rng`. For an interim period, both names will continue to
2308+
work, although only one may be specified at a time. After the interim
2309+
period, uses of `random_state` will emit warnings. The behaviors of both
2310+
`random_state` and `rng` are outlined above, but only `rng` should be used
2311+
in new code.
2312+
2313+
method : {'BCa', 'percentile', 'basic'}
22362314
Whether to use the 'percentile' bootstrap ('percentile'), the 'basic'
22372315
(AKA 'reverse') bootstrap ('basic'), or the bias-corrected and
22382316
accelerated bootstrap ('BCa', default).
2317+
22392318
"""
2240-
random_state: object = None
2319+
rng: object # type: ignore[misc]
2320+
_rng: object = field(init=False, repr=False, default=None) # type: ignore[assignment]
22412321
method: str = 'BCa'
22422322

2323+
@property
2324+
def random_state(self):
2325+
# Uncomment in SciPy 1.17.0
2326+
# warnings.warn(_rs_deprecation, DeprecationWarning, stacklevel=2)
2327+
return self._rng
2328+
2329+
@random_state.setter
2330+
def random_state(self, val):
2331+
# Uncomment in SciPy 1.17.0
2332+
# warnings.warn(_rs_deprecation, DeprecationWarning, stacklevel=2)
2333+
self._rng = val
2334+
2335+
@property # type: ignore[no-redef]
2336+
def rng(self): # noqa: F811
2337+
return self._rng
2338+
2339+
@random_state.setter
2340+
def rng(self, val): # noqa: F811
2341+
self._rng = np.random.default_rng(val)
2342+
2343+
@_transition_to_rng('random_state', position_num=3, replace_doc=False)
2344+
def __init__(self, n_resamples=9999, batch=None, rng=None, method='BCa'):
2345+
self._rng = rng # don't validate with `default_rng` during SPEC 7 transition
2346+
self.method = method
2347+
super().__init__(n_resamples=n_resamples, batch=batch)
2348+
22432349
def _asdict(self):
22442350
# `dataclasses.asdict` deepcopies; we don't want that.
22452351
return dict(n_resamples=self.n_resamples, batch=self.batch,

scipy/stats/tests/test_correlation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def test_permutation_asymptotic(self, y_continuous):
4444
x = rng.random(size=shape)
4545
y = (rng.random(size=shape) if y_continuous
4646
else rng.integers(0, 10, size=shape))
47-
method = stats.PermutationMethod(random_state=rng)
47+
method = stats.PermutationMethod(rng=rng)
4848
res = stats.chatterjeexi(x, y, method=method,
4949
y_continuous=y_continuous, axis=-1)
5050
ref = stats.chatterjeexi(x, y, y_continuous=y_continuous, axis=-1)

scipy/stats/tests/test_hypotests.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1817,19 +1817,19 @@ def test_method(self):
18171817
x, y = rng.random(size=(2, 10))
18181818

18191819
rng = np.random.default_rng(1520514347193347862)
1820-
method = stats.PermutationMethod(n_resamples=10, random_state=rng)
1820+
method = stats.PermutationMethod(n_resamples=10, rng=rng)
18211821
res1 = stats.bws_test(x, y, method=method)
18221822

18231823
assert len(res1.null_distribution) == 10
18241824

18251825
rng = np.random.default_rng(1520514347193347862)
1826-
method = stats.PermutationMethod(n_resamples=10, random_state=rng)
1826+
method = stats.PermutationMethod(n_resamples=10, rng=rng)
18271827
res2 = stats.bws_test(x, y, method=method)
18281828

18291829
assert_allclose(res1.null_distribution, res2.null_distribution)
18301830

18311831
rng = np.random.default_rng(5205143471933478621)
1832-
method = stats.PermutationMethod(n_resamples=10, random_state=rng)
1832+
method = stats.PermutationMethod(n_resamples=10, rng=rng)
18331833
res3 = stats.bws_test(x, y, method=method)
18341834

18351835
assert not np.allclose(res3.null_distribution, res1.null_distribution)

scipy/stats/tests/test_morestats.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -464,7 +464,7 @@ def test_example2a(self):
464464
assert_allclose(p, 0.0041, atol=0.00025)
465465

466466
rng = np.random.default_rng(6989860141921615054)
467-
method = stats.PermutationMethod(n_resamples=9999, random_state=rng)
467+
method = stats.PermutationMethod(n_resamples=9999, rng=rng)
468468
res = stats.anderson_ksamp(samples, midrank=False, method=method)
469469
assert_array_equal(res.statistic, Tk)
470470
assert_array_equal(res.critical_values, tm)
@@ -1728,14 +1728,15 @@ def test_permutation_method(self, size):
17281728

17291729
x = rng.random(size=size*10)
17301730
rng = np.random.default_rng(59234803482850134)
1731-
pm = stats.PermutationMethod(n_resamples=99, random_state=rng)
1731+
pm = stats.PermutationMethod(n_resamples=99, rng=rng)
17321732
ref = stats.wilcoxon(x, method=pm)
1733+
# preserve use of old random_state during SPEC 7 transition
17331734
rng = np.random.default_rng(59234803482850134)
17341735
pm = stats.PermutationMethod(n_resamples=99, random_state=rng)
17351736
res = stats.wilcoxon(x, method=pm)
17361737

17371738
assert_equal(np.round(res.pvalue, 2), res.pvalue) # n_resamples used
1738-
assert_equal(res.pvalue, ref.pvalue) # random_state used
1739+
assert_equal(res.pvalue, ref.pvalue) # rng/random_state used
17391740

17401741
def test_method_auto_nan_propagate_ND_length_gt_50_gh20591(self):
17411742
# When method!='asymptotic', nan_policy='propagate', and a slice of

scipy/stats/tests/test_stats.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -631,7 +631,7 @@ def test_resampling_pvalue(self, method, alternative):
631631
size = (2, 100) if method == 'permutation' else (2, 1000)
632632
x = rng.normal(size=size)
633633
y = rng.normal(size=size)
634-
methods = {'permutation': stats.PermutationMethod(random_state=rng),
634+
methods = {'permutation': stats.PermutationMethod(rng=rng),
635635
'monte_carlo': stats.MonteCarloMethod(rvs=(rng.normal,)*2)}
636636
method = methods[method]
637637
res = stats.pearsonr(x, y, alternative=alternative, method=method, axis=-1)
@@ -647,12 +647,19 @@ def test_bootstrap_ci(self, alternative):
647647
y = rng.normal(size=(2, 100))
648648
res = stats.pearsonr(x, y, alternative=alternative, axis=-1)
649649

650+
# preserve use of old random_state during SPEC 7 transition
651+
rng = np.random.default_rng(724358723498249852)
650652
method = stats.BootstrapMethod(random_state=rng)
651653
res_ci = res.confidence_interval(method=method)
652654
ref_ci = res.confidence_interval()
653-
654655
assert_allclose(res_ci, ref_ci, atol=1.5e-2)
655656

657+
# `rng` is the new argument name
658+
rng = np.random.default_rng(724358723498249852)
659+
method = stats.BootstrapMethod(rng=rng)
660+
res_ci2 = res.confidence_interval(method=method)
661+
assert_allclose(res_ci2, res_ci)
662+
656663
@pytest.mark.skip_xp_backends(np_only=True)
657664
@pytest.mark.parametrize('axis', [0, 1])
658665
def test_axis01(self, axis, xp):

0 commit comments

Comments
 (0)