Skip to content

Commit f51a5df

Browse files
committed
MAINT: stats.goodness_of_fit: transition to rng (SPEC 7)
1 parent c3da43f commit f51a5df

File tree

2 files changed

+38
-43
lines changed

2 files changed

+38
-43
lines changed

scipy/stats/_fit.py

Lines changed: 18 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
from collections import namedtuple
33
import numpy as np
44
from scipy import optimize, stats
5-
from scipy._lib._util import check_random_state
5+
from scipy._lib._util import check_random_state, _transition_to_rng
66

77

88
def _combine_bounds(name, user_bounds, shape_domain, integral):
@@ -738,9 +738,10 @@ def nlpsf(free_params, data=data): # bind data NOW
738738
'null_distribution'))
739739

740740

741+
@_transition_to_rng('random_state')
741742
def goodness_of_fit(dist, data, *, known_params=None, fit_params=None,
742743
guessed_params=None, statistic='ad', n_mc_samples=9999,
743-
random_state=None):
744+
rng=None):
744745
r"""
745746
Perform a goodness of fit test comparing data to a distribution family.
746747
@@ -797,18 +798,11 @@ def goodness_of_fit(dist, data, *, known_params=None, fit_params=None,
797798
The number of Monte Carlo samples drawn from the null hypothesized
798799
distribution to form the null distribution of the statistic. The
799800
sample size of each is the same as the given `data`.
800-
random_state : {None, int, `numpy.random.Generator`,
801-
`numpy.random.RandomState`}, optional
802-
803-
Pseudorandom number generator state used to generate the Monte Carlo
804-
samples.
805-
806-
If `random_state` is ``None`` (default), the
807-
`numpy.random.RandomState` singleton is used.
808-
If `random_state` is an int, a new ``RandomState`` instance is used,
809-
seeded with `random_state`.
810-
If `random_state` is already a ``Generator`` or ``RandomState``
811-
instance, then the provided instance is used.
801+
rng : `numpy.random.Generator`, optional
802+
Pseudorandom number generator state. When `rng` is None, a new
803+
`numpy.random.Generator` is created using entropy from the
804+
operating system. Types other than `numpy.random.Generator` are
805+
passed to `numpy.random.default_rng` to instantiate a ``Generator``.
812806
813807
Returns
814808
-------
@@ -996,7 +990,7 @@ def goodness_of_fit(dist, data, *, known_params=None, fit_params=None,
996990
997991
>>> known_params = {'loc': loc, 'scale': scale}
998992
>>> res = stats.goodness_of_fit(stats.norm, x, known_params=known_params,
999-
... statistic='ks', random_state=rng)
993+
... statistic='ks', rng=rng)
1000994
>>> res.statistic, res.pvalue
1001995
(0.1119257570456813, 0.2788)
1002996
@@ -1030,7 +1024,7 @@ def goodness_of_fit(dist, data, *, known_params=None, fit_params=None,
10301024
as described above. This is where `goodness_of_fit` excels.
10311025
10321026
>>> res = stats.goodness_of_fit(stats.norm, x, statistic='ks',
1033-
... random_state=rng)
1027+
... rng=rng)
10341028
>>> res.statistic, res.pvalue
10351029
(0.1119257570456813, 0.0196)
10361030
@@ -1062,7 +1056,7 @@ def goodness_of_fit(dist, data, *, known_params=None, fit_params=None,
10621056
estimate it directly.
10631057
10641058
>>> res = stats.goodness_of_fit(stats.norm, x, statistic='ad',
1065-
... random_state=rng)
1059+
... rng=rng)
10661060
>>> res.statistic, res.pvalue
10671061
(1.2139573337497467, 0.0034)
10681062
@@ -1078,7 +1072,7 @@ def goodness_of_fit(dist, data, *, known_params=None, fit_params=None,
10781072
>>> rng = np.random.default_rng()
10791073
>>> x = stats.chi(df=2.2, loc=0, scale=2).rvs(size=1000, random_state=rng)
10801074
>>> res = stats.goodness_of_fit(stats.rayleigh, x, statistic='cvm',
1081-
... known_params={'loc': 0}, random_state=rng)
1075+
... known_params={'loc': 0}, rng=rng)
10821076
10831077
This executes fairly quickly, but to check the reliability of the ``fit``
10841078
method, we should inspect the fit result.
@@ -1118,9 +1112,9 @@ def goodness_of_fit(dist, data, *, known_params=None, fit_params=None,
11181112
11191113
"""
11201114
args = _gof_iv(dist, data, known_params, fit_params, guessed_params,
1121-
statistic, n_mc_samples, random_state)
1115+
statistic, n_mc_samples, rng)
11221116
(dist, data, fixed_nhd_params, fixed_rfd_params, guessed_nhd_params,
1123-
guessed_rfd_params, statistic, n_mc_samples_int, random_state) = args
1117+
guessed_rfd_params, statistic, n_mc_samples_int, rng) = args
11241118

11251119
# Fit null hypothesis distribution to data
11261120
nhd_fit_fun = _get_fit_fun(dist, data, guessed_nhd_params,
@@ -1129,7 +1123,7 @@ def goodness_of_fit(dist, data, *, known_params=None, fit_params=None,
11291123
nhd_dist = dist(*nhd_vals)
11301124

11311125
def rvs(size):
1132-
return nhd_dist.rvs(size=size, random_state=random_state)
1126+
return nhd_dist.rvs(size=size, random_state=rng)
11331127

11341128
# Define statistic
11351129
fit_fun = _get_fit_fun(dist, data, guessed_rfd_params, fixed_rfd_params)
@@ -1299,7 +1293,7 @@ def _cramer_von_mises(dist, data, axis):
12991293

13001294

13011295
def _gof_iv(dist, data, known_params, fit_params, guessed_params, statistic,
1302-
n_mc_samples, random_state):
1296+
n_mc_samples, rng):
13031297

13041298
if not isinstance(dist, stats.rv_continuous):
13051299
message = ("`dist` must be a (non-frozen) instance of "
@@ -1349,7 +1343,7 @@ def _gof_iv(dist, data, known_params, fit_params, guessed_params, statistic,
13491343
message = "`n_mc_samples` must be an integer."
13501344
raise TypeError(message)
13511345

1352-
random_state = check_random_state(random_state)
1346+
rng = check_random_state(rng)
13531347

13541348
return (dist, data, fixed_nhd_params, fixed_rfd_params, guessed_nhd_params,
1355-
guessed_rfd_params, statistic, n_mc_samples_int, random_state)
1349+
guessed_rfd_params, statistic, n_mc_samples_int, rng)

scipy/stats/tests/test_fit.py

Lines changed: 20 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -828,23 +828,24 @@ def test_gof_iv(self):
828828
with pytest.raises(TypeError, match=message):
829829
goodness_of_fit(dist, x, n_mc_samples=1000.5)
830830

831-
message = "'herring' cannot be used to seed a"
832-
with pytest.raises(ValueError, match=message):
833-
goodness_of_fit(dist, x, random_state='herring')
831+
message = "SeedSequence expects int or sequence"
832+
with pytest.raises(TypeError, match=message):
833+
goodness_of_fit(dist, x, rng='herring')
834834

835835
def test_against_ks(self):
836836
rng = np.random.default_rng(8517426291317196949)
837837
x = examgrades
838838
known_params = {'loc': np.mean(x), 'scale': np.std(x, ddof=1)}
839839
res = goodness_of_fit(stats.norm, x, known_params=known_params,
840-
statistic='ks', random_state=rng)
840+
statistic='ks', rng=rng)
841841
ref = stats.kstest(x, stats.norm(**known_params).cdf, method='exact')
842842
assert_allclose(res.statistic, ref.statistic) # ~0.0848
843843
assert_allclose(res.pvalue, ref.pvalue, atol=5e-3) # ~0.335
844844

845845
def test_against_lilliefors(self):
846846
rng = np.random.default_rng(2291803665717442724)
847847
x = examgrades
848+
# preserve use of old random_state during SPEC 7 transition
848849
res = goodness_of_fit(stats.norm, x, statistic='ks', random_state=rng)
849850
known_params = {'loc': np.mean(x), 'scale': np.std(x, ddof=1)}
850851
ref = stats.kstest(x, stats.norm(**known_params).cdf, method='exact')
@@ -856,7 +857,7 @@ def test_against_cvm(self):
856857
x = examgrades
857858
known_params = {'loc': np.mean(x), 'scale': np.std(x, ddof=1)}
858859
res = goodness_of_fit(stats.norm, x, known_params=known_params,
859-
statistic='cvm', random_state=rng)
860+
statistic='cvm', rng=rng)
860861
ref = stats.cramervonmises(x, stats.norm(**known_params).cdf)
861862
assert_allclose(res.statistic, ref.statistic) # ~0.090
862863
assert_allclose(res.pvalue, ref.pvalue, atol=5e-3) # ~0.636
@@ -868,7 +869,7 @@ def test_against_anderson_case_0(self):
868869
# loc that produced critical value of statistic found w/ root_scalar
869870
known_params = {'loc': 45.01575354024957, 'scale': 30}
870871
res = goodness_of_fit(stats.norm, x, known_params=known_params,
871-
statistic='ad', random_state=rng)
872+
statistic='ad', rng=rng)
872873
assert_allclose(res.statistic, 2.492) # See [1] Table 1A 1.0
873874
assert_allclose(res.pvalue, 0.05, atol=5e-3)
874875

@@ -879,7 +880,7 @@ def test_against_anderson_case_1(self):
879880
# scale that produced critical value of statistic found w/ root_scalar
880881
known_params = {'scale': 29.957112639101933}
881882
res = goodness_of_fit(stats.norm, x, known_params=known_params,
882-
statistic='ad', random_state=rng)
883+
statistic='ad', rng=rng)
883884
assert_allclose(res.statistic, 0.908) # See [1] Table 1B 1.1
884885
assert_allclose(res.pvalue, 0.1, atol=5e-3)
885886

@@ -890,7 +891,7 @@ def test_against_anderson_case_2(self):
890891
# loc that produced critical value of statistic found w/ root_scalar
891892
known_params = {'loc': 44.5680212261933}
892893
res = goodness_of_fit(stats.norm, x, known_params=known_params,
893-
statistic='ad', random_state=rng)
894+
statistic='ad', rng=rng)
894895
assert_allclose(res.statistic, 2.904) # See [1] Table 1B 1.2
895896
assert_allclose(res.pvalue, 0.025, atol=5e-3)
896897

@@ -900,7 +901,7 @@ def test_against_anderson_case_3(self):
900901
# c that produced critical value of statistic found w/ root_scalar
901902
x = stats.skewnorm.rvs(1.4477847789132101, loc=1, scale=2, size=100,
902903
random_state=rng)
903-
res = goodness_of_fit(stats.norm, x, statistic='ad', random_state=rng)
904+
res = goodness_of_fit(stats.norm, x, statistic='ad', rng=rng)
904905
assert_allclose(res.statistic, 0.559) # See [1] Table 1B 1.2
905906
assert_allclose(res.pvalue, 0.15, atol=5e-3)
906907

@@ -911,7 +912,7 @@ def test_against_anderson_gumbel_r(self):
911912
x = stats.genextreme(0.051896837188595134, loc=0.5,
912913
scale=1.5).rvs(size=1000, random_state=rng)
913914
res = goodness_of_fit(stats.gumbel_r, x, statistic='ad',
914-
random_state=rng)
915+
rng=rng)
915916
ref = stats.anderson(x, dist='gumbel_r')
916917
assert_allclose(res.statistic, ref.critical_values[0])
917918
assert_allclose(res.pvalue, ref.significance_level[0]/100, atol=5e-3)
@@ -922,7 +923,7 @@ def test_against_filliben_norm(self):
922923
y = [6, 1, -4, 8, -2, 5, 0]
923924
known_params = {'loc': 0, 'scale': 1}
924925
res = stats.goodness_of_fit(stats.norm, y, known_params=known_params,
925-
statistic="filliben", random_state=rng)
926+
statistic="filliben", rng=rng)
926927
# Slight discrepancy presumably due to roundoff in Filliben's
927928
# calculation. Using exact order statistic medians instead of
928929
# Filliben's approximation doesn't account for it.
@@ -944,10 +945,10 @@ def test_filliben_property(self):
944945
rng = np.random.default_rng(8535677809395478813)
945946
x = rng.normal(loc=10, scale=0.5, size=100)
946947
res = stats.goodness_of_fit(stats.norm, x,
947-
statistic="filliben", random_state=rng)
948+
statistic="filliben", rng=rng)
948949
known_params = {'loc': 0, 'scale': 1}
949950
ref = stats.goodness_of_fit(stats.norm, x, known_params=known_params,
950-
statistic="filliben", random_state=rng)
951+
statistic="filliben", rng=rng)
951952
assert_allclose(res.statistic, ref.statistic, rtol=1e-15)
952953

953954
@pytest.mark.parametrize('case', [(25, [.928, .937, .950, .958, .966]),
@@ -960,7 +961,7 @@ def test_against_filliben_norm_table(self, case):
960961
x = rng.random(n)
961962
known_params = {'loc': 0, 'scale': 1}
962963
res = stats.goodness_of_fit(stats.norm, x, known_params=known_params,
963-
statistic="filliben", random_state=rng)
964+
statistic="filliben", rng=rng)
964965
percentiles = np.array([0.005, 0.01, 0.025, 0.05, 0.1])
965966
res = stats.scoreatpercentile(res.null_distribution, percentiles*100)
966967
assert_allclose(res, ref, atol=2e-3)
@@ -980,7 +981,7 @@ def test_against_ppcc(self, case):
980981
rng = np.random.default_rng(7777775561439803116)
981982
x = rng.normal(size=n)
982983
res = stats.goodness_of_fit(stats.rayleigh, x, statistic="filliben",
983-
random_state=rng)
984+
rng=rng)
984985
assert_allclose(res.statistic, ref_statistic, rtol=1e-4)
985986
assert_allclose(res.pvalue, ref_pvalue, atol=1.5e-2)
986987

@@ -1000,7 +1001,7 @@ def test_params_effects(self):
10001001
res1 = goodness_of_fit(stats.weibull_min, x, n_mc_samples=2,
10011002
guessed_params=guessed_params,
10021003
fit_params=fit_params,
1003-
known_params=known_params, random_state=rng)
1004+
known_params=known_params, rng=rng)
10041005
assert not np.allclose(res1.fit_result.params.c, 13.4)
10051006
assert_equal(res1.fit_result.params.scale, 13.73)
10061007
assert_equal(res1.fit_result.params.loc, -13.85)
@@ -1012,7 +1013,7 @@ def test_params_effects(self):
10121013
res2 = goodness_of_fit(stats.weibull_min, x, n_mc_samples=2,
10131014
guessed_params=guessed_params,
10141015
fit_params=fit_params,
1015-
known_params=known_params, random_state=rng)
1016+
known_params=known_params, rng=rng)
10161017
assert not np.allclose(res2.fit_result.params.c,
10171018
res1.fit_result.params.c, rtol=1e-8)
10181019
assert not np.allclose(res2.null_distribution,
@@ -1028,7 +1029,7 @@ def test_params_effects(self):
10281029
res3 = goodness_of_fit(stats.weibull_min, x, n_mc_samples=2,
10291030
guessed_params=guessed_params,
10301031
fit_params=fit_params,
1031-
known_params=known_params, random_state=rng)
1032+
known_params=known_params, rng=rng)
10321033
assert_equal(res3.fit_result.params.c, 13.4)
10331034
assert_equal(res3.fit_result.params.scale, 13.73)
10341035
assert_equal(res3.fit_result.params.loc, -13.85)
@@ -1058,7 +1059,7 @@ def greenwood(dist, data, *, axis):
10581059
data = stats.expon.rvs(size=5, random_state=rng)
10591060
result = goodness_of_fit(stats.expon, data,
10601061
known_params={'loc': 0, 'scale': 1},
1061-
statistic=greenwood, random_state=rng)
1062+
statistic=greenwood, rng=rng)
10621063
p = [.01, .05, .1, .2, .3, .4, .5, .6, .7, .8, .9, .95, .99]
10631064
exact_quantiles = [
10641065
.183863, .199403, .210088, .226040, .239947, .253677, .268422,

0 commit comments

Comments (0)