@@ -2,7 +2,7 @@
 from collections import namedtuple
 import numpy as np
 from scipy import optimize, stats
-from scipy._lib._util import check_random_state
+from scipy._lib._util import check_random_state, _transition_to_rng
 
 
 def _combine_bounds(name, user_bounds, shape_domain, integral):
@@ -738,9 +738,10 @@ def nlpsf(free_params, data=data):  # bind data NOW
                                   'null_distribution'))
 
 
+@_transition_to_rng('random_state')
 def goodness_of_fit(dist, data, *, known_params=None, fit_params=None,
                     guessed_params=None, statistic='ad', n_mc_samples=9999,
-                    random_state=None):
+                    rng=None):
     r"""
     Perform a goodness of fit test comparing data to a distribution family.
 
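A note on the decorator added above: `_transition_to_rng` is a private SciPy helper imported in the first hunk. Judging from its name and this call site, it maps the legacy `random_state` keyword onto the new `rng` parameter during the seed-transition period. The following is only a minimal sketch of a decorator with that shape; it is a hypothetical stand-in, and the real implementation in `scipy._lib._util` also handles positional arguments and deprecation warnings:

import functools

def _transition_to_rng_sketch(old_name):
    # Hypothetical stand-in for scipy._lib._util._transition_to_rng:
    # forwards a legacy seed keyword (e.g. 'random_state') to `rng`.
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            if old_name in kwargs:
                kwargs['rng'] = kwargs.pop(old_name)  # remap legacy keyword
            return func(*args, **kwargs)
        return wrapper
    return decorator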
@@ -797,18 +798,11 @@ def goodness_of_fit(dist, data, *, known_params=None, fit_params=None,
         The number of Monte Carlo samples drawn from the null hypothesized
         distribution to form the null distribution of the statistic. The
         sample size of each is the same as the given `data`.
-    random_state : {None, int, `numpy.random.Generator`,
-                    `numpy.random.RandomState`}, optional
-
-        Pseudorandom number generator state used to generate the Monte Carlo
-        samples.
-
-        If `random_state` is ``None`` (default), the
-        `numpy.random.RandomState` singleton is used.
-        If `random_state` is an int, a new ``RandomState`` instance is used,
-        seeded with `random_state`.
-        If `random_state` is already a ``Generator`` or ``RandomState``
-        instance, then the provided instance is used.
+    rng : `numpy.random.Generator`, optional
+        Pseudorandom number generator state. When `rng` is None, a new
+        `numpy.random.Generator` is created using entropy from the
+        operating system. Types other than `numpy.random.Generator` are
+        passed to `numpy.random.default_rng` to instantiate a ``Generator``.
 
     Returns
     -------
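For context on the new contract: the added docstring text delegates seed normalization to `numpy.random.default_rng`. This is standard NumPy behavior rather than part of the diff, but it is exactly what the new `rng` parameter relies on:

import numpy as np

rng = np.random.default_rng()        # None case: fresh Generator seeded from OS entropy
seeded = np.random.default_rng(42)   # an int seed gives a reproducible Generator
gen = np.random.default_rng(seeded)  # an existing Generator is returned unaltered
assert gen is seeded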
@@ -996,7 +990,7 @@ def goodness_of_fit(dist, data, *, known_params=None, fit_params=None,
 
     >>> known_params = {'loc': loc, 'scale': scale}
     >>> res = stats.goodness_of_fit(stats.norm, x, known_params=known_params,
-    ...                             statistic='ks', random_state=rng)
+    ...                             statistic='ks', rng=rng)
     >>> res.statistic, res.pvalue
     (0.1119257570456813, 0.2788)
 
@@ -1030,7 +1024,7 @@ def goodness_of_fit(dist, data, *, known_params=None, fit_params=None,
     as described above. This is where `goodness_of_fit` excels.
 
     >>> res = stats.goodness_of_fit(stats.norm, x, statistic='ks',
-    ...                             random_state=rng)
+    ...                             rng=rng)
     >>> res.statistic, res.pvalue
     (0.1119257570456813, 0.0196)
 
@@ -1062,7 +1056,7 @@ def goodness_of_fit(dist, data, *, known_params=None, fit_params=None,
     estimate it directly.
 
     >>> res = stats.goodness_of_fit(stats.norm, x, statistic='ad',
-    ...                             random_state=rng)
+    ...                             rng=rng)
     >>> res.statistic, res.pvalue
     (1.2139573337497467, 0.0034)
 
@@ -1078,7 +1072,7 @@ def goodness_of_fit(dist, data, *, known_params=None, fit_params=None,
     >>> rng = np.random.default_rng()
     >>> x = stats.chi(df=2.2, loc=0, scale=2).rvs(size=1000, random_state=rng)
     >>> res = stats.goodness_of_fit(stats.rayleigh, x, statistic='cvm',
-    ...                             known_params={'loc': 0}, random_state=rng)
+    ...                             known_params={'loc': 0}, rng=rng)
 
     This executes fairly quickly, but to check the reliability of the ``fit``
     method, we should inspect the fit result.
@@ -1118,9 +1112,9 @@ def goodness_of_fit(dist, data, *, known_params=None, fit_params=None,
 
     """
     args = _gof_iv(dist, data, known_params, fit_params, guessed_params,
-                   statistic, n_mc_samples, random_state)
+                   statistic, n_mc_samples, rng)
     (dist, data, fixed_nhd_params, fixed_rfd_params, guessed_nhd_params,
-     guessed_rfd_params, statistic, n_mc_samples_int, random_state) = args
+     guessed_rfd_params, statistic, n_mc_samples_int, rng) = args
 
     # Fit null hypothesis distribution to data
     nhd_fit_fun = _get_fit_fun(dist, data, guessed_nhd_params,
@@ -1129,7 +1123,7 @@ def goodness_of_fit(dist, data, *, known_params=None, fit_params=None,
     nhd_dist = dist(*nhd_vals)
 
     def rvs(size):
-        return nhd_dist.rvs(size=size, random_state=random_state)
+        return nhd_dist.rvs(size=size, random_state=rng)
 
     # Define statistic
     fit_fun = _get_fit_fun(dist, data, guessed_rfd_params, fixed_rfd_params)
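One detail worth noting in the `rvs` closure above: the Generator is still forwarded through the frozen distribution's legacy `random_state` argument. That works because `rvs` accepts a `numpy.random.Generator` there as well as a `RandomState`. For reference, this is standard SciPy behavior, not something introduced by this diff:

import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
# A Generator is a valid `random_state` value for rvs, so the closure
# behaves the same whether `rng` was user-supplied or created internally.
sample = stats.norm(loc=0, scale=1).rvs(size=5, random_state=rng)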
@@ -1299,7 +1293,7 @@ def _cramer_von_mises(dist, data, axis):
 
 
 def _gof_iv(dist, data, known_params, fit_params, guessed_params, statistic,
-            n_mc_samples, random_state):
+            n_mc_samples, rng):
 
     if not isinstance(dist, stats.rv_continuous):
         message = ("`dist` must be a (non-frozen) instance of "
@@ -1349,7 +1343,7 @@ def _gof_iv(dist, data, known_params, fit_params, guessed_params, statistic,
         message = "`n_mc_samples` must be an integer."
         raise TypeError(message)
 
-    random_state = check_random_state(random_state)
+    rng = check_random_state(rng)
 
     return (dist, data, fixed_nhd_params, fixed_rfd_params, guessed_nhd_params,
-            guessed_rfd_params, statistic, n_mc_samples_int, random_state)
+            guessed_rfd_params, statistic, n_mc_samples_int, rng)