API: sparse: transition random-like functions to rng (SPEC 7) (scipy#21888)

mdhaber · dschult · web-flow · commit 81385dfc43b7 · 2024-11-22T10:13:47.000-06:00
---------

Co-authored-by: Dan Schult <dschult@colgate.edu>
diff --git a/doc/source/reference/sparse.migration_to_sparray.rst b/doc/source/reference/sparse.migration_to_sparray.rst
@@ -103,7 +103,7 @@ Their signatures are::
    def block_array(blocks, format=None, dtype=None):
    def diags_array(diagonals, /, *, offsets=0, shape=None, format=None, dtype=None):
    def eye_array(m, n=None, *, k=0, dtype=float, format=None):
-   def random_array(m, n, density=0.01, format='coo', dtype=None, random_state=None, data_random_state=None):
+   def random_array(shape, *, density=0.01, format='coo', dtype=None, rng=None, data_sampler=None):
 
 Existing functions that need careful migration
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/scipy/sparse/_construct.py b/scipy/sparse/_construct.py
@@ -11,7 +11,7 @@
 import math
 import numpy as np
 
-from scipy._lib._util import check_random_state, rng_integers
+from scipy._lib._util import check_random_state, rng_integers, _transition_to_rng
 from ._sputils import upcast, get_index_dtype, isscalarlike
 
 from ._sparsetools import csr_hstack
@@ -1093,22 +1093,14 @@ def block_diag(mats, format=None, dtype=None):
                       dtype=dtype).asformat(format)
 
 
+@_transition_to_rng("random_state")
 def random_array(shape, *, density=0.01, format='coo', dtype=None,
-                 random_state=None, data_sampler=None):
+                 rng=None, data_sampler=None):
     """Return a sparse array of uniformly random numbers in [0, 1)
 
     Returns a sparse array with the given shape and density
     where values are generated uniformly randomly in the range [0, 1).
 
-    .. warning::
-
-        Since numpy 1.17, passing a ``np.random.Generator`` (e.g.
-        ``np.random.default_rng``) for ``random_state`` will lead to much
-        faster execution times.
-
-        A much slower implementation is used by default for backwards
-        compatibility.
-
     Parameters
     ----------
     shape : int or tuple of ints
@@ -1120,21 +1112,14 @@ def random_array(shape, *, density=0.01, format='coo', dtype=None,
         sparse matrix format.
     dtype : dtype, optional (default: np.float64)
         type of the returned matrix values.
-    random_state : {None, int, `Generator`, `RandomState`}, optional
-        A random number generator to determine nonzero structure. We recommend using
-        a `numpy.random.Generator` manually provided for every call as it is much
-        faster than RandomState.
-
-        - If `None` (or `np.random`), the `numpy.random.RandomState`
-          singleton is used.
-        - If an int, a new ``Generator`` instance is used,
-          seeded with the int.
-        - If a ``Generator`` or ``RandomState`` instance then
-          that instance is used.
+    rng : `numpy.random.Generator`, optional
+        Pseudorandom number generator state. When `rng` is None, a new
+        `numpy.random.Generator` is created using entropy from the
+        operating system. Types other than `numpy.random.Generator` are
+        passed to `numpy.random.default_rng` to instantiate a ``Generator``.
 
         This random state will be used for sampling `indices` (the sparsity
         structure), and by default for the data values too (see `data_sampler`).
-
     data_sampler : callable, optional (default depends on dtype)
         Sampler of random data values with keyword arg `size`.
         This function should take a single keyword argument `size` specifying
@@ -1143,7 +1128,7 @@ def random_array(shape, *, density=0.01, format='coo', dtype=None,
         By default, uniform [0, 1) random values are used unless `dtype` is
         an integer (default uniform integers from that dtype) or
         complex (default uniform over the unit square in the complex plane).
-        For these, the `random_state` rng is used e.g. ``rng.uniform(size=size)``.
+        For these, the `rng` is used e.g. ``rng.uniform(size=size)``.
 
     Returns
     -------
@@ -1160,13 +1145,13 @@ def random_array(shape, *, density=0.01, format='coo', dtype=None,
 
     Default sampling uniformly from [0, 1):
 
-    >>> S = sp.sparse.random_array((3, 4), density=0.25, random_state=rng)
+    >>> S = sp.sparse.random_array((3, 4), density=0.25, rng=rng)
 
     Providing a sampler for the values:
 
     >>> rvs = sp.stats.poisson(25, loc=10).rvs
     >>> S = sp.sparse.random_array((3, 4), density=0.25,
-    ...                            random_state=rng, data_sampler=rvs)
+    ...                            rng=rng, data_sampler=rvs)
     >>> S.toarray()
     array([[ 36.,   0.,  33.,   0.],   # random
            [  0.,   0.,   0.,   0.],
@@ -1175,38 +1160,38 @@ def random_array(shape, *, density=0.01, format='coo', dtype=None,
     Building a custom distribution.
     This example builds a squared normal from np.random:
 
-    >>> def np_normal_squared(size=None, random_state=rng):
-    ...     return random_state.standard_normal(size) ** 2
-    >>> S = sp.sparse.random_array((3, 4), density=0.25, random_state=rng,
-    ...                      data_sampler=np_normal_squared)
+    >>> def np_normal_squared(size=None, rng=rng):
+    ...     return rng.standard_normal(size) ** 2
+    >>> S = sp.sparse.random_array((3, 4), density=0.25, rng=rng,
+    ...                            data_sampler=np_normal_squared)
 
     Or we can build it from sp.stats style rvs functions:
 
-    >>> def sp_stats_normal_squared(size=None, random_state=rng):
+    >>> def sp_stats_normal_squared(size=None, rng=rng):
     ...     std_normal = sp.stats.distributions.norm_gen().rvs
-    ...     return std_normal(size=size, random_state=random_state) ** 2
-    >>> S = sp.sparse.random_array((3, 4), density=0.25, random_state=rng,
-    ...                      data_sampler=sp_stats_normal_squared)
+    ...     return std_normal(size=size, random_state=rng) ** 2
+    >>> S = sp.sparse.random_array((3, 4), density=0.25, rng=rng,
+    ...                            data_sampler=sp_stats_normal_squared)
 
     Or we can subclass sp.stats rv_continuous or rv_discrete:
 
     >>> class NormalSquared(sp.stats.rv_continuous):
     ...     def _rvs(self,  size=None, random_state=rng):
-    ...         return random_state.standard_normal(size) ** 2
+    ...         return rng.standard_normal(size) ** 2
     >>> X = NormalSquared()
     >>> Y = X().rvs
     >>> S = sp.sparse.random_array((3, 4), density=0.25,
-    ...                            random_state=rng, data_sampler=Y)
+    ...                            rng=rng, data_sampler=Y)
     """
     # Use the more efficient RNG by default.
-    if random_state is None:
-        random_state = np.random.default_rng()
-    data, ind = _random(shape, density, format, dtype, random_state, data_sampler)
+    if rng is None:
+        rng = np.random.default_rng()
+    data, ind = _random(shape, density, format, dtype, rng, data_sampler)
     return coo_array((data, ind), shape=shape).asformat(format)
 
 
 def _random(shape, density=0.01, format=None, dtype=None,
-            random_state=None, data_sampler=None):
+            rng=None, data_sampler=None):
     if density < 0 or density > 1:
         raise ValueError("density expected to be 0 <= density <= 1")
 
@@ -1215,7 +1200,7 @@ def _random(shape, density=0.01, format=None, dtype=None,
     # Number of non zero values
     size = int(round(density * tot_prod))
 
-    rng = check_random_state(random_state)
+    rng = check_random_state(rng)
 
     if data_sampler is None:
         if np.issubdtype(dtype, np.integer):
@@ -1250,20 +1235,12 @@ def data_sampler(size):
     return vals, ind
 
 
+@_transition_to_rng("random_state", position_num=5)
 def random(m, n, density=0.01, format='coo', dtype=None,
-           random_state=None, data_rvs=None):
+           rng=None, data_rvs=None):
     """Generate a sparse matrix of the given shape and density with randomly
     distributed values.
 
-    .. warning::
-
-        Since numpy 1.17, passing a ``np.random.Generator`` (e.g.
-        ``np.random.default_rng``) for ``random_state`` will lead to much
-        faster execution times.
-
-        A much slower implementation is used by default for backwards
-        compatibility.
-
     .. warning::
 
         This function returns a sparse matrix -- not a sparse array.
@@ -1281,15 +1258,11 @@ def random(m, n, density=0.01, format='coo', dtype=None,
         sparse matrix format.
     dtype : dtype, optional
         type of the returned matrix values.
-    random_state : {None, int, `numpy.random.Generator`,
-                    `numpy.random.RandomState`}, optional
-
-        - If `seed` is None (or `np.random`), the `numpy.random.RandomState`
-          singleton is used.
-        - If `seed` is an int, a new ``RandomState`` instance is used,
-          seeded with `seed`.
-        - If `seed` is already a ``Generator`` or ``RandomState`` instance then
-          that instance is used.
+    rng : `numpy.random.Generator`, optional
+        Pseudorandom number generator state. When `rng` is None, a new
+        `numpy.random.Generator` is created using entropy from the
+        operating system. Types other than `numpy.random.Generator` are
+        passed to `numpy.random.default_rng` to instantiate a ``Generator``.
 
         This random state will be used for sampling the sparsity structure, but
         not necessarily for sampling the values of the structurally nonzero
@@ -1319,12 +1292,12 @@ def random(m, n, density=0.01, format='coo', dtype=None,
     >>> import scipy as sp
     >>> import numpy as np
     >>> rng = np.random.default_rng()
-    >>> S = sp.sparse.random(3, 4, density=0.25, random_state=rng)
+    >>> S = sp.sparse.random(3, 4, density=0.25, rng=rng)
 
     Providing a sampler for the values:
 
     >>> rvs = sp.stats.poisson(25, loc=10).rvs
-    >>> S = sp.sparse.random(3, 4, density=0.25, random_state=rng, data_rvs=rvs)
+    >>> S = sp.sparse.random(3, 4, density=0.25, rng=rng, data_rvs=rvs)
     >>> S.toarray()
     array([[ 36.,   0.,  33.,   0.],   # random
            [  0.,   0.,   0.,   0.],
@@ -1333,27 +1306,27 @@ def random(m, n, density=0.01, format='coo', dtype=None,
     Building a custom distribution.
     This example builds a squared normal from np.random:
 
-    >>> def np_normal_squared(size=None, random_state=rng):
-    ...     return random_state.standard_normal(size) ** 2
-    >>> S = sp.sparse.random(3, 4, density=0.25, random_state=rng,
+    >>> def np_normal_squared(size=None, rng=rng):
+    ...     return rng.standard_normal(size) ** 2
+    >>> S = sp.sparse.random(3, 4, density=0.25, rng=rng,
     ...                      data_rvs=np_normal_squared)
 
     Or we can build it from sp.stats style rvs functions:
 
-    >>> def sp_stats_normal_squared(size=None, random_state=rng):
+    >>> def sp_stats_normal_squared(size=None, rng=rng):
     ...     std_normal = sp.stats.distributions.norm_gen().rvs
-    ...     return std_normal(size=size, random_state=random_state) ** 2
-    >>> S = sp.sparse.random(3, 4, density=0.25, random_state=rng,
+    ...     return std_normal(size=size, random_state=rng) ** 2
+    >>> S = sp.sparse.random(3, 4, density=0.25, rng=rng,
     ...                      data_rvs=sp_stats_normal_squared)
 
     Or we can subclass sp.stats rv_continuous or rv_discrete:
 
     >>> class NormalSquared(sp.stats.rv_continuous):
     ...     def _rvs(self,  size=None, random_state=rng):
-    ...         return random_state.standard_normal(size) ** 2
+    ...         return rng.standard_normal(size) ** 2
     >>> X = NormalSquared()
     >>> Y = X()  # get a frozen version of the distribution
-    >>> S = sp.sparse.random(3, 4, density=0.25, random_state=rng, data_rvs=Y.rvs)
+    >>> S = sp.sparse.random(3, 4, density=0.25, rng=rng, data_rvs=Y.rvs)
     """
     if n is None:
         n = m
@@ -1364,11 +1337,12 @@ def data_rvs_kw(size):
             return data_rvs(size)
     else:
         data_rvs_kw = None
-    vals, ind = _random((m, n), density, format, dtype, random_state, data_rvs_kw)
+    vals, ind = _random((m, n), density, format, dtype, rng, data_rvs_kw)
     return coo_matrix((vals, ind), shape=(m, n)).asformat(format)
 
 
-def rand(m, n, density=0.01, format="coo", dtype=None, random_state=None):
+@_transition_to_rng("random_state", position_num=5)
+def rand(m, n, density=0.01, format="coo", dtype=None, rng=None):
     """Generate a sparse matrix of the given shape and density with uniformly
     distributed values.
 
@@ -1389,15 +1363,11 @@ def rand(m, n, density=0.01, format="coo", dtype=None, random_state=None):
         sparse matrix format.
     dtype : dtype, optional
         type of the returned matrix values.
-    random_state : {None, int, `numpy.random.Generator`,
-                    `numpy.random.RandomState`}, optional
-
-        If `seed` is None (or `np.random`), the `numpy.random.RandomState`
-        singleton is used.
-        If `seed` is an int, a new ``RandomState`` instance is used,
-        seeded with `seed`.
-        If `seed` is already a ``Generator`` or ``RandomState`` instance then
-        that instance is used.
+    rng : `numpy.random.Generator`, optional
+        Pseudorandom number generator state. When `rng` is None, a new
+        `numpy.random.Generator` is created using entropy from the
+        operating system. Types other than `numpy.random.Generator` are
+        passed to `numpy.random.default_rng` to instantiate a ``Generator``.
 
     Returns
     -------
@@ -1415,7 +1385,7 @@ def rand(m, n, density=0.01, format="coo", dtype=None, random_state=None):
     Examples
     --------
     >>> from scipy.sparse import rand
-    >>> matrix = rand(3, 4, density=0.25, format="csr", random_state=42)
+    >>> matrix = rand(3, 4, density=0.25, format="csr", rng=42)
     >>> matrix
     <Compressed Sparse Row sparse matrix of dtype 'float64'
         with 3 stored elements and shape (3, 4)>
@@ -1425,4 +1395,4 @@ def rand(m, n, density=0.01, format="coo", dtype=None, random_state=None):
            [0.        , 0.        , 0.        , 0.        ]])
 
     """
-    return random(m, n, density, format, dtype, random_state)
+    return random(m, n, density, format, dtype, rng)
diff --git a/scipy/sparse/csgraph/_laplacian.py b/scipy/sparse/csgraph/_laplacian.py
@@ -289,7 +289,7 @@ def laplacian(
     Fix a random seed ``rng`` and add a random sparse noise to the graph ``G``:
 
     >>> rng = np.random.default_rng()
-    >>> G += 1e-2 * random_array((N, N), density=0.1, random_state=rng)
+    >>> G += 1e-2 * random_array((N, N), density=0.1, rng=rng)
 
     Set initial approximations for eigenvectors:
 
diff --git a/scipy/sparse/csgraph/_matching.pyx b/scipy/sparse/csgraph/_matching.pyx
@@ -440,18 +440,18 @@ def min_weight_full_bipartite_matching(biadjacency, maximize=False):
     >>> import numpy as np
     >>> from scipy.sparse import random_array
     >>> from scipy.optimize import linear_sum_assignment
-    >>> sparse = random_array((10, 10), random_state=42, density=.5, format='coo') * 10
+    >>> sparse = random_array((10, 10), rng=42, density=.5, format='coo') * 10
     >>> sparse.data = np.ceil(sparse.data)
     >>> dense = sparse.toarray()
     >>> dense = np.full(sparse.shape, np.inf)
     >>> dense[sparse.row, sparse.col] = sparse.data
     >>> sparse = sparse.tocsr()
     >>> row_ind, col_ind = linear_sum_assignment(dense)
     >>> print(dense[row_ind, col_ind].sum())
-    28.0
+    25.0
     >>> row_ind, col_ind = min_weight_full_bipartite_matching(sparse)
     >>> print(sparse[row_ind, col_ind].sum())
-    28.0
+    25.0
 
     """
     biadjacency = convert_pydata_sparse_to_scipy(biadjacency)
diff --git a/scipy/sparse/csgraph/tests/test_shortest_path.py b/scipy/sparse/csgraph/tests/test_shortest_path.py
@@ -196,13 +196,13 @@ def test_dijkstra_indices_min_only(directed, SP_ans, indices):
 
 @pytest.mark.parametrize('n', (10, 100, 1000))
 def test_dijkstra_min_only_random(n):
-    np.random.seed(1234)
+    rng = np.random.default_rng(7345782358920239234)
     data = scipy.sparse.random_array((n, n), density=0.5, format='lil',
-                                     random_state=42, dtype=np.float64)
+                                     rng=rng, dtype=np.float64)
     data.setdiag(np.zeros(n, dtype=np.bool_))
     # choose some random vertices
     v = np.arange(n)
-    np.random.shuffle(v)
+    rng.shuffle(v)
     indices = v[:int(n*.1)]
     ds, pred, sources = dijkstra(data,
                                  directed=True,
diff --git a/scipy/sparse/linalg/_dsolve/tests/test_linsolve.py b/scipy/sparse/linalg/_dsolve/tests/test_linsolve.py
@@ -888,7 +888,7 @@ def test_is_sptriangular_and_spbandwidth(nnz, fmt):
 
     N = nnz // 2
     dens = 0.1
-    A = scipy.sparse.random_array((N, N), density=dens, format="csr", random_state=rng)
+    A = scipy.sparse.random_array((N, N), density=dens, format="csr", rng=rng)
     A[1, 3] = A[3, 1] = 22  # ensure not upper or lower
     A = A.asformat(fmt)
     AU = scipy.sparse.triu(A, format=fmt)
diff --git a/scipy/sparse/linalg/_eigen/_svds.py b/scipy/sparse/linalg/_eigen/_svds.py
@@ -297,7 +297,7 @@ def svds(A, k=6, ncv=None, tol=0, which='LM', v0=None,
     ever explicitly constructed.
 
     >>> rng = np.random.default_rng(102524723947864966825913730119128190974)
-    >>> G = sparse.random_array((8, 9), density=0.5, random_state=rng)
+    >>> G = sparse.random_array((8, 9), density=0.5, rng=rng)
     >>> Glo = aslinearoperator(G)
     >>> _, singular_values_svds, _ = svds(Glo, k=5, rng=rng)
     >>> _, singular_values_svd, _ = linalg.svd(G.toarray())
diff --git a/scipy/sparse/linalg/_eigen/arpack/tests/test_arpack.py b/scipy/sparse/linalg/_eigen/arpack/tests/test_arpack.py
@@ -682,7 +682,7 @@ def test_real_eigs_real_k_subset():
     rng = np.random.default_rng(2)
 
     n = 10
-    A = random_array(shape=(n, n), density=0.5, random_state=rng)
+    A = random_array(shape=(n, n), density=0.5, rng=rng)
     A.data *= 2
     A.data -= 1
     A += A.T  # make symmetric to test real eigenvalues
diff --git a/scipy/sparse/linalg/_eigen/tests/test_svds.py b/scipy/sparse/linalg/_eigen/tests/test_svds.py
@@ -688,9 +688,9 @@ def test_small_sigma_sparse(self, shape, dtype):
         rng = np.random.default_rng(0)
         k = 5
         (m, n) = shape
-        S = random_array(shape=(m, n), density=0.1, random_state=rng)
+        S = random_array(shape=(m, n), density=0.1, rng=rng)
         if dtype is complex:
-            S = + 1j * random_array(shape=(m, n), density=0.1, random_state=rng)
+            S = + 1j * random_array(shape=(m, n), density=0.1, rng=rng)
         e = np.ones(m)
         e[0:5] *= 1e1 ** np.arange(-5, 0, 1)
         S = dia_array((e, 0), shape=(m, m)) @ S
diff --git a/scipy/sparse/linalg/_isolve/tests/test_gcrotmk.py b/scipy/sparse/linalg/_isolve/tests/test_gcrotmk.py
@@ -66,7 +66,7 @@ def test_preconditioner(self):
     def test_arnoldi(self):
         rng = np.random.default_rng(1)
 
-        A = eye_array(2000) + random_array((2000, 2000), density=5e-4, random_state=rng)
+        A = eye_array(2000) + random_array((2000, 2000), density=5e-4, rng=rng)
         b = rng.random(2000)
 
         # The inner arnoldi should be equivalent to gmres
diff --git a/scipy/sparse/linalg/_isolve/tests/test_lgmres.py b/scipy/sparse/linalg/_isolve/tests/test_lgmres.py
diff --git a/scipy/sparse/linalg/tests/test_expm_multiply.py b/scipy/sparse/linalg/tests/test_expm_multiply.py
diff --git a/scipy/sparse/tests/test_construct.py b/scipy/sparse/tests/test_construct.py
diff --git a/scipy/sparse/tests/test_coo.py b/scipy/sparse/tests/test_coo.py