Skip to content

Commit 4040aaa

Browse files
committed
Merge remote-tracking branch 'upstream/main' into features2
2 parents bbc8ba5 + 08f9b20 commit 4040aaa

File tree

111 files changed

+1283
-554
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

111 files changed

+1283
-554
lines changed

asv_benchmarks/benchmarks/datasets.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from pathlib import Path
22

33
import numpy as np
4-
import scipy.sparse as sp
54
from joblib import Memory
65

76
from sklearn.datasets import (
@@ -17,6 +16,7 @@
1716
from sklearn.feature_extraction.text import TfidfVectorizer
1817
from sklearn.model_selection import train_test_split
1918
from sklearn.preprocessing import MaxAbsScaler, StandardScaler
19+
from sklearn.utils.fixes import _sparse_random_array
2020

2121
# memory location for caching datasets
2222
M = Memory(location=str(Path(__file__).resolve().parent / "cache"))
@@ -100,12 +100,12 @@ def _synth_regression_dataset(n_samples=100000, n_features=100, dtype=np.float32
100100
def _synth_regression_sparse_dataset(
101101
n_samples=10000, n_features=10000, density=0.01, dtype=np.float32
102102
):
103-
X = sp.random(
104-
m=n_samples, n=n_features, density=density, format="csr", random_state=0
103+
X = _sparse_random_array(
104+
(n_samples, n_features), density=density, format="csr", random_state=0
105105
)
106106
X.data = np.random.RandomState(0).randn(X.getnnz())
107107
X = X.astype(dtype, copy=False)
108-
coefs = sp.random(m=n_features, n=1, density=0.5, random_state=0)
108+
coefs = _sparse_random_array((n_features, 1), density=0.5, random_state=0)
109109
coefs.data = np.random.RandomState(0).randn(coefs.getnnz())
110110
y = X.dot(coefs.toarray()).reshape(-1)
111111
y += 0.2 * y.std() * np.random.randn(n_samples)
@@ -155,9 +155,8 @@ def _random_dataset(
155155
X = np.random.RandomState(0).random_sample((n_samples, n_features))
156156
X = X.astype(dtype, copy=False)
157157
else:
158-
X = sp.random(
159-
n_samples,
160-
n_features,
158+
X = _sparse_random_array(
159+
(n_samples, n_features),
161160
density=0.05,
162161
format="csr",
163162
dtype=dtype,

benchmarks/bench_feature_expansions.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22

33
import matplotlib.pyplot as plt
44
import numpy as np
5-
import scipy.sparse as sparse
65

76
from sklearn.preprocessing import PolynomialFeatures
7+
from sklearn.utils.fixes import _sparse_random_array
88

99
degree = 2
1010
trials = 3
@@ -21,7 +21,7 @@
2121
for density in densities:
2222
for dim_index, dim in enumerate(dimensionalities):
2323
print(trial, density, dim)
24-
X_csr = sparse.random(num_rows, dim, density).tocsr()
24+
X_csr = _sparse_random_array((num_rows, dim), density=density, format="csr")
2525
X_dense = X_csr.toarray()
2626
# CSR
2727
t0 = time()

benchmarks/bench_plot_randomized_svd.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ def get_data(dataset_name):
188188
data = np.repeat(data, 10)
189189
row = np.random.uniform(0, small_size, sparsity)
190190
col = np.random.uniform(0, small_size, sparsity)
191-
X = sp.sparse.csr_matrix((data, (row, col)), shape=(size, small_size))
191+
X = sp.sparse.csr_array((data, (row, col)), shape=(size, small_size))
192192
del data
193193
del row
194194
del col

benchmarks/bench_random_projections.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ def bench_scikit_transformer(X, transformer):
7070
# Gaussian distributed values
7171
def make_sparse_random_data(n_samples, n_features, n_nonzeros, random_state=None):
7272
rng = np.random.RandomState(random_state)
73-
data_coo = sp.coo_matrix(
73+
data_coo = sp.coo_array(
7474
(
7575
rng.randn(n_nonzeros),
7676
(

doc/modules/impute.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,11 +56,11 @@ that contain the missing values::
5656
The :class:`SimpleImputer` class also supports sparse matrices::
5757

5858
>>> import scipy.sparse as sp
59-
>>> X = sp.csc_matrix([[1, 2], [0, -1], [8, 4]])
59+
>>> X = sp.csc_array([[1, 2], [0, -1], [8, 4]])
6060
>>> imp = SimpleImputer(missing_values=-1, strategy='mean')
6161
>>> imp.fit(X)
6262
SimpleImputer(missing_values=-1)
63-
>>> X_test = sp.csc_matrix([[-1, 2], [6, -1], [7, 6]])
63+
>>> X_test = sp.csc_array([[-1, 2], [6, -1], [7, 6]])
6464
>>> print(imp.transform(X_test).toarray())
6565
[[3. 2.]
6666
[6. 3.]

doc/modules/multiclass.rst

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -169,9 +169,9 @@ Valid :term:`multiclass` representations for
169169
[1 0 0]
170170
[0 1 0]]
171171
>>> from scipy import sparse
172-
>>> y_sparse = sparse.csr_matrix(y_dense)
172+
>>> y_sparse = sparse.csr_array(y_dense)
173173
>>> print(y_sparse)
174-
<Compressed Sparse Row sparse matrix of dtype 'int64'
174+
<Compressed Sparse Row sparse array of dtype 'int64'
175175
with 4 stored elements and shape (4, 3)>
176176
Coords Values
177177
(0, 0) 1
@@ -379,9 +379,9 @@ refer to :ref:`preprocessing_targets`.
379379

380380
An example of the same ``y`` in sparse matrix form:
381381

382-
>>> y_sparse = sparse.csr_matrix(y)
382+
>>> y_sparse = sparse.csr_array(y)
383383
>>> print(y_sparse)
384-
<Compressed Sparse Row sparse matrix of dtype 'int64'
384+
<Compressed Sparse Row sparse array of dtype 'int64'
385385
with 4 stored elements and shape (3, 4)>
386386
Coords Values
387387
(0, 0) 1
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
- Introduced a new config key: "sparse_interface" to control whether functions
2+
return sparse objects using SciPy sparse matrix or SciPy sparse array.
3+
Use `sklearn.set_config(sparse_interface="sparray")` to have sklearn
4+
return sparse arrays. See more at `the SciPy Sparse Migration Guide
5+
<https://docs.scipy.org/doc/scipy/reference/sparse.migration_to_sparray.html>`_.
6+
The scikit-learn config "sparse_interface" initially defaults
7+
to sparse matrix ("spmatrix"). The plan is to have the default change to
8+
sparse array ("sparray") in a few releases.
9+
By :user:`Dan Schult <dschult>`.
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
- :meth:`metrics.PrecisionRecallDisplay.from_estimator` and
2+
:meth:`metrics.PrecisionRecallDisplay.from_predictions` now
3+
correctly plot the chance level line when `y_true` is a PyTorch tensor.
4+
By :user:`Lucas Oliveira <lucolivi>`.
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
- The `shuffle` and the `random_state` parameters are deprecated on
2+
:class:`~preprocessing.TargetEncoder` and will be removed in version 1.11. Pass a
3+
cross-validation generator as the `cv` argument to specify the shuffling behaviour
4+
instead.
5+
By :user:`Stefanie Senger <StefanieSenger>`.

examples/applications/plot_tomography_l1_reconstruction.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,9 @@ def build_projection_operator(l_x, n_dir):
8989
weights += list(w[mask])
9090
camera_inds += list(inds[mask] + i * l_x)
9191
data_inds += list(data_unravel_indices[mask])
92-
proj_operator = sparse.coo_matrix((weights, (camera_inds, data_inds)))
92+
camera_inds = np.array(camera_inds, dtype=np.int32) # lasso needs int32 inds
93+
data_inds = np.array(data_inds, dtype=np.int32)
94+
proj_operator = sparse.coo_array((weights, (camera_inds, data_inds)))
9395
return proj_operator
9496

9597

0 commit comments

Comments
 (0)