|
1 | 1 | from pathlib import Path |
2 | 2 |
|
3 | 3 | import numpy as np |
4 | | -import scipy.sparse as sp |
5 | 4 | from joblib import Memory |
6 | 5 |
|
7 | 6 | from sklearn.datasets import ( |
|
17 | 16 | from sklearn.feature_extraction.text import TfidfVectorizer |
18 | 17 | from sklearn.model_selection import train_test_split |
19 | 18 | from sklearn.preprocessing import MaxAbsScaler, StandardScaler |
| 19 | +from sklearn.utils.fixes import _sparse_random_array |
20 | 20 |
|
21 | 21 | # memory location for caching datasets |
22 | 22 | M = Memory(location=str(Path(__file__).resolve().parent / "cache")) |
@@ -100,12 +100,12 @@ def _synth_regression_dataset(n_samples=100000, n_features=100, dtype=np.float32 |
100 | 100 | def _synth_regression_sparse_dataset( |
101 | 101 | n_samples=10000, n_features=10000, density=0.01, dtype=np.float32 |
102 | 102 | ): |
103 | | - X = sp.random( |
104 | | - m=n_samples, n=n_features, density=density, format="csr", random_state=0 |
| 103 | + X = _sparse_random_array( |
| 104 | + (n_samples, n_features), density=density, format="csr", random_state=0 |
105 | 105 | ) |
106 | 106 | X.data = np.random.RandomState(0).randn(X.getnnz()) |
107 | 107 | X = X.astype(dtype, copy=False) |
108 | | - coefs = sp.random(m=n_features, n=1, density=0.5, random_state=0) |
| 108 | + coefs = _sparse_random_array((n_features, 1), density=0.5, random_state=0) |
109 | 109 | coefs.data = np.random.RandomState(0).randn(coefs.getnnz()) |
110 | 110 | y = X.dot(coefs.toarray()).reshape(-1) |
111 | 111 | y += 0.2 * y.std() * np.random.randn(n_samples) |
@@ -155,9 +155,8 @@ def _random_dataset( |
155 | 155 | X = np.random.RandomState(0).random_sample((n_samples, n_features)) |
156 | 156 | X = X.astype(dtype, copy=False) |
157 | 157 | else: |
158 | | - X = sp.random( |
159 | | - n_samples, |
160 | | - n_features, |
| 158 | + X = _sparse_random_array( |
| 159 | + (n_samples, n_features), |
161 | 160 | density=0.05, |
162 | 161 | format="csr", |
163 | 162 | dtype=dtype, |
|
0 commit comments