Skip to content

Commit 0061647

Browse files
authored
remove normalize parameter anticipating its removal from sklearn (#207)
* remove normalize parameter anticipating its removal from sklearn
* fix warning, increase coverage
* require sklearn>=1.0 in setup
1 parent bd0e48f commit 0061647

File tree

6 files changed

+35
-67
lines changed

6 files changed

+35
-67
lines changed

.github/workflows/build.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@ name: build
33
on:
44
push:
55
branches:
6-
- 'master'
6+
- 'main'
77
pull_request:
88
branches:
9-
- master
9+
- main
1010

1111
jobs:
1212
build-linux:

celer/dropin_sklearn.py

Lines changed: 20 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919

2020

2121
class Lasso(Lasso_sklearn):
22-
"""
22+
r"""
2323
Lasso scikit-learn estimator based on Celer solver
2424
2525
The optimization objective for Lasso is::
@@ -61,11 +61,6 @@ class Lasso(Lasso_sklearn):
6161
Strictly positive weights used in the L1 penalty part of the Lasso
6262
objective. If None, weights equal to 1 are used.
6363
64-
normalize : bool, optional (default=False)
65-
This parameter is ignored when ``fit_intercept`` is set to False.
66-
If True, the regressors X will be normalized before regression by
67-
subtracting the mean and dividing by the l2-norm.
68-
6964
warm_start : bool, optional (default=False)
7065
When set to True, reuse the solution of the previous call to fit as
7166
initialization, otherwise, just erase the previous solution.
@@ -113,12 +108,11 @@ class Lasso(Lasso_sklearn):
113108

114109
def __init__(self, alpha=1., max_iter=100, max_epochs=50000, p0=10,
115110
verbose=0, tol=1e-4, prune=True, fit_intercept=True,
116-
weights=None, normalize=False, warm_start=False,
111+
weights=None, warm_start=False,
117112
positive=False):
118113
super(Lasso, self).__init__(
119114
alpha=alpha, tol=tol, max_iter=max_iter,
120-
fit_intercept=fit_intercept, normalize=normalize,
121-
warm_start=warm_start)
115+
fit_intercept=fit_intercept, warm_start=warm_start)
122116
self.verbose = verbose
123117
self.max_epochs = max_epochs
124118
self.p0 = p0
@@ -140,7 +134,7 @@ def path(self, X, y, alphas, coef_init=None, return_n_iter=True, **kwargs):
140134

141135

142136
class LassoCV(RegressorMixin, LinearModelCV):
143-
"""
137+
r"""
144138
LassoCV scikit-learn estimator based on Celer solver
145139
146140
The best model is selected by cross-validation.
@@ -167,11 +161,6 @@ class LassoCV(RegressorMixin, LinearModelCV):
167161
to false, no intercept will be used in calculations
168162
(e.g. data is expected to be already centered).
169163
170-
normalize : bool, optional (default=False)
171-
This parameter is ignored when ``fit_intercept`` is set to False.
172-
If True, the regressors X will be normalized before regression by
173-
subtracting the mean and dividing by the l2-norm.
174-
175164
max_iter : int, optional
176165
The maximum number of iterations (subproblem definitions).
177166
@@ -243,12 +232,12 @@ class LassoCV(RegressorMixin, LinearModelCV):
243232
"""
244233

245234
def __init__(self, eps=1e-3, n_alphas=100, alphas=None,
246-
fit_intercept=True, normalize=False, max_iter=100,
235+
fit_intercept=True, max_iter=100,
247236
tol=1e-4, cv=None, verbose=0, max_epochs=50000, p0=10,
248237
prune=True, precompute='auto', positive=False, n_jobs=None):
249238
super(LassoCV, self).__init__(
250239
eps=eps, n_alphas=n_alphas, alphas=alphas, max_iter=max_iter,
251-
tol=tol, cv=cv, fit_intercept=fit_intercept, normalize=normalize,
240+
tol=tol, cv=cv, fit_intercept=fit_intercept,
252241
verbose=verbose, n_jobs=n_jobs)
253242
self.max_epochs = max_epochs
254243
self.p0 = p0
@@ -276,7 +265,7 @@ def _more_tags(self):
276265

277266

278267
class MultiTaskLasso(MultiTaskLasso_sklearn):
279-
"""
268+
r"""
280269
MultiTaskLasso scikit-learn estimator based on Celer solver
281270
282271
The optimization objective for MultiTaskLasso is::
@@ -314,11 +303,6 @@ class MultiTaskLasso(MultiTaskLasso_sklearn):
314303
fit_intercept : bool, optional (default=True)
315304
Whether or not to fit an intercept.
316305
317-
normalize : bool, optional (default=False)
318-
This parameter is ignored when ``fit_intercept`` is set to False.
319-
If True, the regressors X will be normalized before regression by
320-
subtracting the mean and dividing by the l2-norm.
321-
322306
warm_start : bool, optional (default=False)
323307
When set to True, reuse the solution of the previous call to fit as
324308
initialization, otherwise, just erase the previous solution.
@@ -351,11 +335,10 @@ class MultiTaskLasso(MultiTaskLasso_sklearn):
351335

352336
def __init__(self, alpha=1., max_iter=100,
353337
max_epochs=50000, p0=10, verbose=0, tol=1e-4, prune=True,
354-
fit_intercept=True, normalize=False, warm_start=False):
338+
fit_intercept=True, warm_start=False):
355339
super().__init__(
356340
alpha=alpha, tol=tol, max_iter=max_iter,
357-
fit_intercept=fit_intercept, normalize=normalize,
358-
warm_start=warm_start)
341+
fit_intercept=fit_intercept, warm_start=warm_start)
359342
self.verbose = verbose
360343
self.max_epochs = max_epochs
361344
self.p0 = p0
@@ -382,7 +365,7 @@ def fit(self, X, y):
382365
% (n_samples, y.shape[0]))
383366

384367
X, y, X_offset, y_offset, X_scale = _preprocess_data(
385-
X, y, self.fit_intercept, self.normalize, copy=False)
368+
X, y, self.fit_intercept, copy=False)
386369

387370
if not self.warm_start or not hasattr(self, "coef_"):
388371
self.coef_ = None
@@ -400,7 +383,7 @@ def fit(self, X, y):
400383

401384

402385
class MultiTaskLassoCV(RegressorMixin, LinearModelCV):
403-
"""
386+
r"""
404387
MultiTaskLassoCV scikit-learn estimator based on Celer solver
405388
406389
The best model is selected by cross-validation.
@@ -427,11 +410,6 @@ class MultiTaskLassoCV(RegressorMixin, LinearModelCV):
427410
to false, no intercept will be used in calculations
428411
(e.g. data is expected to be already centered).
429412
430-
normalize : bool, optional (default=False)
431-
This parameter is ignored when ``fit_intercept`` is set to False.
432-
If True, the regressors X will be normalized before regression by
433-
subtracting the mean and dividing by the l2-norm.
434-
435413
max_iter : int, optional
436414
The maximum number of iterations (subproblem definitions).
437415
@@ -498,13 +476,13 @@ class MultiTaskLassoCV(RegressorMixin, LinearModelCV):
498476
"""
499477

500478
def __init__(self, eps=1e-3, n_alphas=100, alphas=None,
501-
fit_intercept=True, normalize=False, max_iter=100,
479+
fit_intercept=True, max_iter=100,
502480
tol=1e-4, cv=None, verbose=0,
503481
max_epochs=50000, p0=10, prune=True, precompute='auto',
504482
n_jobs=1):
505483
super().__init__(
506484
eps=eps, n_alphas=n_alphas, alphas=alphas, max_iter=max_iter,
507-
tol=tol, cv=cv, fit_intercept=fit_intercept, normalize=normalize,
485+
tol=tol, cv=cv, fit_intercept=fit_intercept,
508486
verbose=verbose, n_jobs=n_jobs)
509487
self.max_epochs = max_epochs
510488
self.p0 = p0
@@ -531,7 +509,7 @@ def _more_tags(self):
531509

532510

533511
class LogisticRegression(LogReg_sklearn):
534-
"""
512+
r"""
535513
Sparse Logistic regression scikit-learn estimator based on Celer solver.
536514
537515
The optimization objective for sparse Logistic regression is::
@@ -774,11 +752,6 @@ class GroupLasso(Lasso_sklearn):
774752
fit_intercept : bool, optional (default=True)
775753
Whether or not to fit an intercept.
776754
777-
normalize : bool, optional (default=False)
778-
This parameter is ignored when ``fit_intercept`` is set to False.
779-
If True, the regressors X will be normalized before regression by
780-
subtracting the mean and dividing by the l2-norm.
781-
782755
warm_start : bool, optional (default=False)
783756
When set to True, reuse the solution of the previous call to fit as
784757
initialization, otherwise, just erase the previous solution.
@@ -803,7 +776,7 @@ class GroupLasso(Lasso_sklearn):
803776
>>> clf = GroupLasso(alpha=0.5, groups=[[0, 1], [2]])
804777
>>> clf.fit([[0, 0, 1], [1, -1, 2], [2, 0, -1]], [1, 1, -1])
805778
GroupLasso(alpha=0.5, fit_intercept=True,
806-
groups=[[0, 1], [2]], max_epochs=50000, max_iter=100, normalize=False,
779+
groups=[[0, 1], [2]], max_epochs=50000, max_iter=100,
807780
p0=10, prune=True, tol=0.0001, verbose=0, warm_start=False)
808781
>>> print(clf.coef_)
809782
[-0. -0. 0.39285714]
@@ -828,10 +801,10 @@ class GroupLasso(Lasso_sklearn):
828801

829802
def __init__(self, groups=1, alpha=1., max_iter=100,
830803
max_epochs=50000, p0=10, verbose=0, tol=1e-4, prune=True,
831-
fit_intercept=True, normalize=False, warm_start=False):
804+
fit_intercept=True, warm_start=False):
832805
super(GroupLasso, self).__init__(
833806
alpha=alpha, tol=tol, max_iter=max_iter,
834-
fit_intercept=fit_intercept, normalize=normalize,
807+
fit_intercept=fit_intercept,
835808
warm_start=warm_start)
836809
self.groups = groups
837810
self.verbose = verbose
@@ -854,7 +827,7 @@ def path(self, X, y, alphas, coef_init=None, return_n_iter=True,
854827

855828

856829
class GroupLassoCV(LassoCV, LinearModelCV):
857-
"""
830+
r"""
858831
GroupLassoCV scikit-learn estimator based on Celer solver
859832
860833
The best model is selected by cross-validation.
@@ -892,11 +865,6 @@ class GroupLassoCV(LassoCV, LinearModelCV):
892865
to false, no intercept will be used in calculations
893866
(e.g. data is expected to be already centered).
894867
895-
normalize : bool, optional (default=False)
896-
This parameter is ignored when ``fit_intercept`` is set to False.
897-
If True, the regressors X will be normalized before regression by
898-
subtracting the mean and dividing by the l2-norm.
899-
900868
max_iter : int, optional
901869
The maximum number of iterations (subproblem definitions).
902870
@@ -968,12 +936,12 @@ class GroupLassoCV(LassoCV, LinearModelCV):
968936
"""
969937

970938
def __init__(self, groups=None, eps=1e-3, n_alphas=100, alphas=None,
971-
fit_intercept=True, normalize=False, max_iter=100,
939+
fit_intercept=True, max_iter=100,
972940
tol=1e-4, cv=None, verbose=0, max_epochs=50000, p0=10,
973941
prune=True, precompute='auto', positive=False, n_jobs=None):
974942
super(GroupLassoCV, self).__init__(
975943
eps=eps, n_alphas=n_alphas, alphas=alphas, max_iter=max_iter,
976-
tol=tol, cv=cv, fit_intercept=fit_intercept, normalize=normalize,
944+
tol=tol, cv=cv, fit_intercept=fit_intercept,
977945
verbose=verbose, n_jobs=n_jobs)
978946
self.groups = groups
979947
self.max_epochs = max_epochs

celer/tests/test_lasso.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ def test_Lasso(sparse_X, fit_intercept, positive):
115115

116116
alpha = alpha_max / 2.
117117
params = dict(alpha=alpha, fit_intercept=fit_intercept, tol=1e-10,
118-
normalize=True, positive=positive)
118+
positive=positive)
119119
clf = Lasso(**params)
120120
clf.fit(X, y)
121121

celer/tests/test_logreg.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ def test_LogisticRegression(sparse_X):
4646
alpha_max = norm(X.T.dot(y), ord=np.inf) / 2
4747
C = 20. / alpha_max
4848

49+
clf = LogisticRegression(C=-1)
50+
np.testing.assert_raises(ValueError, clf.fit, X, y)
4951
tol = 1e-8
5052
clf1 = LogisticRegression(C=C, tol=tol, verbose=0)
5153
clf1.fit(X, y)

celer/tests/test_mtl.py

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,22 +15,21 @@
1515
from celer.utils.testing import build_dataset
1616

1717

18-
@pytest.mark.parametrize("sparse_X, fit_intercept, normalize",
19-
itertools.product([0, 1], [0, 1], [0, 1]))
20-
def test_GroupLasso_Lasso_equivalence(sparse_X, fit_intercept, normalize):
18+
@pytest.mark.parametrize("sparse_X, fit_intercept",
19+
itertools.product([0, 1], [0, 1]))
20+
def test_GroupLasso_Lasso_equivalence(sparse_X, fit_intercept):
2121
"""Check that GroupLasso with groups of size 1 gives Lasso."""
2222
n_features = 1000
2323
X, y = build_dataset(
2424
n_samples=100, n_features=n_features, sparse_X=sparse_X)
2525
alpha_max = norm(X.T @ y, ord=np.inf) / len(y)
2626
alpha = alpha_max / 10
2727
clf = Lasso(alpha, tol=1e-12, fit_intercept=fit_intercept,
28-
normalize=normalize, verbose=0)
28+
verbose=0)
2929
clf.fit(X, y)
3030
# take groups of size 1:
3131
clf1 = GroupLasso(alpha=alpha, groups=1, tol=1e-12,
32-
fit_intercept=fit_intercept, normalize=normalize,
33-
verbose=0)
32+
fit_intercept=fit_intercept, verbose=0)
3433
clf1.fit(X, y)
3534

3635
np.testing.assert_allclose(clf1.coef_, clf.coef_, atol=1e-6)
@@ -65,12 +64,12 @@ def test_GroupLasso_MultitaskLasso_equivalence():
6564
np.testing.assert_allclose(alpha_max, other / len(Y_))
6665

6766
alpha = alpha_max / 10
68-
clf = MultiTaskLasso(alpha, fit_intercept=False, tol=1e-8, verbose=2)
67+
clf = MultiTaskLasso(alpha, fit_intercept=False, tol=1e-8, verbose=0)
6968
clf.fit(X_, Y_)
7069

7170
groups = [grp.tolist() for grp in grp_indices.reshape(50, 3)]
7271
clf1 = GroupLasso(alpha=alpha / 3, groups=groups,
73-
fit_intercept=False, tol=1e-8, verbose=2)
72+
fit_intercept=False, tol=1e-8, verbose=0)
7473
clf1.fit(X, y)
7574

7675
np.testing.assert_allclose(clf1.coef_, clf.coef_.reshape(-1), atol=1e-4)
@@ -110,7 +109,7 @@ def test_MultiTaskLassoCV():
110109
X, y = build_dataset(n_samples=30, n_features=50, n_targets=3)
111110

112111
params = dict(eps=1e-2, n_alphas=10, tol=1e-12, cv=2, n_jobs=1,
113-
fit_intercept=False, verbose=2)
112+
fit_intercept=False, verbose=0)
114113

115114
clf = MultiTaskLassoCV(**params)
116115
clf.fit(X, y)
@@ -140,8 +139,7 @@ def test_MultiTaskLasso(fit_intercept):
140139
alpha_max = np.max(norm(X.T.dot(Y), axis=1)) / X.shape[0]
141140

142141
alpha = alpha_max / 2.
143-
params = dict(alpha=alpha, fit_intercept=fit_intercept, tol=1e-10,
144-
normalize=True)
142+
params = dict(alpha=alpha, fit_intercept=fit_intercept, tol=1e-10)
145143
clf = MultiTaskLasso(**params)
146144
clf.verbose = 2
147145
clf.fit(X, Y)

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
download_url=DOWNLOAD_URL,
3636
install_requires=['numpy>=1.12', 'seaborn>=0.7', 'scipy>=0.18.0',
3737
'matplotlib>=2.0.0', 'Cython>=0.26', 'libsvmdata',
38-
'scikit-learn>=0.24', 'xarray', 'download', 'tqdm'],
38+
'scikit-learn>=1.0', 'xarray', 'download', 'tqdm'],
3939
packages=find_packages(),
4040
cmdclass={'build_ext': build_ext},
4141
ext_modules=[

0 commit comments

Comments
 (0)