
Commit f1bfe22

Fix support for sklearn<1.0
1 parent b95f594 commit f1bfe22

4 files changed: 121 additions, 43 deletions


econml/sklearn_extensions/linear_model.py

Lines changed: 51 additions & 0 deletions
@@ -38,6 +38,51 @@
 from joblib import Parallel, delayed


+# TODO: once we drop support for sklearn < 1.0, we can remove this
+def _add_normalize(to_wrap):
+    """
+    Add a fictitious "normalize" argument to linear model initializer signatures.
+
+    This is necessary for their get_params to play nicely with some other sklearn-internal methods.
+
+    Note that directly adding a **params argument to the ordinary initializer will not work,
+    because get_params explicitly looks only at the initializer signature arguments that are not
+    varargs or varkeywords, so we need to modify the signature of the initializer to include the
+    "normalize" argument.
+    """
+    # if we're decorating a class, just update the __init__ method,
+    # so that the result is still a class instead of a wrapper method
+    if isinstance(to_wrap, type):
+        import sklearn
+        from packaging import version
+
+        if version.parse(sklearn.__version__) >= version.parse("1.0"):
+            # normalize was deprecated or removed; don't need to do anything
+            return to_wrap
+
+        else:
+            from inspect import Parameter, signature
+            from functools import wraps
+
+            old_init = to_wrap.__init__
+
+            @wraps(old_init)
+            def new_init(self, *args, normalize=False, **kwargs):
+                if normalize is not False:
+                    warnings.warn("normalize is deprecated and will be ignored", stacklevel=2)
+                return old_init(self, *args, **kwargs)
+
+            sig = signature(old_init)
+            sig = sig.replace(parameters=[*sig.parameters.values(),
+                                          Parameter("normalize", kind=Parameter.KEYWORD_ONLY, default=False)])
+
+            new_init.__signature__ = sig
+            to_wrap.__init__ = new_init
+            return to_wrap
+    else:
+        raise ValueError("This decorator was applied to a method, but is intended to be applied only to types.")
+
+
 def _weighted_check_cv(cv=5, y=None, classifier=False, random_state=None):
     cv = 5 if cv is None else cv
     if isinstance(cv, numbers.Integral):
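
For context on the docstring's claim: sklearn discovers constructor parameters by inspecting the __init__ signature and skipping varargs/varkeywords, which is why a plain **kwargs catch-all cannot surface "normalize" but a patched __signature__ can. A minimal sketch of that introspection (sketch_get_param_names is an illustrative stand-in for BaseEstimator._get_param_names, not econml or sklearn code):

# Rough re-creation of sklearn's parameter discovery, assuming its documented
# behavior: only named, non-variadic __init__ parameters are reported.
from inspect import Parameter, signature

def sketch_get_param_names(cls):
    params = [p for p in signature(cls.__init__).parameters.values()
              if p.name != "self" and p.kind not in (Parameter.VAR_POSITIONAL,
                                                     Parameter.VAR_KEYWORD)]
    return sorted(p.name for p in params)

class Model:
    def __init__(self, alpha=1.0, **kwargs):  # **kwargs is invisible below
        self.alpha = alpha

print(sketch_get_param_names(Model))  # ['alpha'] -- no "normalize" without a signature patch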
@@ -131,6 +176,7 @@ def _fit_weighted_linear_model(self, X, y, sample_weight, check_input=None):
         super().fit(**fit_params)


+@_add_normalize
 class WeightedLasso(WeightedModelMixin, Lasso):
     """Version of sklearn Lasso that accepts weights.

@@ -236,6 +282,7 @@ def fit(self, X, y, sample_weight=None, check_input=True):
         return self


+@_add_normalize
 class WeightedMultiTaskLasso(WeightedModelMixin, MultiTaskLasso):
     """Version of sklearn MultiTaskLasso that accepts weights.

@@ -325,6 +372,7 @@ def fit(self, X, y, sample_weight=None):
         return self


+@_add_normalize
 class WeightedLassoCV(WeightedModelMixin, LassoCV):
     """Version of sklearn LassoCV that accepts weights.

@@ -443,6 +491,7 @@ def fit(self, X, y, sample_weight=None):
         return self


+@_add_normalize
 class WeightedMultiTaskLassoCV(WeightedModelMixin, MultiTaskLassoCV):
     """Version of sklearn MultiTaskLassoCV that accepts weights.

@@ -582,6 +631,7 @@ def _get_theta_coefs_and_tau_sq(i, X, sample_weight, alpha_cov, n_alphas_cov, ma
     return coefs, tausq


+@_add_normalize
 class DebiasedLasso(WeightedLasso):
     """Debiased Lasso model.

@@ -927,6 +977,7 @@ def _get_unscaled_coef_var(self, X, theta_hat, sample_weight):
         return _unscaled_coef_var


+@_add_normalize
 class MultiOutputDebiasedLasso(MultiOutputRegressor):
     """Debiased MultiOutputLasso model.
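
Taken together, the decorated classes accept and ignore a no-op normalize keyword when running on sklearn < 1.0, while on 1.0+ the decorator leaves them untouched. A hedged usage sketch of the sklearn < 1.0 behavior only:

import warnings
from econml.sklearn_extensions.linear_model import WeightedLasso

est = WeightedLasso(alpha=0.1)
est.set_params(normalize=False)  # accepted: the patched signature puts "normalize" in get_params()
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    WeightedLasso(alpha=0.1, normalize=True)  # any non-False value triggers the deprecation warning
assert any("normalize" in str(w.message) for w in caught)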

econml/tests/test_dml.py

Lines changed: 31 additions & 21 deletions
@@ -7,7 +7,7 @@
 from sklearn.linear_model import LinearRegression, Lasso, LassoCV, LogisticRegression
 from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import OneHotEncoder, FunctionTransformer, PolynomialFeatures
-from sklearn.model_selection import KFold, GroupKFold
+from sklearn.model_selection import KFold, GroupKFold, check_cv
 from econml.dml import DML, LinearDML, SparseLinearDML, KernelDML, CausalForestDML
 from econml.dml import NonParamDML
 import numpy as np
@@ -1141,27 +1141,37 @@ def test_groups(self):
         est.fit(y, t, groups=groups)

         # test nested grouping
-        class NestedModel(LassoCV):
-            def __init__(self, eps=1e-3, n_alphas=100, alphas=None, fit_intercept=True,
-                         precompute='auto', max_iter=1000, tol=1e-4,
-                         copy_X=True, cv=None, verbose=False, n_jobs=None,
-                         positive=False, random_state=None, selection='cyclic'):
-
-                super().__init__(
-                    eps=eps, n_alphas=n_alphas, alphas=alphas,
-                    fit_intercept=fit_intercept,
-                    precompute=precompute, max_iter=max_iter, tol=tol, copy_X=copy_X,
-                    cv=cv, verbose=verbose, n_jobs=n_jobs, positive=positive,
-                    random_state=random_state, selection=selection)
+        class NestedModel:
+            def __init__(self, cv):
+                self.model = LassoCV(cv=cv)
+
+            # DML nested CV works via a 'cv' attribute
+            @property
+            def cv(self):
+                return self.model.cv
+
+            @cv.setter
+            def cv(self, value):
+                self.model.cv = value

             def fit(self, X, y):
-                # ensure that the grouping has worked correctly and we get all 10 copies of the items in
-                # whichever groups we saw
-                (yvals, cts) = np.unique(y, return_counts=True)
-                for (yval, ct) in zip(yvals, cts):
-                    if ct != 10:
-                        raise Exception("Grouping failed; received {0} copies of {1} instead of 10".format(ct, yval))
-                return super().fit(X, y)
+                for (train, test) in check_cv(self.cv, y).split(X, y):
+                    (yvals, cts) = np.unique(y[train], return_counts=True)
+                    # with 2-fold outer and 2-fold inner grouping, and six total groups,
+                    # we should get 1 or 2 groups per split
+                    if len(yvals) > 2:
+                        raise Exception(f"Grouping failed: received {len(yvals)} groups instead of at most 2")
+
+                    # ensure that the grouping has worked correctly and we get all 10 copies of the items in
+                    # whichever groups we see
+                    for (yval, ct) in zip(yvals, cts):
+                        if ct != 10:
+                            raise Exception(f"Grouping failed; received {ct} copies of {yval} instead of 10")
+                self.model.fit(X, y)
+                return self
+
+            def predict(self, X):
+                return self.model.predict(X)

         # test nested grouping
         est = LinearDML(model_y=NestedModel(cv=2), model_t=NestedModel(cv=2), cv=GroupKFold(2))
@@ -1170,6 +1180,6 @@ def fit(self, X, y):

         # by default, we use 5 split cross-validation for our T and Y models
         # but we don't have enough groups here to split both the outer and inner samples with grouping
         # TODO: does this imply we should change some defaults to make this more likely to succeed?
-        est = LinearDML(cv=GroupKFold(2))
+        est = LinearDML(model_y=LassoCV(cv=5), model_t=LassoCV(cv=5), cv=GroupKFold(2))
         with pytest.raises(Exception):
             est.fit(y, t, groups=groups)
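
The rewritten NestedModel leans on two contracts: the first-stage model only needs fit/predict plus a readable and settable cv attribute (as the inline comment notes), and sklearn's check_cv turns whatever that attribute holds, an int or a splitter, into a splitter object. A standalone sketch of the check_cv half (the data here is made up for illustration):

import numpy as np
from sklearn.model_selection import check_cv

X, y = np.arange(20).reshape(10, 2), np.arange(10.0)
cv = check_cv(2, y)  # for regression targets, an int becomes KFold(n_splits=2)
for train, test in cv.split(X, y):
    print(len(train), len(test))  # 5 5 on each of the two folds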

econml/tests/test_drlearner.py

Lines changed: 34 additions & 22 deletions
@@ -13,8 +13,8 @@
 from sklearn.base import TransformerMixin
 from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor, RandomForestRegressor
 from sklearn.exceptions import DataConversionWarning
-from sklearn.linear_model import LinearRegression, Lasso, LassoCV, LogisticRegression
-from sklearn.model_selection import KFold, GroupKFold
+from sklearn.linear_model import LinearRegression, Lasso, LassoCV, LogisticRegression, LogisticRegressionCV
+from sklearn.model_selection import KFold, GroupKFold, check_cv
 from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import OneHotEncoder, FunctionTransformer, PolynomialFeatures

@@ -799,27 +799,37 @@ def test_groups(self):
         est.fit(y, t, W=w, groups=groups)

         # test nested grouping
-        class NestedModel(LassoCV):
-            def __init__(self, eps=1e-3, n_alphas=100, alphas=None, fit_intercept=True,
-                         precompute='auto', max_iter=1000, tol=1e-4,
-                         copy_X=True, cv=None, verbose=False, n_jobs=None,
-                         positive=False, random_state=None, selection='cyclic'):
-
-                super().__init__(
-                    eps=eps, n_alphas=n_alphas, alphas=alphas,
-                    fit_intercept=fit_intercept,
-                    precompute=precompute, max_iter=max_iter, tol=tol, copy_X=copy_X,
-                    cv=cv, verbose=verbose, n_jobs=n_jobs, positive=positive,
-                    random_state=random_state, selection=selection)
+        class NestedModel:
+            def __init__(self, cv):
+                self.model = LassoCV(cv=cv)
+
+            # DML nested CV works via a 'cv' attribute
+            @property
+            def cv(self):
+                return self.model.cv
+
+            @cv.setter
+            def cv(self, value):
+                self.model.cv = value

             def fit(self, X, y):
-                # ensure that the grouping has worked correctly and we get all 10 copies of the items in
-                # whichever groups we saw
-                (yvals, cts) = np.unique(y, return_counts=True)
-                for (yval, ct) in zip(yvals, cts):
-                    if ct != 10:
-                        raise Exception("Grouping failed; received {0} copies of {1} instead of 10".format(ct, yval))
-                return super().fit(X, y)
+                for (train, test) in check_cv(self.cv, y).split(X, y):
+                    (yvals, cts) = np.unique(y[train], return_counts=True)
+                    # with 2-fold outer and 2-fold inner grouping, and six total groups,
+                    # we should get 1 or 2 groups per split
+                    if len(yvals) > 2:
+                        raise Exception(f"Grouping failed: received {len(yvals)} groups instead of at most 2")
+
+                    # ensure that the grouping has worked correctly and we get all 10 copies of the items in
+                    # whichever groups we see
+                    for (yval, ct) in zip(yvals, cts):
+                        if ct != 10:
+                            raise Exception(f"Grouping failed; received {ct} copies of {yval} instead of 10")
+                self.model.fit(X, y)
+                return self
+
+            def predict(self, X):
+                return self.model.predict(X)

         # test nested grouping
         est = LinearDRLearner(model_propensity=LogisticRegression(),
@@ -829,7 +839,9 @@ def fit(self, X, y):

         # by default, we use 5 split cross-validation for our T and Y models
         # but we don't have enough groups here to split both the outer and inner samples with grouping
         # TODO: does this imply we should change some defaults to make this more likely to succeed?
-        est = LinearDRLearner(cv=GroupKFold(2))
+        est = LinearDRLearner(model_propensity=LogisticRegressionCV(cv=5),
+                              model_regression=LassoCV(cv=5),
+                              cv=GroupKFold(2))
         with pytest.raises(Exception):
             est.fit(y, t, W=w, groups=groups)

econml/tests/test_linear_model.py

Lines changed: 5 additions & 0 deletions
@@ -51,6 +51,11 @@ def setUpClass(cls):
         cls.y_2D_consistent = np.concatenate((TestLassoExtensions.y_simple.reshape(-1, 1),
                                               TestLassoExtensions.y2_full.reshape(-1, 1)), axis=1)

+    def test_can_clone(self):
+        for model in [WeightedLasso(), WeightedLassoCV(), WeightedMultiTaskLassoCV(),
+                      WeightedLassoCVWrapper(), DebiasedLasso(), MultiOutputDebiasedLasso()]:
+            clone(model)
+
     #################
     # WeightedLasso #
     #################
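
For reference, what clone demands (a rough sketch of sklearn's contract, not its actual implementation): an estimator is rebuilt from its own reported constructor parameters, so every key in get_params() must be accepted by __init__, which is precisely what _add_normalize restores for the shimmed "normalize" key on older sklearn:

# Roughly what sklearn.base.clone does; the real version also deep-clones
# nested estimators and verifies the parameters survive the round-trip.
def sketch_clone(estimator):
    return type(estimator)(**estimator.get_params(deep=False))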
