
Commit a69cb0c

Init
1 parent 8b7fe33 commit a69cb0c

2 files changed (+364, -0 lines)

econml/score/drscorer.py

Lines changed: 276 additions & 0 deletions
@@ -0,0 +1,276 @@
# Copyright (c) PyWhy contributors. All rights reserved.
# Licensed under the MIT License.

from econml.sklearn_extensions.linear_model import StatsModelsLinearRegression
from ..dr import DRLearner
from sklearn.base import clone
import numpy as np
from scipy.special import softmax
from .ensemble_cate import EnsembleCateEstimator


class DRScorer:
    """ Scorer based on the DRLearner loss. Fits a regression model g and a propensity model p at fit time
    (via an internal cross-fitted DRLearner) and evaluates them on the evaluation data::

        g (model_regression) = E[Y | X, W, T]

        p (model_propensity) = Pr[T | X, W]

        Ydr(g, p) = g + (Y - g) / p * T

    Then for any given cate model calculates the loss::

        loss(cate) = E_n[(Ydr(g, p) - cate(X))^2]

    Also calculates a baseline loss based on a constant treatment effect model, i.e.::

        base_loss = min_{theta} E_n[(Ydr(g, p) - theta)^2]

    Returns an analogue of the R-square score for regression::

        score = 1 - loss(cate) / base_loss

    This corresponds to the extra variance of the outcome explained by introducing heterogeneity
    in the effect as captured by the cate model, as opposed to always predicting a constant effect.
    A negative score means that the cate model performs even worse than a constant effect model
    and hints at overfitting during training of the cate model.

    This method was also advocated in the recent work of [Schuleretal2018]_ when compared among several
    alternatives for causal model selection, and was introduced in the work of [NieWager2017]_.

    Parameters
    ----------
    model_propensity : scikit-learn classifier or 'auto', default 'auto'
        Estimator for Pr[T=t | X, W]. Trained by regressing treatments on (features, controls) concatenated.
        Must implement `fit` and `predict_proba` methods. The `fit` method must be able to accept X and T,
        where T is a shape (n, ) array.
        If 'auto', :class:`~sklearn.linear_model.LogisticRegressionCV` will be chosen.

    model_regression : scikit-learn regressor or 'auto', default 'auto'
        Estimator for E[Y | X, W, T]. Trained by regressing Y on (features, controls, one-hot-encoded treatments)
        concatenated. The one-hot-encoding excludes the baseline treatment. Must implement `fit` and
        `predict` methods. If different models per treatment arm are desired, see the
        :class:`.MultiModelWrapper` helper class.
        If 'auto', :class:`.WeightedLassoCV`/:class:`.WeightedMultiTaskLassoCV` will be chosen.

    model_final :
        Estimator for the final cate model. Trained by regressing the doubly robust potential outcomes
        on (features X).

        - If X is None, then the fit method of model_final should be able to handle X=None.
        - If featurizer is not None and X is not None, then it is trained on the outcome of
          featurizer.fit_transform(X).
        - If multitask_model_final is True, then this model must support multitasking
          and it is trained by regressing all doubly robust target outcomes on (featurized) features simultaneously.
        - The output of the predict(X) of the trained model will contain the CATEs for each treatment compared to
          the baseline treatment (lexicographically smallest). If multitask_model_final is False, it is assumed to
          be a mono-task model and a separate clone of the model is trained for each outcome. Then predict(X) of the
          t-th clone will be the CATE of the t-th lexicographically ordered treatment compared to the baseline.

    multitask_model_final : bool, default False
        Whether the model_final should be treated as a multi-task model. See description of model_final.

    featurizer : :term:`transformer`, optional
        Must support fit_transform and transform. Used to create composite features in the final CATE regression.
        It is ignored if X is None. The final CATE will be trained on the outcome of featurizer.fit_transform(X).
        If featurizer=None, then CATE is trained on X.

    min_propensity : float, default ``1e-6``
        The minimum propensity at which to clip propensity estimates to avoid dividing by zero.

    categories : 'auto' or list, default 'auto'
        The categories to use when encoding discrete treatments (or 'auto' to use the unique sorted values).
        The first category will be treated as the control treatment.

    cv : int, cross-validation generator or an iterable, default 2
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:

        - None, to use the default 3-fold cross-validation,
        - integer, to specify the number of folds,
        - :term:`CV splitter`,
        - an iterable yielding (train, test) splits as arrays of indices.

        For integer/None inputs, if the treatment is discrete
        :class:`~sklearn.model_selection.StratifiedKFold` is used, else
        :class:`~sklearn.model_selection.KFold` is used
        (with a random shuffle in either case).

        Unless an iterable is used, we call `split(concat[W, X], T)` to generate the splits. If all
        W, X are None, then we call `split(ones((T.shape[0], 1)), T)`.

    mc_iters : int, optional
        The number of times to rerun the first stage models to reduce the variance of the nuisances.

    mc_agg : {'mean', 'median'}, default 'mean'
        How to aggregate the nuisance value for each sample across the `mc_iters` monte carlo iterations of
        cross-fitting.

    random_state : int, :class:`~numpy.random.mtrand.RandomState` instance or None
        If int, random_state is the seed used by the random number generator;
        If :class:`~numpy.random.mtrand.RandomState` instance, random_state is the random number generator;
        If None, the random number generator is the :class:`~numpy.random.mtrand.RandomState` instance used
        by :mod:`np.random<numpy.random>`.

    References
    ----------
    .. [NieWager2017] X. Nie and S. Wager.
        Quasi-Oracle Estimation of Heterogeneous Treatment Effects.
        arXiv preprint arXiv:1712.04912, 2017.
        `<https://arxiv.org/pdf/1712.04912.pdf>`_

    .. [Schuleretal2018] Alejandro Schuler, Michael Baiocchi, Robert Tibshirani, Nigam Shah.
        "A comparison of methods for model selection when estimating individual treatment effects."
        arXiv, 2018.
        `<https://arxiv.org/pdf/1804.05146.pdf>`_

    """

    def __init__(self, *,
                 model_propensity='auto',
                 model_regression='auto',
                 model_final=StatsModelsLinearRegression(),
                 multitask_model_final=False,
                 featurizer=None,
                 min_propensity=1e-6,
                 categories='auto',
                 cv=2,
                 mc_iters=None,
                 mc_agg='mean',
                 random_state=None):
        self.model_propensity = clone(model_propensity, safe=False)
        self.model_regression = clone(model_regression, safe=False)
        self.model_final = clone(model_final, safe=False)
        self.multitask_model_final = multitask_model_final
        self.featurizer = clone(featurizer, safe=False)
        self.min_propensity = min_propensity
        self.categories = categories
        self.cv = cv
        self.mc_iters = mc_iters
        self.mc_agg = mc_agg
        self.random_state = random_state

    def fit(self, y, T, X=None, W=None, sample_weight=None, groups=None):
        """
        Fit the regression and propensity nuisance models on the evaluation data.

        Parameters
        ----------
        y: (n × d_y) matrix or vector of length n
            Outcomes for each sample
        T: (n × d_t) matrix or vector of length n
            Treatments for each sample
        X: (n × d_x) matrix, optional
            Features for each sample
        W: (n × d_w) matrix, optional
            Controls for each sample
        sample_weight: (n,) vector, optional
            Weights for each row
        groups: (n,) vector, optional
            All rows corresponding to the same group will be kept together during splitting.
            If groups is not None, the `cv` argument passed to this class's initializer
            must support a 'groups' argument to its split method.

        Returns
        -------
        self
        """
        if X is None:
            raise ValueError("X cannot be None for the DRScorer!")

        self.drlearner_ = DRLearner(model_propensity=self.model_propensity,
                                    model_regression=self.model_regression,
                                    model_final=self.model_final,
                                    multitask_model_final=self.multitask_model_final,
                                    featurizer=self.featurizer,
                                    min_propensity=self.min_propensity,
                                    categories=self.categories,
                                    cv=self.cv,
                                    mc_iters=self.mc_iters,
                                    mc_agg=self.mc_agg,
                                    random_state=self.random_state)
        self.drlearner_.fit(y, T, X=None, W=np.hstack([v for v in [X, W] if v is not None]),
                            sample_weight=sample_weight, groups=groups, cache_values=True)
        self.base_score_ = self.drlearner_.score_
        self.dx_ = X.shape[1]
        return self

    def score(self, cate_model):
        """
        Score a fitted CATE model against the doubly robust outcomes computed at fit time.

        Parameters
        ----------
        cate_model : instance of fitted BaseCateEstimator

        Returns
        -------
        score : double
            An analogue of the R-square score for the causal setting, based on the doubly robust loss.
        """
        Ydr = self.drlearner_.model_final
        X = self.drlearner_._cached_values.W[:, :self.dx_]
        sample_weight = self.drlearner_._cached_values.sample_weight
        if Ydr.ndim == 1:
            Ydr = Ydr.reshape((-1, 1))

        cate = cate_model.const_marginal_effect(X).reshape((-1, Ydr.shape[1]))

        if sample_weight is not None:
            return 1 - np.mean(np.average((Ydr - cate)**2, weights=sample_weight, axis=0)) / self.base_score_
        else:
            return 1 - np.mean((Ydr - cate) ** 2) / self.base_score_

    def best_model(self, cate_models, return_scores=False):
        """ Chooses the best among a list of models.

        Parameters
        ----------
        cate_models : list of instances of fitted BaseCateEstimator
        return_scores : bool, default False
            Whether to return the list of scores of each model

        Returns
        -------
        best_model : instance of fitted BaseCateEstimator
            The model that achieves the best score
        best_score : double
            The score of the best model
        scores : list of double
            The list of scores for each of the input models. Returned only if `return_scores=True`.
        """
        rscores = [self.score(mdl) for mdl in cate_models]
        best = np.nanargmax(rscores)
        if return_scores:
            return cate_models[best], rscores[best], rscores
        else:
            return cate_models[best], rscores[best]

    def ensemble(self, cate_models, eta=1000.0, return_scores=False):
        """ Ensembles a list of models based on their performance.

        Parameters
        ----------
        cate_models : list of instances of fitted BaseCateEstimator
        eta : double, default 1000
            The softmax parameter for the ensemble; larger values put more weight on the higher-scoring models
        return_scores : bool, default False
            Whether to return the list of scores of each model

        Returns
        -------
        ensemble_model : instance of fitted EnsembleCateEstimator
            A fitted ensemble cate model that calculates effects based on a weighted
            version of the input cate models, weighted by a softmax of their score
            performance
        ensemble_score : double
            The score of the ensemble model
        scores : list of double
            The list of scores for each of the input models. Returned only if `return_scores=True`.
        """
        drscores = np.array([self.score(mdl) for mdl in cate_models])
        goodinds = np.isfinite(drscores)
        weights = softmax(eta * drscores[goodinds])
        goodmodels = [mdl for mdl, good in zip(cate_models, goodinds) if good]
        ensemble = EnsembleCateEstimator(cate_models=goodmodels, weights=weights)
        ensemble_score = self.score(ensemble)
        if return_scores:
            return ensemble, ensemble_score, drscores
        else:
            return ensemble, ensemble_score
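
A minimal numeric sketch of the scoring recipe the class docstring describes, for a single binary treatment. The names here (g0, g1, p, cate_pred) are illustrative stand-ins for the cross-fitted nuisances that the internal DRLearner produces; they are not part of this commit's API:

import numpy as np

rng = np.random.default_rng(0)
n = 5000
X = rng.normal(size=(n, 1))
T = rng.binomial(1, 0.5, size=n)
Y = (1 + 0.5 * X[:, 0]) * T + X[:, 0] + rng.normal(scale=0.1, size=n)

# Stand-ins for the nuisances g = E[Y | X, T] and p = Pr[T=1 | X] (taken as known here)
g0 = X[:, 0]
g1 = X[:, 0] + (1 + 0.5 * X[:, 0])
p = np.full(n, 0.5)  # the real code clips this below by min_propensity

# Doubly robust pseudo-outcome for the effect of T=1 vs T=0:
# regression difference plus an inverse-propensity-weighted residual correction
Ydr = (g1 - g0) + (Y - np.where(T == 1, g1, g0)) * (T / p - (1 - T) / (1 - p))

cate_pred = 1 + 0.5 * X[:, 0]  # predictions of some candidate CATE model (here the truth)
loss = np.mean((Ydr - cate_pred) ** 2)
base_loss = np.mean((Ydr - Ydr.mean()) ** 2)  # constant-effect baseline
score = 1 - loss / base_loss  # clearly positive here; a negative value would hint at an overfit CATE model
print(score)

The class computes the same kind of quantity, except that g and p are estimated by cross-fitting inside the DRLearner and the baseline loss is taken from the fitted learner's score_ attribute.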

econml/tests/test_drscorer.py

Lines changed: 88 additions & 0 deletions
@@ -0,0 +1,88 @@
# Copyright (c) PyWhy contributors. All rights reserved.
# Licensed under the MIT License.

import unittest
import numpy as np

from econml.sklearn_extensions.linear_model import StatsModelsLinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression, LogisticRegression
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from joblib import Parallel, delayed

from econml.dml import DML, LinearDML, SparseLinearDML, NonParamDML
from econml.metalearners import XLearner, TLearner, SLearner, DomainAdaptationLearner
from econml.dr import DRLearner
from econml.score import DRScorer
import scipy.special

np.set_printoptions(suppress=True)


def _fit_model(name, model, Y, T, X):
    return name, model.fit(Y, T, X=X)


class TestDRScorer(unittest.TestCase):

    def _get_data(self):
        X = np.random.normal(size=(1000, 3))
        T = np.random.binomial(2, scipy.special.expit(X[:, 0]))
        sigma = 0.001
        y = (1 + .5 * X[:, 0]) * T + X[:, 0] + np.random.normal(0, sigma, size=(1000,))
        return y, T, X, X[:, 0]

    def test_comparison(self):
        def reg():
            return LinearRegression()

        def clf():
            return LogisticRegression()

        y, T, X, true_eff = self._get_data()
        (X_train, X_val, T_train, T_val,
         Y_train, Y_val, _, true_eff_val) = train_test_split(X, T, y, true_eff, test_size=.4)

        models = [('ldml', LinearDML(model_y=reg(), model_t=clf(), discrete_treatment=True,
                                     linear_first_stages=False, cv=3)),
                  ('sldml', SparseLinearDML(model_y=reg(), model_t=clf(), discrete_treatment=True,
                                            featurizer=PolynomialFeatures(degree=2, include_bias=False),
                                            linear_first_stages=False, cv=3)),
                  ('xlearner', XLearner(models=reg(), cate_models=reg(), propensity_model=clf())),
                  ('dalearner', DomainAdaptationLearner(models=reg(), final_models=reg(), propensity_model=clf())),
                  ('slearner', SLearner(overall_model=reg())),
                  ('tlearner', TLearner(models=reg())),
                  ('drlearner', DRLearner(model_propensity='auto', model_regression='auto',
                                          model_final=reg(), cv=3)),
                  ('rlearner', NonParamDML(model_y=reg(), model_t=clf(), model_final=reg(),
                                           discrete_treatment=True, cv=3)),
                  ('dml3dlasso', DML(model_y=reg(), model_t=clf(), model_final=reg(), discrete_treatment=True,
                                     featurizer=PolynomialFeatures(degree=3),
                                     linear_first_stages=False, cv=3))
                  ]

        models = Parallel(n_jobs=1, verbose=1)(delayed(_fit_model)(name, mdl,
                                                                   Y_train, T_train, X_train)
                                               for name, mdl in models)

        scorer = DRScorer(model_propensity='auto',
                          model_regression='auto',
                          model_final=StatsModelsLinearRegression(),
                          multitask_model_final=False,
                          featurizer=None,
                          min_propensity=1e-6,
                          cv=3,
                          mc_iters=2,
                          mc_agg='median')
        scorer.fit(Y_val, T_val, X=X_val)
        rscore = [scorer.score(mdl) for _, mdl in models]
        rootpehe_score = [np.sqrt(np.mean((true_eff_val.flatten() - mdl.effect(X_val).flatten())**2))
                          for _, mdl in models]
        # a higher DR score should not go with a higher PEHE: the fitted slope should be small or negative
        assert LinearRegression().fit(np.array(rscore).reshape(-1, 1), np.array(rootpehe_score)).coef_ < 0.5
        mdl, _ = scorer.best_model([mdl for _, mdl in models])
        rootpehe_best = np.sqrt(np.mean((true_eff_val.flatten() - mdl.effect(X_val).flatten())**2))
        assert rootpehe_best < 1.5 * np.min(rootpehe_score) + 0.05
        mdl, _ = scorer.ensemble([mdl for _, mdl in models])
        rootpehe_ensemble = np.sqrt(np.mean((true_eff_val.flatten() - mdl.effect(X_val).flatten())**2))
        assert rootpehe_ensemble < 1.5 * np.min(rootpehe_score) + 0.05
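
For reference, the model-selection flow the test exercises can be condensed into a few lines. This is a sketch assuming the DRScorer API introduced in this commit, with placeholder data and only two candidate estimators:

import numpy as np
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.model_selection import train_test_split
from econml.dml import LinearDML
from econml.metalearners import TLearner
from econml.score import DRScorer

# toy data with a heterogeneous binary-treatment effect
X = np.random.normal(size=(2000, 3))
T = np.random.binomial(1, 1 / (1 + np.exp(-X[:, 0])))
y = (1 + 0.5 * X[:, 0]) * T + X[:, 0] + np.random.normal(scale=0.1, size=2000)
X_tr, X_val, T_tr, T_val, y_tr, y_val = train_test_split(X, T, y, test_size=0.4)

# train a couple of candidate CATE estimators on the training split
candidates = [
    LinearDML(model_y=LinearRegression(), model_t=LogisticRegression(),
              discrete_treatment=True, cv=3).fit(y_tr, T_tr, X=X_tr),
    TLearner(models=LinearRegression()).fit(y_tr, T_tr, X=X_tr),
]

# fit the scorer's nuisances on the validation split, then select or ensemble
scorer = DRScorer(cv=3).fit(y_val, T_val, X=X_val)
best, best_score = scorer.best_model(candidates)
ens, ens_score = scorer.ensemble(candidates, eta=1000.0)
print(best_score, ens_score)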
