Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
180 changes: 180 additions & 0 deletions montecarlo/econml_dml_te.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
from econml.dml import DMLCateEstimator, LinearDMLCateEstimator, SparseLinearDMLCateEstimator, ForestDMLCateEstimator
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add copyright header.

Also consider adding a description of what this file is for.

import numpy as np
from itertools import product
from sklearn.linear_model import Lasso, LassoCV, LogisticRegression, LogisticRegressionCV,LinearRegression,MultiTaskElasticNet,MultiTaskElasticNetCV
from sklearn.ensemble import RandomForestRegressor,RandomForestClassifier
from sklearn.preprocessing import PolynomialFeatures
import matplotlib.pyplot as plt
import matplotlib
from sklearn.model_selection import train_test_split

# Treatment effect function
def exp_te(x):
    """Exponential heterogeneous treatment effect: exp(2 * x[0])."""
    first_feature = x[0]
    return np.exp(2.0 * first_feature)

def instance_params(opts, seed):
    """Draw the fixed per-instance parameters of the data-generating process.

    Parameters
    ----------
    opts : dict
        Must contain 'n_controls' (number of control variables) and
        'support_size' (how many controls actually affect Y and T).
    seed : int
        Seeds numpy's global RNG so instances are reproducible.

    Returns
    -------
    dict with keys 'support_Y', 'coefs_Y', 'support_T', 'coefs_T'.
    """
    # Bug fix: `seed` was previously accepted but never used, so repeated
    # calls with the same seed produced different instances.
    np.random.seed(seed)
    n_controls = opts['n_controls']
    support_size = opts['support_size']

    # Renamed the local dict: it previously shadowed this function's name.
    params = {}
    # Outcome and treatment share the same support over the controls,
    # but draw independent coefficients.
    params['support_Y'] = np.random.choice(np.arange(n_controls), size=support_size, replace=False)
    params['coefs_Y'] = np.random.uniform(0, 1, size=support_size)
    params['support_T'] = params['support_Y']
    params['coefs_T'] = np.random.uniform(0, 1, size=support_size)
    return params

def gen_data(opts, instance_params, seed):
    """Generate one Monte Carlo data set with heterogeneous treatment effects.

    Model: Y = TE(X) * T + <W[support_Y], coefs_Y> + eps,
           T = <W[support_T], coefs_T> + eta, with uniform noise.

    Parameters
    ----------
    opts : dict with 'n_controls', 'n_features', 'n_samples'
    instance_params : dict from `instance_params()` with supports/coefficients
    seed : int, seeds numpy's global RNG for a reproducible draw

    Returns
    -------
    ((X_test, Y_train, T_train, X_train, W_train), expected_te)
        Training data (80% split) plus a test grid and the true effect on it.
    """
    # Bug fix: `seed` was previously accepted but never used.
    np.random.seed(seed)
    n_controls = opts['n_controls']
    n_features = opts['n_features']
    n_samples = opts['n_samples']
    support_Y = instance_params['support_Y']
    coefs_Y = instance_params['coefs_Y']
    support_T = instance_params['support_T']
    coefs_T = instance_params['coefs_T']

    # Independent noise on the outcome and the treatment.
    epsilon_sample = lambda n: np.random.uniform(-1, 1, size=n)
    eta_sample = lambda n: np.random.uniform(-1, 1, size=n)

    # Controls, covariates, treatments, and outcomes.
    W = np.random.normal(0, 1, size=(n_samples, n_controls))
    X = np.random.uniform(0, 1, size=(n_samples, n_features))

    # Heterogeneous treatment effects.
    TE = np.array([exp_te(x) for x in X])
    T = np.dot(W[:, support_T], coefs_T) + eta_sample(n_samples)
    # Bug fix: the outcome previously indexed W with support_T while using
    # coefs_Y. Identical today only because instance_params sets
    # support_T == support_Y; use support_Y so the pairing stays correct
    # if the supports ever differ.
    Y = TE * T + np.dot(W[:, support_Y], coefs_Y) + epsilon_sample(n_samples)
    # Seed the split so the whole draw is reproducible.
    Y_train, Y_val, T_train, T_val, X_train, X_val, W_train, W_val = train_test_split(
        Y, T, X, W, test_size=.2, random_state=seed)

    # Dense evaluation grid and the true effect on it.
    X_test = np.array(list(product(np.arange(0, 1, 0.01), repeat=n_features)))
    expected_te = np.array([exp_te(x_i) for x_i in X_test])

    # (data, true_param)
    return (X_test, Y_train, T_train, X_train, W_train), expected_te

def linear_dml_fit(data, opts, seed):
    """Fit a LinearDMLCateEstimator and return the CATE estimate with a 95% CI.

    Returns ((X_test, const_marginal_effect), (lb, ub)).
    """
    X_test, Y, T, X, W = data

    estimator = LinearDMLCateEstimator(model_y=opts['model_y'],
                                       model_t=opts['model_t'],
                                       random_state=seed)
    estimator.fit(Y, T, X, W, inference=opts['inference'])
    # const_marginal_effect(X_test) coincides with est.effect(X) here.
    point = estimator.const_marginal_effect(X_test)
    lower, upper = estimator.const_marginal_effect_interval(X_test, alpha=0.05)

    return (X_test, point), (lower, upper)

def sparse_linear_dml_poly_fit(data, opts, seed):
    """Fit a SparseLinearDMLCateEstimator with a featurizer; return CATE + 95% CI.

    Returns ((X_test, const_marginal_effect), (lb, ub)).
    """
    X_test, Y, T, X, W = data

    estimator = SparseLinearDMLCateEstimator(model_y=opts['model_y'],
                                             model_t=opts['model_t'],
                                             featurizer=opts['featurizer'],
                                             random_state=seed)
    estimator.fit(Y, T, X, W, inference=opts['inference'])
    # const_marginal_effect(X_test) coincides with est.effect(X) here.
    point = estimator.const_marginal_effect(X_test)
    lower, upper = estimator.const_marginal_effect_interval(X_test, alpha=0.05)

    return (X_test, point), (lower, upper)

def dml_poly_fit(data, opts, seed):
    """Fit a DMLCateEstimator with a custom final model; return CATE + 95% CI.

    Returns ((X_test, const_marginal_effect), (lb, ub)).
    """
    X_test, Y, T, X, W = data

    estimator = DMLCateEstimator(model_y=opts['model_y'],
                                 model_t=opts['model_t'],
                                 model_final=opts['model_final'],
                                 featurizer=opts['featurizer'],
                                 random_state=seed)
    estimator.fit(Y, T, X, W, inference=opts['inference'])
    # const_marginal_effect(X_test) coincides with est.effect(X) here.
    point = estimator.const_marginal_effect(X_test)
    lower, upper = estimator.const_marginal_effect_interval(X_test, alpha=0.05)

    return (X_test, point), (lower, upper)

def forest_dml_fit(data, opts, seed):
    """Fit a ForestDMLCateEstimator and return the CATE estimate with a 95% CI.

    Parameters
    ----------
    data : (X_test, Y, T, X, W) tuple as produced by `gen_data`
    opts : dict of estimator options (model_y, model_t, forest hyperparameters,
           'inference' method)
    seed : int, forwarded to the estimator's random_state

    Returns
    -------
    ((X_test, const_marginal_effect), (lb, ub))
    """
    X_test, Y, T, X, W = data

    est = ForestDMLCateEstimator(model_y=opts['model_y'],
                                 model_t=opts['model_t'],
                                 discrete_treatment=opts['discrete_treatment'],
                                 n_estimators=opts['n_estimators'],
                                 subsample_fr=opts['subsample_fr'],
                                 min_samples_leaf=opts['min_samples_leaf'],
                                 min_impurity_decrease=opts['min_impurity_decrease'],
                                 verbose=opts['verbose'],
                                 min_weight_fraction_leaf=opts['min_weight_fraction_leaf'],
                                 # Bug fix: `seed` was accepted but never used;
                                 # every sibling fit function passes random_state.
                                 random_state=seed)
    est.fit(Y, T, X, W, inference=opts['inference'])
    const_marginal_effect = est.const_marginal_effect(X_test)  # same as est.effect(X)
    lb, ub = est.const_marginal_effect_interval(X_test, alpha=0.05)

    return (X_test, const_marginal_effect), (lb, ub)

def main():
    """Smoke test: generate one instance, fit LinearDML, print the estimates."""
    opts = {
        'n_controls': 30,
        'n_features': 1,
        'n_samples': 1000,
        'support_size': 5,
    }
    method_opts = {
        'model_y': RandomForestRegressor(),
        'model_t': RandomForestRegressor(),
        'inference': 'statsmodels',
    }
    mc_opts = {'seed': 123}

    # Bug fix: gen_data takes (opts, instance_params, seed); it was previously
    # called as gen_data(opts, mc_opts), passing the Monte Carlo options where
    # the instance parameters belong (and omitting the seed entirely).
    params = instance_params(opts, mc_opts['seed'])
    data, true_param = gen_data(opts, params, mc_opts['seed'])
    X_test, Y_train, T_train, X_train, W_train = data

    te_pred = linear_dml_fit(data, method_opts, mc_opts['seed'])
    # Removed a leftover `import pdb; pdb.set_trace()` debugging hook.
    # te_pred = ((X_test, const_marginal_effect), (lb, ub))
    print(te_pred[0][1])

if __name__ == "__main__":
    main()
15 changes: 15 additions & 0 deletions montecarlo/mc_from_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import sys
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add copyright header.

import argparse
from mcpy.monte_carlo import MonteCarlo
import importlib

def monte_carlo_main():
    """Entry point: run a Monte Carlo experiment described by a config module.

    Usage::

        python mc_from_config.py --config <importable.module.path>

    The named module must be importable and define a ``CONFIG`` dict
    understood by ``mcpy.monte_carlo.MonteCarlo``.
    """
    # Bug fix: the description was a copied argparse example
    # ("Process some integers.") unrelated to this tool.
    parser = argparse.ArgumentParser(
        description='Run a Monte Carlo experiment from a config module.')
    parser.add_argument('--config', type=str,
                        help='importable module path of the config file')
    args = parser.parse_args(sys.argv[1:])

    # The config is a Python module so it can hold callables, not just data.
    config = importlib.import_module(args.config, __name__)
    MonteCarlo(config.CONFIG).run()

if __name__ == "__main__":
    monte_carlo_main()
Empty file added montecarlo/mcpy/__init__.py
Empty file.
63 changes: 63 additions & 0 deletions montecarlo/mcpy/metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import numpy as np
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add copyright header.


def l1_error(x, y): return np.linalg.norm(x-y, ord=1)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add docstrings to all functions.

def l2_error(x, y):
    """L2 (Euclidean) distance between estimate x and truth y."""
    residual = x - y
    return np.linalg.norm(residual, ord=2)

def bias(x, y):
    """Signed difference estimate - truth."""
    return x - y

def raw_estimate(x, y):
    """Pass-through: return the raw estimate, ignoring the truth."""
    return x

def truth(x, y):
    """Pass-through: return the true parameter, ignoring the estimate."""
    return y

def raw_estimate_nonzero_truth(x, y):
    """Estimates restricted to coordinates where the truth is positive."""
    positive_mask = y > 0
    return x[positive_mask]

def rmse(param_estimates, true_params):
    """Pointwise root-mean-squared error across Monte Carlo replications.

    Each element of param_estimates is ((X_test, point_estimate), (lb, ub)).
    Returns (X_test, rmse_per_test_point).
    """
    grid = param_estimates[0][0][0]
    preds = np.array([estimate[0][1] for estimate in param_estimates])
    squared_errors = (preds - true_params) ** 2
    return (grid, np.sqrt(np.mean(squared_errors, axis=0)))

def std(param_estimates, true_params):
    """Pointwise standard deviation of the estimates across replications.

    Returns (X_test, std_per_test_point); true_params is unused.
    """
    grid = param_estimates[0][0][0]
    preds = np.array([estimate[0][1] for estimate in param_estimates])
    return (grid, np.std(preds, axis=0))

def conf_length(param_estimates, true_params):
    """Average confidence-interval length per test point across replications.

    Each element of param_estimates is ((X_test, point_estimate), (lb, ub)).
    Returns (X_test, mean_interval_length); true_params is unused.
    """
    grid = param_estimates[0][0][0]
    lowers = np.array([estimate[1][0] for estimate in param_estimates])
    uppers = np.array([estimate[1][1] for estimate in param_estimates])
    return (grid, np.mean(uppers - lowers, axis=0))

def coverage(param_estimates, true_params):
    """Pointwise fraction of replications whose interval covers the truth.

    true_params[i] is the true parameter vector for replication i.
    Returns (X_test, coverage_rate_per_test_point).
    """
    grid = param_estimates[0][0][0]
    hits = []
    for estimate, target in zip(param_estimates, true_params):
        lower, upper = estimate[1]
        hits.append((lower <= target) & (target <= upper))
    return (grid, np.mean(hits, axis=0))

def coverage_band(param_estimates, true_params):
    """Per replication, whether the interval covers the truth at EVERY point.

    Returns a boolean array with one entry per replication.
    """
    band_hits = []
    for estimate, target in zip(param_estimates, true_params):
        lower, upper = estimate[1]
        band_hits.append((lower <= target).all() & (target <= upper).all())
    return np.array(band_hits)

def transform_identity(x, dgp, method, metric, config):
    """Return the metric values for (dgp, method) unchanged."""
    return x[dgp][method][metric]

def transform_diff(x, dgp, method, metric, config):
    """Metric difference between this method and config['proposed_method']."""
    baseline = x[dgp][config['proposed_method']][metric]
    return x[dgp][method][metric] - baseline

def transform_ratio(x, dgp, method, metric, config):
    """Percentage improvement of the proposed method relative to this method."""
    current = x[dgp][method][metric]
    proposed = x[dgp][config['proposed_method']][metric]
    return 100 * (current - proposed) / current

def transform_agg_mean(x, dgp, method, metric, config):
    """Aggregate the metric by its mean along axis 1.

    NOTE(review): assumes x[dgp][method][metric] is at least 2-D
    (presumably replications x test points) -- confirm with callers.
    """
    values = x[dgp][method][metric]
    return np.mean(values, axis=1)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Especially for functions like this it's important to document the expected shapes of the inputs.


def transform_agg_median(x, dgp, method, metric, config):
    """Aggregate the metric by its median along axis 1 (expects >= 2-D input)."""
    values = x[dgp][method][metric]
    return np.median(values, axis=1)

def transform_agg_max(x, dgp, method, metric, config):
    """Aggregate the metric by its maximum along axis 1 (expects >= 2-D input)."""
    values = x[dgp][method][metric]
    return np.max(values, axis=1)

def transform_diff_positive(x, dgp, method, metric, config):
    """True where this method's metric is >= the proposed method's metric."""
    baseline = x[dgp][config['proposed_method']][metric]
    return x[dgp][method][metric] - baseline >= 0
Loading