Skip to content

Commit a01190a

Browse files
committed
Bootstrap kurtosis coverage
1 parent ff78e40 commit a01190a

File tree

2 files changed

+27
-30
lines changed

2 files changed

+27
-30
lines changed

causal_testing/json_front/json_class.py

Lines changed: 5 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
from statistics import StatisticsError
1212

1313
import pandas as pd
14+
import numpy as np
1415
import scipy
1516
from fitter import Fitter, get_common_distributions
1617

@@ -269,23 +270,10 @@ def _execute_test_case(
269270

270271
if "coverage" in test and test["coverage"]:
271272
adequacy = DataAdequacy(causal_test_case, causal_test_engine, estimation_model)
272-
results = adequacy.measure_adequacy_bootstrap(100)
273-
outcomes = [causal_test_case.expected_causal_effect.apply(c) for c in results]
274-
coverage = pd.DataFrame(c.to_dict() for c in results)[["effect_estimate", "ci_low", "ci_high"]]
275-
coverage["pass"] = outcomes
276-
std = coverage.std(numeric_only=True)
277-
self._append_to_file(f"COVERAGE: {coverage['pass'].sum()}", logging.INFO)
278-
# std["pass"] = coverage["pass"].sum()
279-
# print(coverage)
280-
# print(std)
281-
282-
# k_folds = adequacy.measure_adequacy_k_folds()
283-
284-
# import matplotlib.pyplot as plt
285-
#
286-
# plt.hist(coverage["ci_low"], alpha=0.8)
287-
# plt.hist(coverage["ci_high"], alpha=0.8)
288-
# plt.show()
273+
effect_estimate, ci_low, ci_high, outcomes = adequacy.measure_adequacy(100)
274+
275+
self._append_to_file(f"KURTOSIS: {effect_estimate.mean()}", logging.INFO)
276+
self._append_to_file(f"PASSING:\n{sum(outcomes)}/{len(outcomes)}", logging.INFO)
289277

290278
if causal_test_result.ci_low() is not None and causal_test_result.ci_high() is not None:
291279
result_string = (
@@ -398,7 +386,6 @@ def get_args(test_args=None) -> argparse.Namespace:
398386
parser.add_argument(
399387
"--log_path",
400388
help="Specify a directory to change the location of the log file",
401-
default="./json_frontend.log",
402389
)
403390
parser.add_argument(
404391
"--data_path",

causal_testing/testing/causal_test_adequacy.py

Lines changed: 22 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
from sklearn.metrics import mean_squared_error as mse
1313
import numpy as np
1414
from sklearn.model_selection import cross_val_score
15+
import pandas as pd
1516

1617

1718
class DAGAdequacy:
@@ -38,21 +39,30 @@ def __init__(self, test_case: CausalTestCase, test_engine: CausalTestEngine, est
3839
self.test_engine = test_engine
3940
self.estimator = estimator
4041

41-
def measure_adequacy_bootstrap(self, bootstrap_size: int = 100):
42+
def measure_adequacy(self, bootstrap_size: int = 100):
4243
results = []
4344
for i in range(bootstrap_size):
4445
estimator = deepcopy(self.estimator)
4546
estimator.df = estimator.df.sample(len(estimator.df), replace=True, random_state=i)
4647
results.append(self.test_engine.execute_test(estimator, self.test_case))
47-
return results
48+
outcomes = [self.test_case.expected_causal_effect.apply(c) for c in results]
49+
results = pd.DataFrame(c.to_dict() for c in results)[["effect_estimate", "ci_low", "ci_high"]]
4850

49-
def measure_adequacy_k_folds(self, k: int = 10, random_state=0):
50-
results = []
51-
kf = KFold(n_splits=k, shuffle=True, random_state=random_state)
52-
for train_inx, test_inx in kf.split(self.estimator.df):
53-
estimator = deepcopy(self.estimator)
54-
test = estimator.df.iloc[test_inx]
55-
estimator.df = estimator.df.iloc[train_inx]
56-
test_result = estimator.model.predict(test)
57-
results.append(np.sqrt(mse(test_result, test[self.test_case.base_test_case.outcome_variable.name])).mean())
58-
return np.mean(results)
51+
def convert_to_df(field):
52+
converted = []
53+
for r in results[field]:
54+
if isinstance(r, float):
55+
converted.append(
56+
pd.DataFrame({self.test_case.base_test_case.treatment_variable.name: [r]}).transpose()
57+
)
58+
else:
59+
converted.append(r)
60+
return converted
61+
62+
for field in ["effect_estimate", "ci_low", "ci_high"]:
63+
results[field] = convert_to_df(field)
64+
65+
effect_estimate = pd.concat(results["effect_estimate"].tolist(), axis=1).transpose().reset_index(drop=True)
66+
ci_low = pd.concat(results["ci_low"].tolist(), axis=1).transpose()
67+
ci_high = pd.concat(results["ci_high"].tolist(), axis=1).transpose()
68+
return effect_estimate.kurtosis(), ci_low.kurtosis(), ci_high.kurtosis(), outcomes

0 commit comments

Comments (0)