
Commit e2fcc6f

Extra logging
1 parent 35146f1 commit e2fcc6f

5 files changed: +19 -23 lines


causal_testing/json_front/json_class.py

Lines changed: 1 addition & 0 deletions
@@ -286,6 +286,7 @@ def _setup_test(
             "outcome": causal_test_case.outcome_variable.name,
             "df": causal_test_engine.scenario_execution_data_df,
             "effect_modifiers": causal_test_case.effect_modifier_configuration,
+            "alpha": test['alpha'] if 'alpha' in test else 0.05
         }
         if "formula" in test:
             estimator_kwargs["formula"] = test["formula"]

causal_testing/specification/metamorphic_relation.py

Lines changed: 1 addition & 0 deletions
@@ -181,6 +181,7 @@ def to_json_stub(self, skip=True) -> dict:
             "mutations": [self.treatment_var],
             "expected_effect": {self.output_var: "NoEffect"},
             "formula": f"{self.output_var} ~ {' + '.join([self.treatment_var] + self.adjustment_vars)}",
+            "alpha": 0.01,
             "skip": skip,
         }
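
The generated stub now pins alpha to 0.01 rather than inheriting the estimator default. For a hypothetical relation with treatment variable x, output y, and adjustment variable z, the dict returned by to_json_stub would look roughly like this (showing only the fields visible in this hunk; the variable names are made up):

{
    "mutations": ["x"],
    "expected_effect": {"y": "NoEffect"},
    "formula": "y ~ x + z",
    "alpha": 0.01,
    "skip": True,
}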

causal_testing/testing/causal_test_outcome.py

Lines changed: 4 additions & 0 deletions
@@ -51,6 +51,10 @@ def apply(self, res: CausalTestResult) -> bool:
         ci_low = res.ci_low() if isinstance(res.ci_low(), Iterable) else [res.ci_low()]
         ci_high = res.ci_high() if isinstance(res.ci_high(), Iterable) else [res.ci_high()]
         value = res.test_value.value if isinstance(res.ci_high(), Iterable) else [res.test_value.value]
+
+        if not all(ci_low < 0 < ci_high for ci_low, ci_high in zip(ci_low, ci_high)):
+            print("FAILING ON", [(ci_low, ci_high) for ci_low, ci_high in zip(ci_low, ci_high) if not ci_low < 0 < ci_high])
+
         return all(ci_low < 0 < ci_high for ci_low, ci_high in zip(ci_low, ci_high)) or all(
             abs(v) < self.atol for v in value
         )
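
The extra logging fires when the NoEffect check is about to fail on its confidence-interval branch: the expectation holds only if every interval straddles zero (or every point estimate is within atol). A standalone sketch of the same predicate with toy numbers, not taken from any real test run:

ci_low = [-0.3, -0.1, 0.2]
ci_high = [0.4, 0.2, 0.5]

# Intervals that do not contain zero are the ones reported by the new print statement
failing = [(lo, hi) for lo, hi in zip(ci_low, ci_high) if not lo < 0 < hi]
print("FAILING ON", failing)  # FAILING ON [(0.2, 0.5)]

# Corresponding pass/fail decision on this branch
no_effect = all(lo < 0 < hi for lo, hi in zip(ci_low, ci_high))  # False here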

causal_testing/testing/causal_test_result.py

Lines changed: 3 additions & 1 deletion
@@ -56,14 +56,16 @@ def push(s, inc=" "):
             f"Treatment value: {self.estimator.treatment_value}\n"
             f"Outcome: {self.estimator.outcome}\n"
             f"Adjustment set: {self.adjustment_set}\n"
-            f"{self.test_value.type}: {result_str}\n"
+            f"Formula: {self.estimator.formula}\n"
+            f"{self.test_value.type.capitalize()}: {result_str}\n"
         )
         confidence_str = ""
         if self.confidence_intervals:
             ci_str = " " + str(self.confidence_intervals)
             if "\n" in ci_str:
                 ci_str = " " + push(pd.DataFrame(self.confidence_intervals).transpose().to_string(header=False))
             confidence_str += f"Confidence intervals:{ci_str}\n"
+            confidence_str += f"Alpha:{self.estimator.alpha}\n"
         return base_str + confidence_str

     def to_dict(self):
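
With the Formula line and the trailing Alpha line added, the rendered summary string would read roughly as follows. Only lines touched by or adjacent to this hunk are shown, and all values are illustrative; note that the diff writes "Alpha:" without a space after the colon:

Treatment value: 1.0
Outcome: y
Adjustment set: set()
Formula: y ~ x
Ate: 2.5
Confidence intervals: [1.8, 3.2]
Alpha:0.05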

causal_testing/testing/estimators.py

Lines changed: 10 additions & 22 deletions
@@ -49,13 +49,15 @@ def __init__(
         outcome: str,
         df: pd.DataFrame = None,
         effect_modifiers: dict[str:Any] = None,
+        alpha: float = 0.05
     ):
         self.treatment = treatment
         self.treatment_value = treatment_value
         self.control_value = control_value
         self.adjustment_set = adjustment_set
         self.outcome = outcome
         self.df = df
+        self.alpha = alpha
         if effect_modifiers is None:
             self.effect_modifiers = {}
         elif isinstance(effect_modifiers, dict):

@@ -233,7 +235,7 @@ def estimate_ate(self, bootstrap_size=100, adjustment_config=None) -> float:
             return estimate, (None, None)

         bootstraps = sorted(list(treatment_bootstraps - control_bootstraps))
-        bound = int((bootstrap_size * 0.05) / 2)
+        bound = int((bootstrap_size * self.alpha) / 2)
         ci_low = bootstraps[bound]
         ci_high = bootstraps[bootstrap_size - bound]

@@ -263,7 +265,7 @@ def estimate_risk_ratio(self, bootstrap_size=100, adjustment_config=None) -> float:
             return estimate, (None, None)

         bootstraps = sorted(list(treatment_bootstraps / control_bootstraps))
-        bound = ceil((bootstrap_size * 0.05) / 2)
+        bound = ceil((bootstrap_size * self.alpha) / 2)
         ci_low = bootstraps[bound]
         ci_high = bootstraps[bootstrap_size - bound]

@@ -301,8 +303,9 @@ def __init__(
         df: pd.DataFrame = None,
         effect_modifiers: dict[Variable:Any] = None,
         formula: str = None,
+        alpha: float = 0.05
     ):
-        super().__init__(treatment, treatment_value, control_value, adjustment_set, outcome, df, effect_modifiers)
+        super().__init__(treatment, treatment_value, control_value, adjustment_set, outcome, df, effect_modifiers, alpha=alpha)

         self.model = None
         if effect_modifiers is None:

@@ -336,7 +339,6 @@ def estimate_unit_ate(self) -> float:
         """
         model = self._run_linear_regression()
         newline = "\n"
-        print(model.conf_int())
         treatment = [self.treatment]
         if str(self.df.dtypes[self.treatment]) == "object":
             design_info = dmatrix(self.formula.split("~")[1], self.df).design_info

@@ -372,7 +374,7 @@ def estimate_ate(self) -> tuple[float, list[float, float], float]:
         # Perform a t-test to compare the predicted outcome of the control and treated individual (ATE)
         t_test_results = model.t_test(individuals.loc["treated"] - individuals.loc["control"])
         ate = t_test_results.effect[0]
-        confidence_intervals = list(t_test_results.conf_int().flatten())
+        confidence_intervals = list(t_test_results.conf_int(alpha=self.alpha).flatten())
         return ate, confidence_intervals

     def estimate_control_treatment(self, adjustment_config: dict = None) -> tuple[pd.Series, pd.Series]:

@@ -434,25 +436,11 @@ def _run_linear_regression(self) -> RegressionResultsWrapper:

         :return: The model after fitting to data.
         """
-        # 1. Reduce dataframe to contain only the necessary columns
-        reduced_df = self.df.copy()
-        necessary_cols = [self.treatment] + list(self.adjustment_set) + [self.outcome]
-        missing_rows = reduced_df[necessary_cols].isnull().any(axis=1)
-        reduced_df = reduced_df[~missing_rows]
-        reduced_df = reduced_df.sort_values([self.treatment])
-        logger.debug(reduced_df[necessary_cols])
-
-        # 2. Add intercept
-        reduced_df["Intercept"] = 1 # self.intercept
-
-        # 3. Estimate the unit difference in outcome caused by unit difference in treatment
-        cols = [self.treatment]
-        cols += [x for x in self.adjustment_set if x not in cols]
         model = smf.ols(formula=self.formula, data=self.df).fit()
         return model

     def _get_confidence_intervals(self, model, treatment):
-        confidence_intervals = model.conf_int(alpha=0.05, cols=None)
+        confidence_intervals = model.conf_int(alpha=self.alpha, cols=None)
         ci_low, ci_high = (
             confidence_intervals[0].loc[treatment],
             confidence_intervals[1].loc[treatment],

@@ -519,7 +507,7 @@ def estimate_unit_ate(self, bootstrap_size=100):
         bootstraps = sorted(
             [self.estimate_coefficient(self.df.sample(len(self.df), replace=True)) for _ in range(bootstrap_size)]
         )
-        bound = ceil((bootstrap_size * 0.05) / 2)
+        bound = ceil((bootstrap_size * self.alpha) / 2)
         ci_low = bootstraps[bound]
         ci_high = bootstraps[bootstrap_size - bound]

@@ -610,7 +598,7 @@ def estimate_cates(self) -> pd.DataFrame:
         # Obtain CATES and confidence intervals
         conditional_ates = model.effect(effect_modifier_df, T0=self.control_value, T1=self.treatment_value).flatten()
         [ci_low, ci_high] = model.effect_interval(
-            effect_modifier_df, T0=self.control_value, T1=self.treatment_value, alpha=0.05
+            effect_modifier_df, T0=self.control_value, T1=self.treatment_value, alpha=self.alpha
         )

         # Merge results into a dataframe (CATE, confidence intervals, and effect modifier values)
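
All of the hard-coded 0.05 significance levels are replaced by the new self.alpha attribute, so both the parametric and the bootstrap confidence intervals respect the per-test setting. Below is a self-contained sketch of the two mechanisms on a toy dataset; nothing here comes from the repository, and the statsmodels and pandas calls follow their public APIs:

import numpy as np
import pandas as pd
import statsmodels.formula.api as smf

rng = np.random.default_rng(0)
df = pd.DataFrame({"x": rng.normal(size=200)})
df["y"] = 2 * df["x"] + rng.normal(size=200)

# Parametric intervals: alpha=0.01 yields wider (99%) intervals than alpha=0.05 (95%)
model = smf.ols(formula="y ~ x", data=df).fit()
print(model.conf_int(alpha=0.05).loc["x"])
print(model.conf_int(alpha=0.01).loc["x"])

# Bootstrap intervals: alpha sets how many sorted bootstrap estimates are trimmed
# from each tail, mirroring bound = int((bootstrap_size * self.alpha) / 2) in the diff
bootstrap_size, alpha = 100, 0.05
bootstraps = sorted(
    smf.ols("y ~ x", data=df.sample(len(df), replace=True)).fit().params["x"]
    for _ in range(bootstrap_size)
)
bound = int((bootstrap_size * alpha) / 2)
ci_low, ci_high = bootstraps[bound], bootstraps[bootstrap_size - bound]
print(ci_low, ci_high)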
