Skip to content

Commit 6ebbdc8

Browse files
authored
Merge branch 'main' into fix-linalg-error
2 parents 4b4aae8 + 67213a4 commit 6ebbdc8

File tree

6 files changed

+35
-203
lines changed

6 files changed

+35
-203
lines changed

causal_testing/json_front/json_class.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -270,9 +270,6 @@ def _execute_test_case(
270270
failed = False
271271

272272
estimation_model = self._setup_test(causal_test_case=causal_test_case, test=test)
273-
if "formula" in test:
274-
if not estimation_model.validate_formula(self.causal_specification.causal_dag):
275-
raise ValueError("Formula covariates do not satisfy the constructive back-door criterion.")
276273
causal_test_result = causal_test_case.execute_test(
277274
estimator=estimation_model, data_collector=self.data_collector
278275
)
@@ -334,7 +331,6 @@ def _setup_test(self, causal_test_case: CausalTestCase, test: Mapping) -> Estima
334331
estimator_kwargs["alpha"] = test["alpha"] if "alpha" in test else 0.05
335332

336333
estimation_model = test["estimator"](**estimator_kwargs)
337-
338334
return estimation_model
339335

340336
def _append_to_file(self, line: str, log_level: int = None):

causal_testing/testing/estimators.py

Lines changed: 19 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,13 @@
1010
import statsmodels.api as sm
1111
import statsmodels.formula.api as smf
1212
from econml.dml import CausalForestDML
13-
from patsy import dmatrix, ModelDesc
13+
from patsy import dmatrix
1414

1515
from sklearn.ensemble import GradientBoostingRegressor
1616
from statsmodels.regression.linear_model import RegressionResultsWrapper
1717
from statsmodels.tools.sm_exceptions import PerfectSeparationError
1818

1919
from causal_testing.specification.variable import Variable
20-
from causal_testing.specification.causal_dag import CausalDAG
2120

2221
logger = logging.getLogger(__name__)
2322

@@ -84,92 +83,7 @@ def compute_confidence_intervals(self) -> list[float, float]:
8483
"""
8584

8685

87-
class RegressionEstimator(Estimator):
88-
"""An abstract class extending the Estimator functionality to add support for formulae, which are used in
89-
regression based estimators.
90-
91-
"""
92-
93-
def __init__(
94-
# pylint: disable=too-many-arguments
95-
self,
96-
treatment: str,
97-
treatment_value: float,
98-
control_value: float,
99-
adjustment_set: set,
100-
outcome: str,
101-
df: pd.DataFrame = None,
102-
effect_modifiers: dict[str:Any] = None,
103-
formula: str = None,
104-
alpha: float = 0.05,
105-
):
106-
super().__init__(
107-
treatment, treatment_value, control_value, adjustment_set, outcome, df, effect_modifiers, alpha=alpha
108-
)
109-
110-
if effect_modifiers is None:
111-
effect_modifiers = []
112-
113-
if formula is not None:
114-
self.formula = formula
115-
116-
else:
117-
terms = [treatment] + sorted(list(adjustment_set)) + sorted(list(effect_modifiers))
118-
self.formula = f"{outcome} ~ {'+'.join(terms)}"
119-
120-
@abstractmethod
121-
def add_modelling_assumptions(self):
122-
"""
123-
Add modelling assumptions to the estimator. This is a list of strings which list the modelling assumptions that
124-
must hold if the resulting causal inference is to be considered valid.
125-
"""
126-
127-
def get_terms_from_formula(self) -> tuple[str, str, list[str]]:
128-
"""
129-
Parse all the terms from a Patsy formula string into outcome, treatment and covariate variables.
130-
131-
Formulae are expected to only have a single left hand side term.
132-
133-
:return: a tuple containing the outcome, treatment and covariate variable names in string format
134-
"""
135-
desc = ModelDesc.from_formula(self.formula)
136-
if len(desc.lhs_termlist) > 1:
137-
raise ValueError("More than 1 left hand side term provided in formula, only single term is accepted")
138-
outcome = desc.lhs_termlist[0].factors[0].code
139-
rhs_terms = set()
140-
for term in desc.rhs_termlist:
141-
if term.factors:
142-
rhs_terms.add(term.factors[0].code)
143-
if self.treatment not in rhs_terms:
144-
raise ValueError(f"Treatment variable '{self.treatment}' not found in formula")
145-
rhs_terms.remove(self.treatment)
146-
covariates = rhs_terms
147-
if covariates is None:
148-
covariates = []
149-
else:
150-
covariates = list(covariates)
151-
return outcome, self.treatment, covariates
152-
153-
def validate_formula(self, causal_dag: CausalDAG):
154-
"""
155-
Validate the provided Patsy formula string using the constructive backdoor criterion method found in the
156-
CausalDAG class
157-
158-
:param causal_dag: A CausalDAG object for the current test scenario
159-
:return: True for a formula that does not violate the criteria and False if the formula does violate the
160-
criteria
161-
"""
162-
outcome, treatment, covariates = self.get_terms_from_formula()
163-
proper_backdoor_graph = causal_dag.get_proper_backdoor_graph(treatments=[treatment], outcomes=[outcome])
164-
return causal_dag.constructive_backdoor_criterion(
165-
proper_backdoor_graph=proper_backdoor_graph,
166-
treatments=[treatment],
167-
outcomes=[outcome],
168-
covariates=list(covariates),
169-
)
170-
171-
172-
class LogisticRegressionEstimator(RegressionEstimator):
86+
class LogisticRegressionEstimator(Estimator):
17387
"""A Logistic Regression Estimator is a parametric estimator which restricts the variables in the data to a linear
17488
combination of parameters and functions of the variables (note these functions need not be linear). It is designed
17589
for estimating categorical outcomes.
@@ -187,12 +101,16 @@ def __init__(
187101
effect_modifiers: dict[str:Any] = None,
188102
formula: str = None,
189103
):
190-
super().__init__(
191-
treatment, treatment_value, control_value, adjustment_set, outcome, df, effect_modifiers, formula
192-
)
104+
super().__init__(treatment, treatment_value, control_value, adjustment_set, outcome, df, effect_modifiers)
193105

194106
self.model = None
195107

108+
if formula is not None:
109+
self.formula = formula
110+
else:
111+
terms = [treatment] + sorted(list(adjustment_set)) + sorted(list(self.effect_modifiers))
112+
self.formula = f"{outcome} ~ {'+'.join(((terms)))}"
113+
196114
def add_modelling_assumptions(self):
197115
"""
198116
Add modelling assumptions to the estimator. This is a list of strings which list the modelling assumptions that
@@ -356,7 +274,7 @@ def estimate_unit_odds_ratio(self) -> float:
356274
return np.exp(model.params[self.treatment])
357275

358276

359-
class LinearRegressionEstimator(RegressionEstimator):
277+
class LinearRegressionEstimator(Estimator):
360278
"""A Linear Regression Estimator is a parametric estimator which restricts the variables in the data to a linear
361279
combination of parameters and functions of the variables (note these functions need not be linear).
362280
"""
@@ -375,18 +293,18 @@ def __init__(
375293
alpha: float = 0.05,
376294
):
377295
super().__init__(
378-
treatment,
379-
treatment_value,
380-
control_value,
381-
adjustment_set,
382-
outcome,
383-
df,
384-
effect_modifiers,
385-
alpha=alpha,
386-
formula=formula,
296+
treatment, treatment_value, control_value, adjustment_set, outcome, df, effect_modifiers, alpha=alpha
387297
)
388298

389299
self.model = None
300+
if effect_modifiers is None:
301+
effect_modifiers = []
302+
303+
if formula is not None:
304+
self.formula = formula
305+
else:
306+
terms = [treatment] + sorted(list(adjustment_set)) + sorted(list(effect_modifiers))
307+
self.formula = f"{outcome} ~ {'+'.join(terms)}"
390308

391309
for term in self.effect_modifiers:
392310
self.adjustment_set.add(term)

tests/json_front_tests/test_json_class.py

Lines changed: 13 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,19 @@ def test_concrete_generate_params(self):
272272
self.assertIn("failed", temp_out[-1])
273273

274274
def test_no_data_provided(self):
275+
example_test = {
276+
"tests": [
277+
{
278+
"name": "test1",
279+
"mutations": {"test_input": "Increase"},
280+
"estimator": "LinearRegressionEstimator",
281+
"estimate_type": "ate",
282+
"effect_modifiers": [],
283+
"expected_effect": {"test_output": "NoEffect"},
284+
"skip": False,
285+
}
286+
]
287+
}
275288
json_class = JsonUtility("temp_out.txt", True)
276289
json_class.set_paths(self.json_path, self.dag_path)
277290

@@ -303,59 +316,6 @@ def test_estimator_formula_type_check(self):
303316
with self.assertRaises(TypeError):
304317
self.json_class.run_json_tests(effects=effects, mutates=mutates, estimators=estimators, f_flag=False)
305318

306-
307-
def test_constructive_back_door_not_met(self):
308-
example_test = {
309-
"tests": [
310-
{
311-
"name": "test1",
312-
"mutations": {"X": "Increase"},
313-
"estimator": "LinearRegressionEstimator",
314-
"estimate_type": "ate",
315-
"effect_modifiers": [],
316-
"expected_effect": {"Y": "NoEffect"},
317-
"skip": False,
318-
"formula": "Y ~ X",
319-
}
320-
]
321-
}
322-
inputs = [
323-
Input("X", int),
324-
Input("Z", int)
325-
]
326-
outputs = [
327-
Output("Y", int)
328-
]
329-
variables = inputs + outputs
330-
modelling_scenario = Scenario(variables)
331-
modelling_scenario.setup_treatment_variables()
332-
json_utility = JsonUtility("temp_out.txt", True)
333-
test_data_dir_path = Path("tests/resources/data")
334-
dag_path = str(test_data_dir_path / "dag_not_descendent.dot")
335-
data_path = [str(test_data_dir_path / "not_descendent.csv")]
336-
input_dict_list = [
337-
{"name": "X", "datatype": float},
338-
{"name": "Z", "datatype": float},
339-
]
340-
output_dict_list = [{"name": "Y", "datatype": float}]
341-
variables = CausalVariables(
342-
inputs=input_dict_list, outputs=output_dict_list, metas=None
343-
)
344-
345-
effects = {"NoEffect": NoEffect()}
346-
mutates = {
347-
"Increase": lambda x: json_utility.scenario.treatment_variables[x].z3
348-
> json_utility.scenario.variables[x].z3
349-
}
350-
estimators = {"LinearRegressionEstimator": LinearRegressionEstimator}
351-
352-
scenario = Scenario(variables=variables, constraints=None)
353-
json_utility.set_paths(self.json_path, dag_path, data_path)
354-
json_utility.setup(scenario)
355-
json_utility.test_plan = example_test
356-
with self.assertRaises(ValueError):
357-
json_utility.run_json_tests(effects=effects, mutates=mutates, estimators=estimators, f_flag=False)
358-
359319
def tearDown(self) -> None:
360320
remove_temp_dir_if_existent()
361321
if os.path.exists("temp_out.txt"):

tests/resources/data/dag_not_descendent.dot

Lines changed: 0 additions & 1 deletion
This file was deleted.

tests/resources/data/not_descendent.csv

Lines changed: 0 additions & 2 deletions
This file was deleted.

tests/testing_tests/test_estimators.py

Lines changed: 3 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
CausalForestEstimator,
88
LogisticRegressionEstimator,
99
InstrumentalVariableEstimator,
10-
RegressionEstimator,
1110
)
1211
from causal_testing.specification.variable import Input
1312
from causal_testing.utils.validation import CausalValidator
@@ -125,15 +124,15 @@ def test_ate_adjustment(self):
125124
logistic_regression_estimator = LogisticRegressionEstimator(
126125
"length_in", 65, 55, {"large_gauge"}, "completed", df
127126
)
128-
ate, _ = logistic_regression_estimator.estimate_ate(adjustment_config={"large_gauge": 0})
127+
ate, _ = logistic_regression_estimator.estimate_ate(adjustment_config = {"large_gauge": 0})
129128
self.assertEqual(round(ate, 4), -0.3388)
130129

131130
def test_ate_invalid_adjustment(self):
132131
df = self.scarf_df.copy()
133132
logistic_regression_estimator = LogisticRegressionEstimator("length_in", 65, 55, {}, "completed", df)
134133
with self.assertRaises(ValueError):
135134
ate, _ = logistic_regression_estimator.estimate_ate(
136-
adjustment_config={"large_gauge": 0}
135+
adjustment_config = {"large_gauge": 0}
137136
)
138137

139138
def test_ate_effect_modifiers(self):
@@ -395,7 +394,7 @@ def test_program_15_no_interaction_ate_calculated(self):
395394
# for term_to_square in terms_to_square:
396395

397396
ate, [ci_low, ci_high] = linear_regression_estimator.estimate_ate_calculated(
398-
adjustment_config={k: self.nhefs_df.mean()[k] for k in covariates}
397+
adjustment_config = {k: self.nhefs_df.mean()[k] for k in covariates}
399398
)
400399
self.assertEqual(round(ate, 1), 3.5)
401400
self.assertEqual([round(ci_low, 1), round(ci_high, 1)], [1.9, 5])
@@ -492,41 +491,3 @@ def test_X1_effect(self):
492491
test_results = lr_model.estimate_ate()
493492
ate = test_results[0]
494493
self.assertAlmostEqual(ate, 2.0)
495-
496-
497-
class TestRegressionEstimator(unittest.TestCase):
498-
"""Test the extended functionality of the RegressionEstimator"""
499-
500-
@classmethod
501-
def setUpClass(cls):
502-
class RegressionEstimatorTesting(RegressionEstimator):
503-
def add_modelling_assumptions(self):
504-
pass
505-
506-
cls.regression_estimator = RegressionEstimatorTesting("X", 1, 0, {"Z"}, "Y", formula="Y ~ X + Z")
507-
508-
def test_get_formulae(self):
509-
outcome, treatment, covariates = self.regression_estimator.get_terms_from_formula()
510-
self.assertEqual(outcome, "Y")
511-
self.assertEqual(treatment, "X")
512-
self.assertEqual(covariates, ["Z"])
513-
514-
def test_multiple_lhs_terms(self):
515-
regression_estimator = self.regression_estimator
516-
regression_estimator.formula = "Y + Z ~ X"
517-
with self.assertRaises(ValueError):
518-
self.regression_estimator.get_terms_from_formula()
519-
520-
def test_no_treatment_variable_in_formula(self):
521-
regression_estimator = self.regression_estimator
522-
regression_estimator.formula = "Y ~ A + Z"
523-
with self.assertRaises(ValueError):
524-
self.regression_estimator.get_terms_from_formula()
525-
526-
527-
def test_no_covariate_in_formula(self):
528-
regression_estimator = self.regression_estimator
529-
regression_estimator.formula = "Y ~ X"
530-
outcome, treatment, covariates = self.regression_estimator.get_terms_from_formula()
531-
self.assertEqual(outcome, "Y")
532-
self.assertEqual(treatment, "X")

0 commit comments

Comments
 (0)