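We now support placing conditions on the data again: estimators accept an optional `query` string, which is applied to the data via `DataFrame.query` and recorded as a modelling assumption.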

jmafoster1 · jmafoster1 · commit 868ea5d9b53d · 2024-01-30T09:16:43.000Z
diff --git a/causal_testing/json_front/json_class.py b/causal_testing/json_front/json_class.py
@@ -301,9 +301,6 @@ def _setup_test(self, causal_test_case: CausalTestCase, test: Mapping) -> Estima
         """Create the necessary inputs for a single test case
         :param causal_test_case: The concrete test case to be executed
         :param test: Single JSON test definition stored in a mapping (dict)
-        :param conditions: A list of conditions which should be applied to the
-        data. Conditions should be in the query format detailed at
-        https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.query.html
         :returns:
                 - estimation_model - Estimator instance for the test being run
         """
@@ -315,6 +312,7 @@ def _setup_test(self, causal_test_case: CausalTestCase, test: Mapping) -> Estima
                     "formulas"
                 )
             estimator_kwargs["formula"] = test["formula"]
+            estimator_kwargs["query"] = test["query"] if "query" in test else ""
             estimator_kwargs["adjustment_set"] = None
         else:
             minimal_adjustment_set = self.causal_specification.causal_dag.identification(
@@ -328,6 +326,7 @@ def _setup_test(self, causal_test_case: CausalTestCase, test: Mapping) -> Estima
         estimator_kwargs["control_value"] = causal_test_case.control_value
         estimator_kwargs["outcome"] = causal_test_case.outcome_variable.name
         estimator_kwargs["effect_modifiers"] = causal_test_case.effect_modifier_configuration
+        estimator_kwargs["df"] = self.data_collector.collect_data()
         estimator_kwargs["alpha"] = test["alpha"] if "alpha" in test else 0.05
 
         estimation_model = test["estimator"](**estimator_kwargs)
diff --git a/causal_testing/testing/estimators.py b/causal_testing/testing/estimators.py
@@ -50,21 +50,25 @@ def __init__(
         df: pd.DataFrame = None,
         effect_modifiers: dict[str:Any] = None,
         alpha: float = 0.05,
+        query: str = "",
     ):
         self.treatment = treatment
         self.treatment_value = treatment_value
         self.control_value = control_value
         self.adjustment_set = adjustment_set
         self.outcome = outcome
-        self.df = df
         self.alpha = alpha
+        self.df = df.query(query) if query else df
+
         if effect_modifiers is None:
             self.effect_modifiers = {}
         elif isinstance(effect_modifiers, dict):
             self.effect_modifiers = effect_modifiers
         else:
             raise ValueError(f"Unsupported type for effect_modifiers {effect_modifiers}. Expected iterable")
         self.modelling_assumptions = []
+        if query:
+            self.modelling_assumptions.append(query)
         self.add_modelling_assumptions()
         logger.debug("Effect Modifiers: %s", self.effect_modifiers)
 
@@ -100,8 +104,18 @@ def __init__(
         df: pd.DataFrame = None,
         effect_modifiers: dict[str:Any] = None,
         formula: str = None,
+        query: str = "",
     ):
-        super().__init__(treatment, treatment_value, control_value, adjustment_set, outcome, df, effect_modifiers)
+        super().__init__(
+            treatment=treatment,
+            treatment_value=treatment_value,
+            control_value=control_value,
+            adjustment_set=adjustment_set,
+            outcome=outcome,
+            df=df,
+            effect_modifiers=effect_modifiers,
+            query=query,
+        )
 
         self.model = None
 
@@ -116,13 +130,13 @@ def add_modelling_assumptions(self):
         Add modelling assumptions to the estimator. This is a list of strings which list the modelling assumptions that
         must hold if the resulting causal inference is to be considered valid.
         """
-        self.modelling_assumptions += (
+        self.modelling_assumptions.append(
             "The variables in the data must fit a shape which can be expressed as a linear"
             "combination of parameters and functions of variables. Note that these functions"
             "do not need to be linear."
         )
-        self.modelling_assumptions += "The outcome must be binary."
-        self.modelling_assumptions += "Independently and identically distributed errors."
+        self.modelling_assumptions.append("The outcome must be binary.")
+        self.modelling_assumptions.append("Independently and identically distributed errors.")
 
     def _run_logistic_regression(self, data) -> RegressionResultsWrapper:
         """Run logistic regression of the treatment and adjustment set against the outcome and return the model.
@@ -291,9 +305,18 @@ def __init__(
         effect_modifiers: dict[Variable:Any] = None,
         formula: str = None,
         alpha: float = 0.05,
+        query: str = "",
     ):
         super().__init__(
-            treatment, treatment_value, control_value, adjustment_set, outcome, df, effect_modifiers, alpha=alpha
+            treatment,
+            treatment_value,
+            control_value,
+            adjustment_set,
+            outcome,
+            df,
+            effect_modifiers,
+            alpha=alpha,
+            query=query,
         )
 
         self.model = None
@@ -314,7 +337,7 @@ def add_modelling_assumptions(self):
         Add modelling assumptions to the estimator. This is a list of strings which list the modelling assumptions that
         must hold if the resulting causal inference is to be considered valid.
         """
-        self.modelling_assumptions += (
+        self.modelling_assumptions.append(
             "The variables in the data must fit a shape which can be expressed as a linear"
             "combination of parameters and functions of variables. Note that these functions"
             "do not need to be linear."
@@ -468,13 +491,17 @@ def add_modelling_assumptions(self):
         Add modelling assumptions to the estimator. This is a list of strings which list the modelling assumptions that
         must hold if the resulting causal inference is to be considered valid.
         """
-        self.modelling_assumptions += """The instrument and the treatment, and the treatment and the outcome must be
+        self.modelling_assumptions.append(
+            """The instrument and the treatment, and the treatment and the outcome must be
         related linearly in the form Y = aX + b."""
-        self.modelling_assumptions += """The three IV conditions must hold
+        )
+        self.modelling_assumptions.append(
+            """The three IV conditions must hold
             (i) Instrument is associated with treatment
             (ii) Instrument does not affect outcome except through its potential effect on treatment
             (iii) Instrument and outcome do not share causes
         """
+        )
 
     def estimate_iv_coefficient(self, df):
         """
@@ -517,7 +544,7 @@ def add_modelling_assumptions(self):
 
         :return self: Update self.modelling_assumptions
         """
-        self.modelling_assumptions += "Non-parametric estimator: no restrictions imposed on the data."
+        self.modelling_assumptions.append("Non-parametric estimator: no restrictions imposed on the data.")
 
     def estimate_ate(self) -> float:
         """Estimate the average treatment effect.