Merge pull request #307 from CITCOM-project/jmafoster1/ignore-cycles-dafni

jmafoster1 · web-flow · commit 08ab5aaa9727 · 2025-02-17T15:25:19.000Z
Jmafoster1/ignore cycles dafni
diff --git a/causal_testing/estimation/abstract_regression_estimator.py b/causal_testing/estimation/abstract_regression_estimator.py
@@ -41,6 +41,7 @@ def __init__(
             outcome=outcome,
             df=df,
             effect_modifiers=effect_modifiers,
+            alpha=alpha,
             query=query,
         )
 
diff --git a/causal_testing/estimation/logistic_regression_estimator.py b/causal_testing/estimation/logistic_regression_estimator.py
@@ -3,6 +3,7 @@
 import logging
 
 import numpy as np
+import pandas as pd
 import statsmodels.formula.api as smf
 
 from causal_testing.estimation.abstract_regression_estimator import RegressionEstimator
@@ -31,11 +32,12 @@ def add_modelling_assumptions(self):
         self.modelling_assumptions.append("The outcome must be binary.")
         self.modelling_assumptions.append("Independently and identically distributed errors.")
 
-    def estimate_unit_odds_ratio(self) -> float:
+    def estimate_unit_odds_ratio(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
         """Estimate the odds ratio of increasing the treatment by one. In logistic regression, this corresponds to the
         coefficient of the treatment of interest.
 
         :return: The odds ratio. Confidence intervals are not yet supported.
         """
         model = self._run_regression(self.df)
-        return np.exp(model.params[self.treatment])
+        ci_low, ci_high = np.exp(model.conf_int(self.alpha).loc[self.treatment])
+        return pd.Series(np.exp(model.params[self.treatment])), [pd.Series(ci_low), pd.Series(ci_high)]
diff --git a/causal_testing/json_front/json_class.py b/causal_testing/json_front/json_class.py
@@ -11,6 +11,7 @@
 from statistics import StatisticsError
 
 import pandas as pd
+import numpy as np
 import scipy
 from fitter import Fitter, get_common_distributions
 
@@ -21,7 +22,7 @@
 from causal_testing.specification.scenario import Scenario
 from causal_testing.specification.variable import Input, Meta, Output
 from causal_testing.testing.causal_test_case import CausalTestCase
-from causal_testing.testing.causal_test_result import CausalTestResult
+from causal_testing.testing.causal_test_result import CausalTestResult, TestValue
 from causal_testing.testing.base_test_case import BaseTestCase
 from causal_testing.testing.causal_test_adequacy import DataAdequacy
 
@@ -136,8 +137,10 @@ def run_json_tests(self, effects: dict, estimators: dict, f_flag: bool = False,
                 failed, msg = self._run_concrete_metamorphic_test(test, f_flag, effects)
             # If we have a variable to mutate
             else:
-                if test["estimate_type"] == "coefficient":
-                    failed, msg = self._run_coefficient_test(test=test, f_flag=f_flag, effects=effects)
+                if test["estimate_type"] in ["coefficient", "unit_odds_ratio"]:
+                    failed, msg = self._run_coefficient_test(
+                        test=test, f_flag=f_flag, effects=effects, estimate_type=test["estimate_type"]
+                    )
                 else:
                     failed, msg = self._run_metamorphic_tests(
                         test=test, f_flag=f_flag, effects=effects, mutates=mutates
@@ -146,7 +149,7 @@ def run_json_tests(self, effects: dict, estimators: dict, f_flag: bool = False,
             test["result"] = msg
         return self.test_plan["tests"]
 
-    def _run_coefficient_test(self, test: dict, f_flag: bool, effects: dict):
+    def _run_coefficient_test(self, test: dict, f_flag: bool, effects: dict, estimate_type: str = "coefficient"):
         """Builds structures and runs test case for tests with an estimate_type of 'coefficient'.
 
         :param test: Single JSON test definition stored in a mapping (dict)
@@ -163,10 +166,11 @@ def _run_coefficient_test(self, test: dict, f_flag: bool, effects: dict):
         causal_test_case = CausalTestCase(
             base_test_case=base_test_case,
             expected_causal_effect=next(effects[effect] for variable, effect in test["expected_effect"].items()),
-            estimate_type="coefficient",
+            estimate_type=estimate_type,
             effect_modifier_configuration={self.scenario.variables[v] for v in test.get("effect_modifiers", [])},
         )
         failed, result = self._execute_test_case(causal_test_case=causal_test_case, test=test, f_flag=f_flag)
+
         msg = (
             f"Executing test: {test['name']} \n"
             + f"  {causal_test_case} \n"
@@ -273,10 +277,17 @@ def _execute_test_case(
         failed = False
 
         estimation_model = self._setup_test(causal_test_case=causal_test_case, test=test)
-        causal_test_result = causal_test_case.execute_test(
-            estimator=estimation_model, data_collector=self.data_collector
-        )
-        test_passes = causal_test_case.expected_causal_effect.apply(causal_test_result)
+        try:
+            causal_test_result = causal_test_case.execute_test(
+                estimator=estimation_model, data_collector=self.data_collector
+            )
+            test_passes = causal_test_case.expected_causal_effect.apply(causal_test_result)
+        except np.linalg.LinAlgError as e:
+            result = CausalTestResult(
+                estimator=estimation_model,
+                test_value=TestValue("Error", str(e)),
+            )
+            return None, result
 
         if "coverage" in test and test["coverage"]:
             adequacy_metric = DataAdequacy(causal_test_case, estimation_model)
diff --git a/causal_testing/testing/causal_test_outcome.py b/causal_testing/testing/causal_test_outcome.py
@@ -29,7 +29,7 @@ class SomeEffect(CausalTestOutcome):
     def apply(self, res: CausalTestResult) -> bool:
         if res.ci_low() is None or res.ci_high() is None:
             return None
-        if res.test_value.type in ("risk_ratio", "hazard_ratio"):
+        if res.test_value.type in ("risk_ratio", "hazard_ratio", "unit_odds_ratio"):
             return any(
                 1 < ci_low < ci_high or ci_low < ci_high < 1 for ci_low, ci_high in zip(res.ci_low(), res.ci_high())
             )
@@ -54,7 +54,7 @@ def __init__(self, atol: float = 1e-10, ctol: float = 0.05):
         self.ctol = ctol
 
     def apply(self, res: CausalTestResult) -> bool:
-        if res.test_value.type in ("risk_ratio", "hazard_ratio"):
+        if res.test_value.type in ("risk_ratio", "hazard_ratio", "unit_odds_ratio"):
             return any(
                 ci_low < 1 < ci_high or np.isclose(value, 1.0, atol=self.atol)
                 for ci_low, ci_high, value in zip(res.ci_low(), res.ci_high(), res.test_value.value)
diff --git a/dafni/main_dafni.py b/dafni/main_dafni.py
@@ -12,6 +12,7 @@
 from causal_testing.specification.variable import Input, Output
 from causal_testing.testing.causal_test_outcome import Positive, Negative, NoEffect, SomeEffect
 from causal_testing.estimation.linear_regression_estimator import LinearRegressionEstimator
+from causal_testing.estimation.logistic_regression_estimator import LogisticRegressionEstimator
 from causal_testing.json_front.json_class import JsonUtility
 
 
@@ -29,44 +30,36 @@ def get_args(test_args=None) -> argparse.Namespace:
             - argparse.Namespace - A Namespace consisting of the arguments to this script
     """
     parser = argparse.ArgumentParser(description="A script for running the CTF on DAFNI.")
-
-    parser.add_argument("--data_path", required=True, help="Path to the input runtime data (.csv)", nargs="+")
-
-    parser.add_argument(
-        "--tests_path", required=True, help="Input configuration file path " "containing the causal tests (.json)"
-    )
-
+    parser.add_argument("-d", "--data_path", required=True, help="Path to the input runtime data (.csv)", nargs="+")
     parser.add_argument(
-        "-i", "--ignore_cycles", action="store_true", help="Whether to ignore cycles in the DAG.", default=False
+        "-t", "--tests_path", required=True, help="Input configuration file path " "containing the causal tests (.json)"
     )
-
     parser.add_argument(
+        "-v",
         "--variables_path",
         required=True,
         help="Input configuration file path " "containing the predefined variables (.json)",
     )
-
     parser.add_argument(
+        "-D",
         "--dag_path",
         required=True,
         help="Input configuration file path containing a valid DAG (.dot). "
         "Note: this must be supplied if the --tests argument isn't provided.",
     )
-
-    parser.add_argument("--output_path", required=False, help="Path to the output directory.")
-
+    parser.add_argument(
+        "-i", "--ignore_cycles", action="store_true", help="Whether to ignore cycles in the DAG.", default=False
+    )
     parser.add_argument(
         "-f", default=False, help="(Optional) Failure flag to step the framework from running if a test has failed."
     )
-
+    parser.add_argument("-o", "--output_path", required=False, help="Path to the output directory.")
     parser.add_argument(
         "-w",
         default=False,
         help="(Optional) Specify to overwrite any existing output files. "
-        "This can lead to the loss of existing outputs if not "
-        "careful",
+        "This can lead to the loss of existing outputs if not careful",
     )
-
     args = parser.parse_args(test_args)
 
     # Convert these to Path objects for main()
@@ -165,7 +158,10 @@ def main():
 
         modelling_scenario.setup_treatment_variables()
 
-        estimators = {"LinearRegressionEstimator": LinearRegressionEstimator}
+        estimators = {
+            "LinearRegressionEstimator": LinearRegressionEstimator,
+            "LogisticRegressionEstimator": LogisticRegressionEstimator,
+        }
 
         # Step 3: Define the expected variables
 
diff --git a/tests/estimation_tests/test_logistic_regression_estimator.py b/tests/estimation_tests/test_logistic_regression_estimator.py
@@ -19,5 +19,5 @@ def setUpClass(cls) -> None:
     def test_odds_ratio(self):
         df = self.scarf_df.copy()
         logistic_regression_estimator = LogisticRegressionEstimator("length_in", 65, 55, set(), "completed", df)
-        odds = logistic_regression_estimator.estimate_unit_odds_ratio()
-        self.assertEqual(round(odds, 4), 0.8948)
+        odds, _ = logistic_regression_estimator.estimate_unit_odds_ratio()
+        self.assertEqual(round(odds[0], 4), 0.8948)

Original file line number	Diff line number	Diff line change
`@@ -41,6 +41,7 @@ def __init__(`
`41`	`41`	`outcome=outcome,`
`42`	`42`	`df=df,`
`43`	`43`	`effect_modifiers=effect_modifiers,`
	`44`	`+ alpha=alpha,`
`44`	`45`	`query=query,`
`45`	`46`	`)`
`46`	`47`