diff --git a/causal_testing/estimation/abstract_regression_estimator.py b/causal_testing/estimation/abstract_regression_estimator.py
index c6786d20..c048922b 100644
--- a/causal_testing/estimation/abstract_regression_estimator.py
+++ b/causal_testing/estimation/abstract_regression_estimator.py
@@ -41,6 +41,7 @@ def __init__(
             outcome=outcome,
             df=df,
             effect_modifiers=effect_modifiers,
+            alpha=alpha,
             query=query,
         )
 
diff --git a/causal_testing/estimation/logistic_regression_estimator.py b/causal_testing/estimation/logistic_regression_estimator.py
index ca5537d4..4fb828ba 100644
--- a/causal_testing/estimation/logistic_regression_estimator.py
+++ b/causal_testing/estimation/logistic_regression_estimator.py
@@ -3,6 +3,7 @@
 import logging
 
 import numpy as np
+import pandas as pd
 import statsmodels.formula.api as smf
 
 from causal_testing.estimation.abstract_regression_estimator import RegressionEstimator
@@ -31,11 +32,12 @@ def add_modelling_assumptions(self):
         self.modelling_assumptions.append("The outcome must be binary.")
         self.modelling_assumptions.append("Independently and identically distributed errors.")
 
-    def estimate_unit_odds_ratio(self) -> float:
+    def estimate_unit_odds_ratio(self) -> tuple[pd.Series, list[pd.Series]]:
         """Estimate the odds ratio of increasing the treatment by one. In logistic regression, this corresponds to the
         coefficient of the treatment of interest.
 
         :return: The odds ratio. Confidence intervals are not yet supported.
         """
         model = self._run_regression(self.df)
-        return np.exp(model.params[self.treatment])
+        ci_low, ci_high = np.exp(model.conf_int(self.alpha).loc[self.treatment])
+        return pd.Series(np.exp(model.params[self.treatment])), [pd.Series(ci_low), pd.Series(ci_high)]
diff --git a/causal_testing/json_front/json_class.py b/causal_testing/json_front/json_class.py
index fd617228..b9551c5a 100644
--- a/causal_testing/json_front/json_class.py
+++ b/causal_testing/json_front/json_class.py
@@ -11,6 +11,7 @@
 from statistics import StatisticsError
 
 import pandas as pd
+import numpy as np
 import scipy
 from fitter import Fitter, get_common_distributions
 
@@ -21,7 +22,7 @@
 from causal_testing.specification.scenario import Scenario
 from causal_testing.specification.variable import Input, Meta, Output
 from causal_testing.testing.causal_test_case import CausalTestCase
-from causal_testing.testing.causal_test_result import CausalTestResult
+from causal_testing.testing.causal_test_result import CausalTestResult, TestValue
 from causal_testing.testing.base_test_case import BaseTestCase
 from causal_testing.testing.causal_test_adequacy import DataAdequacy
@@ -136,8 +137,10 @@ def run_json_tests(self, effects: dict, estimators: dict, f_flag: bool = False,
                 failed, msg = self._run_concrete_metamorphic_test(test, f_flag, effects)
             # If we have a variable to mutate
             else:
-                if test["estimate_type"] == "coefficient":
-                    failed, msg = self._run_coefficient_test(test=test, f_flag=f_flag, effects=effects)
+                if test["estimate_type"] in ["coefficient", "unit_odds_ratio"]:
+                    failed, msg = self._run_coefficient_test(
+                        test=test, f_flag=f_flag, effects=effects, estimate_type=test["estimate_type"]
+                    )
                 else:
                     failed, msg = self._run_metamorphic_tests(
                         test=test, f_flag=f_flag, effects=effects, mutates=mutates
@@ -146,7 +149,7 @@ def run_json_tests(self, effects: dict, estimators: dict, f_flag: bool = False,
             test["result"] = msg
         return self.test_plan["tests"]
 
-    def _run_coefficient_test(self, test: dict, f_flag: bool, effects: dict):
+    def _run_coefficient_test(self, test: dict, f_flag: bool, effects: dict, estimate_type: str = "coefficient"):
         """Builds structures and runs test case for tests with an estimate_type of 'coefficient'.
 
         :param test: Single JSON test definition stored in a mapping (dict)
@@ -163,10 +166,11 @@ def _run_coefficient_test(self, test: dict, f_flag: bool, effects: dict):
         causal_test_case = CausalTestCase(
             base_test_case=base_test_case,
             expected_causal_effect=next(effects[effect] for variable, effect in test["expected_effect"].items()),
-            estimate_type="coefficient",
+            estimate_type=estimate_type,
             effect_modifier_configuration={self.scenario.variables[v] for v in test.get("effect_modifiers", [])},
         )
         failed, result = self._execute_test_case(causal_test_case=causal_test_case, test=test, f_flag=f_flag)
+
         msg = (
             f"Executing test: {test['name']} \n"
             + f" {causal_test_case} \n"
@@ -273,10 +277,17 @@ def _execute_test_case(
 
         failed = False
         estimation_model = self._setup_test(causal_test_case=causal_test_case, test=test)
-        causal_test_result = causal_test_case.execute_test(
-            estimator=estimation_model, data_collector=self.data_collector
-        )
-        test_passes = causal_test_case.expected_causal_effect.apply(causal_test_result)
+        try:
+            causal_test_result = causal_test_case.execute_test(
+                estimator=estimation_model, data_collector=self.data_collector
+            )
+            test_passes = causal_test_case.expected_causal_effect.apply(causal_test_result)
+        except np.linalg.LinAlgError as e:
+            result = CausalTestResult(
+                estimator=estimation_model,
+                test_value=TestValue("Error", str(e)),
+            )
+            return None, result
 
         if "coverage" in test and test["coverage"]:
             adequacy_metric = DataAdequacy(causal_test_case, estimation_model)
diff --git a/causal_testing/testing/causal_test_outcome.py b/causal_testing/testing/causal_test_outcome.py
index be10566b..6c1ea86e 100644
--- a/causal_testing/testing/causal_test_outcome.py
+++ b/causal_testing/testing/causal_test_outcome.py
@@ -29,7 +29,7 @@ class SomeEffect(CausalTestOutcome):
     def apply(self, res: CausalTestResult) -> bool:
         if res.ci_low() is None or res.ci_high() is None:
             return None
-        if res.test_value.type in ("risk_ratio", "hazard_ratio"):
+        if res.test_value.type in ("risk_ratio", "hazard_ratio", "unit_odds_ratio"):
             return any(
                 1 < ci_low < ci_high or ci_low < ci_high < 1 for ci_low, ci_high in zip(res.ci_low(), res.ci_high())
             )
@@ -54,7 +54,7 @@ def __init__(self, atol: float = 1e-10, ctol: float = 0.05):
         self.ctol = ctol
 
     def apply(self, res: CausalTestResult) -> bool:
-        if res.test_value.type in ("risk_ratio", "hazard_ratio"):
+        if res.test_value.type in ("risk_ratio", "hazard_ratio", "unit_odds_ratio"):
             return any(
                 ci_low < 1 < ci_high or np.isclose(value, 1.0, atol=self.atol)
                 for ci_low, ci_high, value in zip(res.ci_low(), res.ci_high(), res.test_value.value)
diff --git a/dafni/main_dafni.py b/dafni/main_dafni.py
index 4cf75cb6..d2cdb457 100644
--- a/dafni/main_dafni.py
+++ b/dafni/main_dafni.py
@@ -12,6 +12,7 @@
 from causal_testing.specification.variable import Input, Output
 from causal_testing.testing.causal_test_outcome import Positive, Negative, NoEffect, SomeEffect
 from causal_testing.estimation.linear_regression_estimator import LinearRegressionEstimator
+from causal_testing.estimation.logistic_regression_estimator import LogisticRegressionEstimator
 from causal_testing.json_front.json_class import JsonUtility
 
 
@@ -29,44 +30,36 @@ def get_args(test_args=None) -> argparse.Namespace:
         - argparse.Namespace - A Namsespace consisting of the arguments to this script
     """
     parser = argparse.ArgumentParser(description="A script for running the CTF on DAFNI.")
-
-    parser.add_argument("--data_path", required=True, help="Path to the input runtime data (.csv)", nargs="+")
-
-    parser.add_argument(
-        "--tests_path", required=True, help="Input configuration file path " "containing the causal tests (.json)"
-    )
-
+    parser.add_argument("-d", "--data_path", required=True, help="Path to the input runtime data (.csv)", nargs="+")
     parser.add_argument(
-        "-i", "--ignore_cycles", action="store_true", help="Whether to ignore cycles in the DAG.", default=False
+        "-t", "--tests_path", required=True, help="Input configuration file path " "containing the causal tests (.json)"
     )
-
     parser.add_argument(
+        "-v",
         "--variables_path",
         required=True,
         help="Input configuration file path " "containing the predefined variables (.json)",
     )
-
     parser.add_argument(
+        "-D",
         "--dag_path",
         required=True,
         help="Input configuration file path containing a valid DAG (.dot). "
        "Note: this must be supplied if the --tests argument isn't provided.",
     )
-
-    parser.add_argument("--output_path", required=False, help="Path to the output directory.")
-
+    parser.add_argument(
+        "-i", "--ignore_cycles", action="store_true", help="Whether to ignore cycles in the DAG.", default=False
+    )
     parser.add_argument(
         "-f", default=False, help="(Optional) Failure flag to step the framework from running if a test has failed."
     )
-
+    parser.add_argument("-o", "--output_path", required=False, help="Path to the output directory.")
     parser.add_argument(
         "-w",
         default=False,
         help="(Optional) Specify to overwrite any existing output files. "
-        "This can lead to the loss of existing outputs if not "
-        "careful",
+        "This can lead to the loss of existing outputs if not careful",
     )
-
     args = parser.parse_args(test_args)
 
     # Convert these to Path objects for main()
@@ -165,7 +158,10 @@ def main():
 
     modelling_scenario.setup_treatment_variables()
 
-    estimators = {"LinearRegressionEstimator": LinearRegressionEstimator}
+    estimators = {
+        "LinearRegressionEstimator": LinearRegressionEstimator,
+        "LogisticRegressionEstimator": LogisticRegressionEstimator,
+    }
 
     # Step 3: Define the expected variables
diff --git a/tests/estimation_tests/test_logistic_regression_estimator.py b/tests/estimation_tests/test_logistic_regression_estimator.py
index ac9688ae..544e58a5 100644
--- a/tests/estimation_tests/test_logistic_regression_estimator.py
+++ b/tests/estimation_tests/test_logistic_regression_estimator.py
@@ -19,5 +19,5 @@ def setUpClass(cls) -> None:
     def test_odds_ratio(self):
         df = self.scarf_df.copy()
         logistic_regression_estimator = LogisticRegressionEstimator("length_in", 65, 55, set(), "completed", df)
-        odds = logistic_regression_estimator.estimate_unit_odds_ratio()
-        self.assertEqual(round(odds, 4), 0.8948)
+        odds, _ = logistic_regression_estimator.estimate_unit_odds_ratio()
+        self.assertEqual(round(odds[0], 4), 0.8948)