CITCOM-project
diff --git a/‎.github/workflows/publish-to-pypi.yaml
Lines changed: 7 additions & 8 deletions b/‎.github/workflows/publish-to-pypi.yaml
Lines changed: 7 additions & 8 deletions
diff --git a/‎README.md
Lines changed: 2 additions & 2 deletions b/‎README.md
Lines changed: 2 additions & 2 deletions
diff --git a/‎causal_testing/__main__.py
Lines changed: 12 additions & 3 deletions b/‎causal_testing/__main__.py
Lines changed: 12 additions & 3 deletions
diff --git a/‎causal_testing/estimation/__init__.py b/‎causal_testing/estimation/__init__.py
diff --git a/‎causal_testing/estimation/cubic_spline_estimator.py
Lines changed: 3 additions & 2 deletions b/‎causal_testing/estimation/cubic_spline_estimator.py
Lines changed: 3 additions & 2 deletions
diff --git a/‎causal_testing/estimation/effect_estimate.py
Lines changed: 43 additions & 0 deletions b/‎causal_testing/estimation/effect_estimate.py
Lines changed: 43 additions & 0 deletions
diff --git a/‎causal_testing/estimation/experimental_estimator.py
Lines changed: 16 additions & 9 deletions b/‎causal_testing/estimation/experimental_estimator.py
Lines changed: 16 additions & 9 deletions
diff --git a/‎causal_testing/estimation/instrumental_variable_estimator.py
Lines changed: 5 additions & 4 deletions b/‎causal_testing/estimation/instrumental_variable_estimator.py
Lines changed: 5 additions & 4 deletions
diff --git a/‎causal_testing/estimation/ipcw_estimator.py
Lines changed: 3 additions & 2 deletions b/‎causal_testing/estimation/ipcw_estimator.py
Lines changed: 3 additions & 2 deletions
diff --git a/‎causal_testing/estimation/linear_regression_estimator.py
Lines changed: 12 additions & 9 deletions b/‎causal_testing/estimation/linear_regression_estimator.py
Lines changed: 12 additions & 9 deletions
@@ -1,4 +1,4 @@
-name: Publish python PyPI
+name: Publish to PyPI
 
 on:
   push:
@@ -17,17 +17,16 @@ jobs:
         uses: actions/setup-python@v3
         with:
           python-version: '3.10'
-      - name: Installing package
+
+      - name: Install build tools
         run: |
-          pip3 install .
-          pip3 install .[pypi]
-          pip3 install build wheel
-          pip3 install setuptools --upgrade
-          pip3 install setuptools_scm
+          pip install --upgrade pip setuptools wheel build setuptools_scm
+
       - name: Build Package
         run: |
           python -m build --no-isolation
+
       - name: Publish package to PyPI
         uses: pypa/gh-action-pypi-publish@release/v1
         with:
-          password: ${{ secrets.PYPI_API_TOKEN }}
+          password: ${{ secrets.PYPI_API_TOKEN }}
@@ -66,12 +66,12 @@ For more information on how to use the Causal Testing Framework, please refer to
 2. If you do not already have causal test cases, you can convert your causal DAG to causal tests by running the following command.
 
 ```
-python causal_testing/testing/metamorphic_relation.py --dag_path $PATH_TO_DAG --output_path $PATH_TO_TESTS
+python -m causal_testing generate --dag_path $PATH_TO_DAG --output_path $PATH_TO_TESTS
 ```
 
 3. You can now execute your tests by running the following command.
 ```
-python -m causal_testing --dag_path $PATH_TO_DAG --data_paths $PATH_TO_DATA --test_config $PATH_TO_TESTS --output $OUTPUT
+python -m causal_testing test --dag_path $PATH_TO_DAG --data_paths $PATH_TO_DATA --test_config $PATH_TO_TESTS --output $OUTPUT
 ```
 The results will be saved for inspection in a JSON file located at `$OUTPUT`.
 In the future, we hope to add a visualisation tool to assist with this.
 
@@ -6,7 +6,7 @@
 import os
 
 from causal_testing.testing.metamorphic_relation import generate_causal_tests
-from .main import setup_logging, parse_args, CausalTestingPaths, CausalTestingFramework
+from .main import setup_logging, parse_args, CausalTestingPaths, CausalTestingFramework, Command
 
 
 def main() -> None:
@@ -19,9 +19,18 @@ def main() -> None:
     # Parse arguments
     args = parse_args()
 
-    if args.generate:
+    if args.command == Command.GENERATE:
         logging.info("Generating causal tests")
-        generate_causal_tests(args.dag_path, args.output, args.ignore_cycles, args.threads)
+        generate_causal_tests(
+            args.dag_path,
+            args.output,
+            args.ignore_cycles,
+            args.threads,
+            effect_type=args.effect_type,
+            estimate_type=args.estimate_type,
+            estimator=args.estimator,
+            skip=True,
+        )
         logging.info("Causal test generation completed successfully")
         return
 
 
@@ -8,6 +8,7 @@
 
 from causal_testing.specification.variable import Variable
 from causal_testing.estimation.linear_regression_estimator import LinearRegressionEstimator
+from causal_testing.estimation.effect_estimate import EffectEstimate
 from causal_testing.testing.base_test_case import BaseTestCase
 
 logger = logging.getLogger(__name__)
@@ -47,7 +48,7 @@ def __init__(
             )
             self.formula = f"{base_test_case.outcome_variable.name} ~ cr({'+'.join(terms)}, df={basis})"
 
-    def estimate_ate_calculated(self, adjustment_config: dict = None) -> pd.Series:
+    def estimate_ate_calculated(self, adjustment_config: dict = None) -> EffectEstimate:
         """Estimate the ate effect of the treatment on the outcome. That is, the change in outcome caused
         by changing the treatment variable from the control value to the treatment value. Here, we actually
         calculate the expected outcomes under control and treatment and subtract one from the other. This
@@ -74,4 +75,4 @@ def estimate_ate_calculated(self, adjustment_config: dict = None) -> pd.Series:
         x[self.base_test_case.treatment_variable.name] = self.control_value
         control = model.predict(x).iloc[0]
 
-        return pd.Series(treatment - control)
+        return EffectEstimate("ate", pd.Series(treatment - control))
@@ -0,0 +1,43 @@
+"""
+This module contains the EffectEstimate dataclass.
+"""
+
+from dataclasses import dataclass
+import pandas as pd
+
+
+@dataclass
+class EffectEstimate:
+    """
+    A dataclass to hold the value and confidence intervals of a causal effect estimate.
+
+    :ivar type: The type of estimate, e.g. ate, or risk_ratio
+                (used to determine whether the estimate matches the expected effect)
+    :ivar value: The estimated causal effect
+    :ivar ci_low: The lower confidence interval bound (None when not supplied)
+    :ivar ci_high: The upper confidence interval bound (None when not supplied)
+    """
+
+    type: str
+    value: pd.Series
+    ci_low: pd.Series = None  # Optional: stays None unless a lower bound is passed in
+    ci_high: pd.Series = None  # Optional: stays None unless an upper bound is passed in
+
+    def ci_valid(self) -> bool:
+        """Return True only if both confidence bounds are set and neither contains a null value."""
+        return (
+            self.ci_low is not None
+            and self.ci_high is not None
+            and not (pd.isnull(self.ci_low).any() or pd.isnull(self.ci_high).any())
+        )
+
+    def to_dict(self) -> dict:
+        """Return the estimate as a dict; ci_low and ci_high keys are added only if ci_valid() is True."""
+        d = {"effect_measure": self.type, "effect_estimate": self.value.to_dict()}
+        if self.ci_valid():
+            return d | {"ci_low": self.ci_low.to_dict(), "ci_high": self.ci_high.to_dict()}
+        return d
+
+    def to_df(self) -> pd.DataFrame:
+        """Return a dataframe with effect_estimate, ci_low and ci_high columns (ci_valid() is not checked)."""
+        return pd.DataFrame({"effect_estimate": self.value, "ci_low": self.ci_low, "ci_high": self.ci_high})
@@ -5,6 +5,7 @@
 import pandas as pd
 
 from causal_testing.estimation.abstract_estimator import Estimator
+from causal_testing.estimation.effect_estimate import EffectEstimate
 from causal_testing.testing.base_test_case import BaseTestCase
 
 
@@ -55,7 +56,7 @@ def run_system(self, configuration: dict) -> dict:
         :returns: The resulting output as a dict.
         """
 
-    def estimate_ate(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
+    def estimate_ate(self) -> EffectEstimate:
         """Estimate the average treatment effect of the treatment on the outcome. That is, the change in outcome caused
         by changing the treatment variable from the control value to the treatment value.
 
@@ -88,14 +89,20 @@ def estimate_ate(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
         ci_low = difference.iloc[ci_low_index]
         ci_high = difference.iloc[self.repeats - ci_low_index]
 
-        return pd.Series(
-            {self.base_test_case.treatment_variable.name: difference.mean()[self.base_test_case.outcome_variable.name]}
-        ), [
+        return EffectEstimate(
+            "ate",
+            pd.Series(
+                {
+                    self.base_test_case.treatment_variable.name: difference.mean()[
+                        self.base_test_case.outcome_variable.name
+                    ]
+                }
+            ),
             pd.Series({self.base_test_case.treatment_variable.name: ci_low[self.base_test_case.outcome_variable.name]}),
             pd.Series(
                 {self.base_test_case.treatment_variable.name: ci_high[self.base_test_case.outcome_variable.name]}
             ),
-        ]
+        )
 
     def estimate_risk_ratio(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
         """Estimate the risk ratio of the treatment on the outcome. That is, the change in outcome caused
@@ -130,11 +137,11 @@ def estimate_risk_ratio(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
         ci_low = difference.iloc[ci_low_index]
         ci_high = difference.iloc[self.repeats - ci_low_index]
 
-        return pd.Series(
-            {self.base_test_case.treatment_variable.name: difference.mean()[self.base_test_case.outcome_variable.name]}
-        ), [
+        return EffectEstimate(
+            "ate",
+            {self.base_test_case.treatment_variable.name: difference.mean()[self.base_test_case.outcome_variable.name]},
             pd.Series({self.base_test_case.treatment_variable.name: ci_low[self.base_test_case.outcome_variable.name]}),
             pd.Series(
                 {self.base_test_case.treatment_variable.name: ci_high[self.base_test_case.outcome_variable.name]}
             ),
-        ]
+        )
@@ -7,6 +7,7 @@
 import statsmodels.api as sm
 
 from causal_testing.estimation.abstract_estimator import Estimator
+from causal_testing.estimation.effect_estimate import EffectEstimate
 from causal_testing.testing.base_test_case import BaseTestCase
 
 logger = logging.getLogger(__name__)
@@ -61,7 +62,7 @@ def add_modelling_assumptions(self):
         """
         )
 
-    def estimate_iv_coefficient(self, df) -> float:
+    def iv_coefficient(self, df) -> float:
         """
         Estimate the linear regression coefficient of the treatment on the
         outcome.
@@ -75,16 +76,16 @@ def estimate_iv_coefficient(self, df) -> float:
         # Estimate the coefficient of I on X by cancelling
         return ab / a
 
-    def estimate_coefficient(self, bootstrap_size=100) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
+    def estimate_coefficient(self, bootstrap_size=100) -> EffectEstimate:
         """
         Estimate the unit ate (i.e. coefficient) of the treatment on the
         outcome.
         """
         bootstraps = sorted(
-            [self.estimate_iv_coefficient(self.df.sample(len(self.df), replace=True)) for _ in range(bootstrap_size)]
+            [self.iv_coefficient(self.df.sample(len(self.df), replace=True)) for _ in range(bootstrap_size)]
         )
         bound = ceil((bootstrap_size * self.alpha) / 2)
         ci_low = pd.Series(bootstraps[bound])
         ci_high = pd.Series(bootstraps[bootstrap_size - bound])
 
-        return pd.Series(self.estimate_iv_coefficient(self.df)), [ci_low, ci_high]
+        return EffectEstimate("coefficient", pd.Series(self.iv_coefficient(self.df)), ci_low, ci_high)
@@ -11,6 +11,7 @@
 from lifelines import CoxPHFitter
 
 from causal_testing.estimation.abstract_estimator import Estimator
+from causal_testing.estimation.effect_estimate import EffectEstimate
 from causal_testing.testing.base_test_case import BaseTestCase
 from causal_testing.specification.variable import Variable
 
@@ -285,7 +286,7 @@ def preprocess_data(self):
         if len(self.df.loc[self.df["trtrand"] == 1]) == 0:
             raise ValueError(f"No individuals began the treatment strategy {self.treatment_strategy}")
 
-    def estimate_hazard_ratio(self):
+    def estimate_hazard_ratio(self) -> EffectEstimate:
         """
         Estimate the hazard ratio.
         """
@@ -380,4 +381,4 @@ def estimate_hazard_ratio(self):
 
         ci_low, ci_high = [np.exp(cox_ph.confidence_intervals_)[col] for col in cox_ph.confidence_intervals_.columns]
 
-        return (cox_ph.hazard_ratios_, (ci_low, ci_high))
+        return EffectEstimate("hazard_ratio", cox_ph.hazard_ratios_, ci_low, ci_high)
@@ -10,6 +10,7 @@
 from causal_testing.specification.variable import Variable
 from causal_testing.estimation.genetic_programming_regression_fitter import GP
 from causal_testing.estimation.abstract_regression_estimator import RegressionEstimator
+from causal_testing.estimation.effect_estimate import EffectEstimate
 from causal_testing.testing.base_test_case import BaseTestCase
 
 logger = logging.getLogger(__name__)
@@ -92,7 +93,7 @@ def gp_formula(
         formula = gp.simplify(formula)
         self.formula = f"{self.base_test_case.outcome_variable.name} ~ I({formula}) - 1"
 
-    def estimate_coefficient(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
+    def estimate_coefficient(self) -> EffectEstimate:
         """Estimate the unit average treatment effect of the treatment on the outcome. That is, the change in outcome
         caused by a unit change in treatment.
 
@@ -121,9 +122,9 @@ def estimate_coefficient(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
         ), f"{treatment} not in\n{'  ' + str(model.params.index).replace(newline, newline + '  ')}"
         unit_effect = model.params[treatment]  # Unit effect is the coefficient of the treatment
         [ci_low, ci_high] = self._get_confidence_intervals(model, treatment)
-        return unit_effect, [ci_low, ci_high]
+        return EffectEstimate("coefficient", unit_effect, ci_low, ci_high)
 
-    def estimate_ate(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
+    def estimate_ate(self) -> EffectEstimate:
         """Estimate the average treatment effect of the treatment on the outcome. That is, the change in outcome caused
         by changing the treatment variable from the control value to the treatment value.
 
@@ -146,10 +147,10 @@ def estimate_ate(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
         t_test_results = model.t_test(individuals.loc["treated"] - individuals.loc["control"])
         ate = pd.Series(t_test_results.effect[0])
         confidence_intervals = list(t_test_results.conf_int(alpha=self.alpha).flatten())
-        confidence_intervals = [pd.Series(interval) for interval in confidence_intervals]
-        return ate, confidence_intervals
+        ci_low, ci_high = [pd.Series(interval) for interval in confidence_intervals]
+        return EffectEstimate("ate", ate, ci_low, ci_high)
 
-    def estimate_risk_ratio(self, adjustment_config: dict = None) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
+    def estimate_risk_ratio(self, adjustment_config: dict = None) -> EffectEstimate:
         """Estimate the risk_ratio effect of the treatment on the outcome. That is, the change in outcome caused
         by changing the treatment variable from the control value to the treatment value.
 
@@ -159,9 +160,11 @@ def estimate_risk_ratio(self, adjustment_config: dict = None) -> tuple[pd.Series
         control_outcome, treatment_outcome = prediction.iloc[1], prediction.iloc[0]
         ci_low = pd.Series(treatment_outcome["mean_ci_lower"] / control_outcome["mean_ci_upper"])
         ci_high = pd.Series(treatment_outcome["mean_ci_upper"] / control_outcome["mean_ci_lower"])
-        return pd.Series(treatment_outcome["mean"] / control_outcome["mean"]), [ci_low, ci_high]
+        return EffectEstimate(
+            "risk_ratio", pd.Series(treatment_outcome["mean"] / control_outcome["mean"]), ci_low, ci_high
+        )
 
-    def estimate_ate_calculated(self, adjustment_config: dict = None) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
+    def estimate_ate_calculated(self, adjustment_config: dict = None) -> EffectEstimate:
         """Estimate the ate effect of the treatment on the outcome. That is, the change in outcome caused
         by changing the treatment variable from the control value to the treatment value. Here, we actually
         calculate the expected outcomes under control and treatment and subtract one from the other. This
@@ -177,7 +180,7 @@ def estimate_ate_calculated(self, adjustment_config: dict = None) -> tuple[pd.Se
         control_outcome, treatment_outcome = prediction.iloc[1], prediction.iloc[0]
         ci_low = pd.Series(treatment_outcome["mean_ci_lower"] - control_outcome["mean_ci_upper"])
         ci_high = pd.Series(treatment_outcome["mean_ci_upper"] - control_outcome["mean_ci_lower"])
-        return pd.Series(treatment_outcome["mean"] - control_outcome["mean"]), [ci_low, ci_high]
+        return EffectEstimate("ate", pd.Series(treatment_outcome["mean"] - control_outcome["mean"]), ci_low, ci_high)
 
     def _get_confidence_intervals(self, model, treatment):
         confidence_intervals = model.conf_int(alpha=self.alpha, cols=None)