Skip to content

Commit 7eb1990

Browse files
committed
Merge branch 'main' into f-allian/examples
# Conflicts: # examples/poisson-line-process/example_pure_python.py
2 parents 150f942 + fed97ad commit 7eb1990

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+947
-813
lines changed

.github/workflows/publish-to-pypi.yaml

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: Publish python PyPI
1+
name: Publish to PyPI
22

33
on:
44
push:
@@ -17,17 +17,16 @@ jobs:
1717
uses: actions/setup-python@v3
1818
with:
1919
python-version: '3.10'
20-
- name: Installing package
20+
21+
- name: Install build tools
2122
run: |
22-
pip3 install .
23-
pip3 install .[pypi]
24-
pip3 install build wheel
25-
pip3 install setuptools --upgrade
26-
pip3 install setuptools_scm
23+
pip install --upgrade pip setuptools wheel build setuptools_scm
24+
2725
- name: Build Package
2826
run: |
2927
python -m build --no-isolation
28+
3029
- name: Publish package to PyPI
3130
uses: pypa/gh-action-pypi-publish@release/v1
3231
with:
33-
password: ${{ secrets.PYPI_API_TOKEN }}
32+
password: ${{ secrets.PYPI_API_TOKEN }}

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,12 +66,12 @@ For more information on how to use the Causal Testing Framework, please refer to
6666
2. If you do not already have causal test cases, you can convert your causal DAG to causal tests by running the following command.
6767

6868
```
69-
python causal_testing/testing/metamorphic_relation.py --dag_path $PATH_TO_DAG --output_path $PATH_TO_TESTS
69+
python -m causal_testing generate --dag_path $PATH_TO_DAG --output_path $PATH_TO_TESTS
7070
```
7171

7272
3. You can now execute your tests by running the following command.
7373
```
74-
python -m causal_testing --dag_path $PATH_TO_DAG --data_paths $PATH_TO_DATA --test_config $PATH_TO_TESTS --output $OUTPUT
74+
python -m causal_testing test --dag_path $PATH_TO_DAG --data_paths $PATH_TO_DATA --test_config $PATH_TO_TESTS --output $OUTPUT
7575
```
7676
The results will be saved for inspection in a JSON file located at `$OUTPUT`.
7777
In the future, we hope to add a visualisation tool to assist with this.

causal_testing/__main__.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import os
77

88
from causal_testing.testing.metamorphic_relation import generate_causal_tests
9-
from .main import setup_logging, parse_args, CausalTestingPaths, CausalTestingFramework
9+
from .main import setup_logging, parse_args, CausalTestingPaths, CausalTestingFramework, Command
1010

1111

1212
def main() -> None:
@@ -19,9 +19,18 @@ def main() -> None:
1919
# Parse arguments
2020
args = parse_args()
2121

22-
if args.generate:
22+
if args.command == Command.GENERATE:
2323
logging.info("Generating causal tests")
24-
generate_causal_tests(args.dag_path, args.output, args.ignore_cycles, args.threads)
24+
generate_causal_tests(
25+
args.dag_path,
26+
args.output,
27+
args.ignore_cycles,
28+
args.threads,
29+
effect_type=args.effect_type,
30+
estimate_type=args.estimate_type,
31+
estimator=args.estimator,
32+
skip=True,
33+
)
2534
logging.info("Causal test generation completed successfully")
2635
return
2736

causal_testing/estimation/__init__.py

Whitespace-only changes.

causal_testing/estimation/cubic_spline_estimator.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
from causal_testing.specification.variable import Variable
1010
from causal_testing.estimation.linear_regression_estimator import LinearRegressionEstimator
11+
from causal_testing.estimation.effect_estimate import EffectEstimate
1112
from causal_testing.testing.base_test_case import BaseTestCase
1213

1314
logger = logging.getLogger(__name__)
@@ -47,7 +48,7 @@ def __init__(
4748
)
4849
self.formula = f"{base_test_case.outcome_variable.name} ~ cr({'+'.join(terms)}, df={basis})"
4950

50-
def estimate_ate_calculated(self, adjustment_config: dict = None) -> pd.Series:
51+
def estimate_ate_calculated(self, adjustment_config: dict = None) -> EffectEstimate:
5152
"""Estimate the ate effect of the treatment on the outcome. That is, the change in outcome caused
5253
by changing the treatment variable from the control value to the treatment value. Here, we actually
5354
calculate the expected outcomes under control and treatment and take their difference (treatment minus control). This
@@ -74,4 +75,4 @@ def estimate_ate_calculated(self, adjustment_config: dict = None) -> pd.Series:
7475
x[self.base_test_case.treatment_variable.name] = self.control_value
7576
control = model.predict(x).iloc[0]
7677

77-
return pd.Series(treatment - control)
78+
return EffectEstimate("ate", pd.Series(treatment - control))
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
"""
2+
This module contains the EffectEstimate dataclass.
3+
"""
4+
5+
from dataclasses import dataclass
6+
import pandas as pd
7+
8+
9+
@dataclass
class EffectEstimate:
    """
    A dataclass to hold the value and confidence intervals of a causal effect estimate.

    Plain ``dict`` and scalar inputs for ``value``, ``ci_low`` and ``ci_high`` are
    normalised to ``pd.Series`` on construction, so that ``ci_valid``, ``to_dict``
    and ``to_df`` can rely on the Series API regardless of how the estimator
    built its result.

    :ivar type: The type of estimate, e.g. ate, or risk_ratio
                (used to determine whether the estimate matches the expected effect)
    :ivar value: The estimated causal effect
    :ivar ci_low: The lower confidence interval (None if unavailable)
    :ivar ci_high: The upper confidence interval (None if unavailable)
    """

    type: str
    value: pd.Series
    ci_low: pd.Series = None
    ci_high: pd.Series = None

    def __post_init__(self) -> None:
        """Coerce dict/scalar inputs to ``pd.Series``; Series and None are left untouched."""
        self.value = self._as_series(self.value)
        self.ci_low = self._as_series(self.ci_low)
        self.ci_high = self._as_series(self.ci_high)

    @staticmethod
    def _as_series(data):
        """Return ``data`` as a ``pd.Series`` when it is a dict or a scalar, otherwise unchanged."""
        # None marks "no confidence interval" and must be preserved as-is.
        if data is None or isinstance(data, pd.Series):
            return data
        if isinstance(data, dict) or pd.api.types.is_scalar(data):
            return pd.Series(data)
        # Anything else (e.g. an object that already offers to_dict) is passed through unchanged.
        return data

    def ci_valid(self) -> bool:
        """Return whether or not the result has valid confidence intervals.

        The intervals are valid when both bounds are present and neither contains a null value.
        """
        if self.ci_low is None or self.ci_high is None:
            return False
        return not (pd.isnull(self.ci_low).any() or pd.isnull(self.ci_high).any())

    def to_dict(self) -> dict:
        """Return representation as a dict.

        The confidence interval entries are only included when ``ci_valid()`` holds.
        """
        d = {"effect_measure": self.type, "effect_estimate": self.value.to_dict()}
        if self.ci_valid():
            return d | {"ci_low": self.ci_low.to_dict(), "ci_high": self.ci_high.to_dict()}
        return d

    def to_df(self) -> pd.DataFrame:
        """Return representation as a pandas dataframe with one column per estimate/bound."""
        return pd.DataFrame({"effect_estimate": self.value, "ci_low": self.ci_low, "ci_high": self.ci_high})

causal_testing/estimation/experimental_estimator.py

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import pandas as pd
66

77
from causal_testing.estimation.abstract_estimator import Estimator
8+
from causal_testing.estimation.effect_estimate import EffectEstimate
89
from causal_testing.testing.base_test_case import BaseTestCase
910

1011

@@ -55,7 +56,7 @@ def run_system(self, configuration: dict) -> dict:
5556
:returns: The resulting output as a dict.
5657
"""
5758

58-
def estimate_ate(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
59+
def estimate_ate(self) -> EffectEstimate:
5960
"""Estimate the average treatment effect of the treatment on the outcome. That is, the change in outcome caused
6061
by changing the treatment variable from the control value to the treatment value.
6162
@@ -88,14 +89,20 @@ def estimate_ate(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
8889
ci_low = difference.iloc[ci_low_index]
8990
ci_high = difference.iloc[self.repeats - ci_low_index]
9091

91-
return pd.Series(
92-
{self.base_test_case.treatment_variable.name: difference.mean()[self.base_test_case.outcome_variable.name]}
93-
), [
92+
return EffectEstimate(
93+
"ate",
94+
pd.Series(
95+
{
96+
self.base_test_case.treatment_variable.name: difference.mean()[
97+
self.base_test_case.outcome_variable.name
98+
]
99+
}
100+
),
94101
pd.Series({self.base_test_case.treatment_variable.name: ci_low[self.base_test_case.outcome_variable.name]}),
95102
pd.Series(
96103
{self.base_test_case.treatment_variable.name: ci_high[self.base_test_case.outcome_variable.name]}
97104
),
98-
]
105+
)
99106

100107
def estimate_risk_ratio(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
101108
"""Estimate the risk ratio of the treatment on the outcome. That is, the change in outcome caused
@@ -130,11 +137,11 @@ def estimate_risk_ratio(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
130137
ci_low = difference.iloc[ci_low_index]
131138
ci_high = difference.iloc[self.repeats - ci_low_index]
132139

133-
return pd.Series(
134-
{self.base_test_case.treatment_variable.name: difference.mean()[self.base_test_case.outcome_variable.name]}
135-
), [
140+
return EffectEstimate(
141+
"ate",
142+
{self.base_test_case.treatment_variable.name: difference.mean()[self.base_test_case.outcome_variable.name]},
136143
pd.Series({self.base_test_case.treatment_variable.name: ci_low[self.base_test_case.outcome_variable.name]}),
137144
pd.Series(
138145
{self.base_test_case.treatment_variable.name: ci_high[self.base_test_case.outcome_variable.name]}
139146
),
140-
]
147+
)

causal_testing/estimation/instrumental_variable_estimator.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import statsmodels.api as sm
88

99
from causal_testing.estimation.abstract_estimator import Estimator
10+
from causal_testing.estimation.effect_estimate import EffectEstimate
1011
from causal_testing.testing.base_test_case import BaseTestCase
1112

1213
logger = logging.getLogger(__name__)
@@ -61,7 +62,7 @@ def add_modelling_assumptions(self):
6162
"""
6263
)
6364

64-
def estimate_iv_coefficient(self, df) -> float:
65+
def iv_coefficient(self, df) -> float:
6566
"""
6667
Estimate the linear regression coefficient of the treatment on the
6768
outcome.
@@ -75,16 +76,16 @@ def estimate_iv_coefficient(self, df) -> float:
7576
# Estimate the coefficient of I on X by cancelling
7677
return ab / a
7778

78-
def estimate_coefficient(self, bootstrap_size=100) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
79+
def estimate_coefficient(self, bootstrap_size=100) -> EffectEstimate:
7980
"""
8081
Estimate the unit ate (i.e. coefficient) of the treatment on the
8182
outcome.
8283
"""
8384
bootstraps = sorted(
84-
[self.estimate_iv_coefficient(self.df.sample(len(self.df), replace=True)) for _ in range(bootstrap_size)]
85+
[self.iv_coefficient(self.df.sample(len(self.df), replace=True)) for _ in range(bootstrap_size)]
8586
)
8687
bound = ceil((bootstrap_size * self.alpha) / 2)
8788
ci_low = pd.Series(bootstraps[bound])
8889
ci_high = pd.Series(bootstraps[bootstrap_size - bound])
8990

90-
return pd.Series(self.estimate_iv_coefficient(self.df)), [ci_low, ci_high]
91+
return EffectEstimate("coefficient", pd.Series(self.iv_coefficient(self.df)), ci_low, ci_high)

causal_testing/estimation/ipcw_estimator.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from lifelines import CoxPHFitter
1212

1313
from causal_testing.estimation.abstract_estimator import Estimator
14+
from causal_testing.estimation.effect_estimate import EffectEstimate
1415
from causal_testing.testing.base_test_case import BaseTestCase
1516
from causal_testing.specification.variable import Variable
1617

@@ -285,7 +286,7 @@ def preprocess_data(self):
285286
if len(self.df.loc[self.df["trtrand"] == 1]) == 0:
286287
raise ValueError(f"No individuals began the treatment strategy {self.treatment_strategy}")
287288

288-
def estimate_hazard_ratio(self):
289+
def estimate_hazard_ratio(self) -> EffectEstimate:
289290
"""
290291
Estimate the hazard ratio.
291292
"""
@@ -380,4 +381,4 @@ def estimate_hazard_ratio(self):
380381

381382
ci_low, ci_high = [np.exp(cox_ph.confidence_intervals_)[col] for col in cox_ph.confidence_intervals_.columns]
382383

383-
return (cox_ph.hazard_ratios_, (ci_low, ci_high))
384+
return EffectEstimate("hazard_ratio", cox_ph.hazard_ratios_, ci_low, ci_high)

causal_testing/estimation/linear_regression_estimator.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from causal_testing.specification.variable import Variable
1111
from causal_testing.estimation.genetic_programming_regression_fitter import GP
1212
from causal_testing.estimation.abstract_regression_estimator import RegressionEstimator
13+
from causal_testing.estimation.effect_estimate import EffectEstimate
1314
from causal_testing.testing.base_test_case import BaseTestCase
1415

1516
logger = logging.getLogger(__name__)
@@ -92,7 +93,7 @@ def gp_formula(
9293
formula = gp.simplify(formula)
9394
self.formula = f"{self.base_test_case.outcome_variable.name} ~ I({formula}) - 1"
9495

95-
def estimate_coefficient(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
96+
def estimate_coefficient(self) -> EffectEstimate:
9697
"""Estimate the unit average treatment effect of the treatment on the outcome. That is, the change in outcome
9798
caused by a unit change in treatment.
9899
@@ -121,9 +122,9 @@ def estimate_coefficient(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
121122
), f"{treatment} not in\n{' ' + str(model.params.index).replace(newline, newline + ' ')}"
122123
unit_effect = model.params[treatment] # Unit effect is the coefficient of the treatment
123124
[ci_low, ci_high] = self._get_confidence_intervals(model, treatment)
124-
return unit_effect, [ci_low, ci_high]
125+
return EffectEstimate("coefficient", unit_effect, ci_low, ci_high)
125126

126-
def estimate_ate(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
127+
def estimate_ate(self) -> EffectEstimate:
127128
"""Estimate the average treatment effect of the treatment on the outcome. That is, the change in outcome caused
128129
by changing the treatment variable from the control value to the treatment value.
129130
@@ -146,10 +147,10 @@ def estimate_ate(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
146147
t_test_results = model.t_test(individuals.loc["treated"] - individuals.loc["control"])
147148
ate = pd.Series(t_test_results.effect[0])
148149
confidence_intervals = list(t_test_results.conf_int(alpha=self.alpha).flatten())
149-
confidence_intervals = [pd.Series(interval) for interval in confidence_intervals]
150-
return ate, confidence_intervals
150+
ci_low, ci_high = [pd.Series(interval) for interval in confidence_intervals]
151+
return EffectEstimate("ate", ate, ci_low, ci_high)
151152

152-
def estimate_risk_ratio(self, adjustment_config: dict = None) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
153+
def estimate_risk_ratio(self, adjustment_config: dict = None) -> EffectEstimate:
153154
"""Estimate the risk_ratio effect of the treatment on the outcome. That is, the change in outcome caused
154155
by changing the treatment variable from the control value to the treatment value.
155156
@@ -159,9 +160,11 @@ def estimate_risk_ratio(self, adjustment_config: dict = None) -> tuple[pd.Series
159160
control_outcome, treatment_outcome = prediction.iloc[1], prediction.iloc[0]
160161
ci_low = pd.Series(treatment_outcome["mean_ci_lower"] / control_outcome["mean_ci_upper"])
161162
ci_high = pd.Series(treatment_outcome["mean_ci_upper"] / control_outcome["mean_ci_lower"])
162-
return pd.Series(treatment_outcome["mean"] / control_outcome["mean"]), [ci_low, ci_high]
163+
return EffectEstimate(
164+
"risk_ratio", pd.Series(treatment_outcome["mean"] / control_outcome["mean"]), ci_low, ci_high
165+
)
163166

164-
def estimate_ate_calculated(self, adjustment_config: dict = None) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
167+
def estimate_ate_calculated(self, adjustment_config: dict = None) -> EffectEstimate:
165168
"""Estimate the ate effect of the treatment on the outcome. That is, the change in outcome caused
166169
by changing the treatment variable from the control value to the treatment value. Here, we actually
167170
calculate the expected outcomes under control and treatment and take their difference (treatment minus control). This
@@ -177,7 +180,7 @@ def estimate_ate_calculated(self, adjustment_config: dict = None) -> tuple[pd.Se
177180
control_outcome, treatment_outcome = prediction.iloc[1], prediction.iloc[0]
178181
ci_low = pd.Series(treatment_outcome["mean_ci_lower"] - control_outcome["mean_ci_upper"])
179182
ci_high = pd.Series(treatment_outcome["mean_ci_upper"] - control_outcome["mean_ci_lower"])
180-
return pd.Series(treatment_outcome["mean"] - control_outcome["mean"]), [ci_low, ci_high]
183+
return EffectEstimate("ate", pd.Series(treatment_outcome["mean"] - control_outcome["mean"]), ci_low, ci_high)
181184

182185
def _get_confidence_intervals(self, model, treatment):
183186
confidence_intervals = model.conf_int(alpha=self.alpha, cols=None)

0 commit comments

Comments
 (0)