Skip to content

Commit 1cef1fb

Browse files
committed
Examples
1 parent 9a82172 commit 1cef1fb

File tree

2 files changed

+280
-0
lines changed

2 files changed

+280
-0
lines changed
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
import logging
2+
3+
from causal_testing.estimation.linear_regression_estimator import LinearRegressionEstimator
4+
from causal_testing.testing.causal_test_outcome import ExactValue, Positive, Negative, NoEffect
5+
from causal_testing.json_front.json_class import JsonUtility
6+
from causal_testing.specification.scenario import Scenario
7+
from causal_testing.specification.variable import Input, Output
8+
9+
10+
logger = logging.getLogger(__name__)
11+
logging.basicConfig(level=logging.DEBUG, format="%(message)s")
12+
13+
effects = {
14+
"Positive": Positive(),
15+
"Negative": Negative(),
16+
"ExactValue4_05": ExactValue(4, atol=0.5),
17+
"NoEffect": NoEffect(),
18+
}
19+
20+
estimators = {
21+
"LinearRegressionEstimator": LinearRegressionEstimator,
22+
}
23+
24+
# 2. Create variables
25+
width = Input("width", float)
26+
height = Input("height", float)
27+
intensity = Input("intensity", float)
28+
29+
num_lines_abs = Output("num_lines_abs", float)
30+
num_lines_unit = Output("num_lines_unit", float)
31+
num_shapes_abs = Output("num_shapes_abs", float)
32+
num_shapes_unit = Output("num_shapes_unit", float)
33+
34+
# 3. Create scenario by applying constraints over a subset of the input variables
35+
scenario = Scenario(
36+
variables={
37+
width,
38+
height,
39+
intensity,
40+
num_lines_abs,
41+
num_lines_unit,
42+
num_shapes_abs,
43+
num_shapes_unit,
44+
}
45+
)
46+
scenario.setup_treatment_variables()
47+
48+
mutates = {
49+
"Increase": lambda x: scenario.treatment_variables[x].z3 > scenario.variables[x].z3,
50+
"ChangeByFactor(2)": lambda x: scenario.treatment_variables[x].z3 == scenario.variables[x].z3 * 2,
51+
}
52+
53+
54+
if __name__ == "__main__":
55+
args = JsonUtility.get_args()
56+
json_utility = JsonUtility(args.log_path) # Create an instance of the extended JsonUtility class
57+
json_utility.set_paths(
58+
args.json_path, args.dag_path, args.data_path
59+
) # Set the path to the data.csv, dag.dot and causal_tests.json file
60+
61+
# Load the Causal Variables into the JsonUtility class ready to be used in the tests
62+
json_utility.setup(scenario=scenario) # Sets up all the necessary parts of the json_class needed to execute tests
63+
64+
json_utility.run_json_tests(effects=effects, mutates=mutates, estimators=estimators, f_flag=args.f)
Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
1+
import os
2+
import logging
3+
4+
import pandas as pd
5+
6+
from causal_testing.specification.causal_dag import CausalDAG
7+
from causal_testing.specification.scenario import Scenario
8+
from causal_testing.specification.variable import Input, Output
9+
from causal_testing.specification.causal_specification import CausalSpecification
10+
from causal_testing.testing.causal_test_case import CausalTestCase
11+
from causal_testing.testing.causal_test_outcome import ExactValue, Positive
12+
from causal_testing.estimation.linear_regression_estimator import LinearRegressionEstimator
13+
from causal_testing.estimation.abstract_estimator import Estimator
14+
from causal_testing.testing.base_test_case import BaseTestCase
15+
from causal_testing.testing.causal_test_suite import CausalTestSuite
16+
17+
18+
logger = logging.getLogger(__name__)
19+
logging.basicConfig(level=logging.DEBUG, format="%(message)s")
20+
21+
22+
class EmpiricalMeanEstimator(Estimator):
23+
def add_modelling_assumptions(self):
24+
"""
25+
Add modelling assumptions to the estimator. This is a list of strings which list the modelling assumptions that
26+
must hold if the resulting causal inference is to be considered valid.
27+
"""
28+
self.modelling_assumptions += "The data must contain runs with the exact configuration of interest."
29+
30+
def estimate_ate(self) -> float:
31+
"""Estimate the outcomes under control and treatment.
32+
:return: The empirical average treatment effect.
33+
"""
34+
control_results = self.df.where(self.df[self.treatment] == self.control_value)[self.outcome].dropna()
35+
treatment_results = self.df.where(self.df[self.treatment] == self.treatment_value)[self.outcome].dropna()
36+
return treatment_results.mean() - control_results.mean(), None
37+
38+
def estimate_risk_ratio(self) -> float:
39+
"""Estimate the outcomes under control and treatment.
40+
:return: The empirical average treatment effect.
41+
"""
42+
control_results = self.df.where(self.df[self.treatment] == self.control_value)[self.outcome].dropna()
43+
treatment_results = self.df.where(self.df[self.treatment] == self.treatment_value)[self.outcome].dropna()
44+
return treatment_results.mean() / control_results.mean(), None
45+
46+
47+
# 1. Read in the Causal DAG
48+
ROOT = os.path.realpath(os.path.dirname(__file__))
49+
causal_dag = CausalDAG(f"{ROOT}/dag.dot")
50+
51+
# 2. Create variables
52+
width = Input("width", float)
53+
height = Input("height", float)
54+
intensity = Input("intensity", float)
55+
56+
num_lines_abs = Output("num_lines_abs", float)
57+
num_lines_unit = Output("num_lines_unit", float)
58+
num_shapes_abs = Output("num_shapes_abs", float)
59+
num_shapes_unit = Output("num_shapes_unit", float)
60+
61+
# 3. Create scenario
62+
scenario = Scenario(
63+
variables={
64+
width,
65+
height,
66+
intensity,
67+
num_lines_abs,
68+
num_lines_unit,
69+
num_shapes_abs,
70+
num_shapes_unit,
71+
}
72+
)
73+
74+
# 4. Construct a causal specification from the scenario and causal DAG
75+
causal_specification = CausalSpecification(scenario, causal_dag)
76+
77+
observational_data_path = f"{ROOT}/data/random/data_random_1000.csv"
78+
79+
80+
def causal_test_intensity_num_shapes(
81+
observational_data_path,
82+
causal_test_case,
83+
square_terms=[],
84+
inverse_terms=[],
85+
empirical=False,
86+
):
87+
# 8. Set up an estimator
88+
data = pd.read_csv(observational_data_path, index_col=0).astype(float)
89+
90+
treatment = causal_test_case.treatment_variable.name
91+
outcome = causal_test_case.outcome_variable.name
92+
93+
estimator = None
94+
if empirical:
95+
estimator = EmpiricalMeanEstimator(
96+
treatment=[treatment],
97+
control_value=causal_test_case.control_value,
98+
treatment_value=causal_test_case.treatment_value,
99+
adjustment_set=set(),
100+
outcome=[outcome],
101+
df=data,
102+
effect_modifiers=causal_test_case.effect_modifier_configuration,
103+
)
104+
else:
105+
square_terms = [f"I({t} ** 2)" for t in square_terms]
106+
inverse_terms = [f"I({t} ** -1)" for t in inverse_terms]
107+
estimator = LinearRegressionEstimator(
108+
treatment=treatment,
109+
control_value=causal_test_case.control_value,
110+
treatment_value=causal_test_case.treatment_value,
111+
adjustment_set=set(),
112+
outcome=outcome,
113+
df=data,
114+
effect_modifiers=causal_test_case.effect_modifier_configuration,
115+
formula=f"{outcome} ~ {treatment} + {'+'.join(square_terms + inverse_terms + list([e for e in causal_test_case.effect_modifier_configuration]))} -1",
116+
)
117+
118+
# 9. Execute the test
119+
causal_test_result = causal_test_case.execute_test(estimator)
120+
121+
return causal_test_result
122+
123+
124+
def test_poisson_intensity_num_shapes(save=False):
125+
intensity_num_shapes_results = []
126+
base_test_case = BaseTestCase(treatment_variable=intensity, outcome_variable=num_shapes_unit)
127+
for wh in range(1, 11):
128+
smt_data_path = f"{ROOT}/data/smt_100/data_smt_wh{wh}_100.csv"
129+
causal_test_case_list = [
130+
CausalTestCase(
131+
base_test_case=base_test_case,
132+
expected_causal_effect=ExactValue(4, atol=0.5),
133+
treatment_value=treatment_value,
134+
control_value=control_value,
135+
estimate_type="risk_ratio",
136+
)
137+
for control_value, treatment_value in [(1, 2), (2, 4), (4, 8), (8, 16)]
138+
]
139+
test_suite = CausalTestSuite()
140+
test_suite.add_test_object(
141+
base_test_case,
142+
causal_test_case_list=causal_test_case_list,
143+
estimators=[LinearRegressionEstimator, EmpiricalMeanEstimator],
144+
)
145+
test_suite_results = test_suite.execute_test_suite(
146+
causal_specification, pd.read_csv(smt_data_path, index_col=0).astype(float)
147+
)
148+
149+
smt_risk_ratios = [
150+
causal_test_result.test_value.value
151+
for causal_test_result in test_suite_results[base_test_case]["EmpiricalMeanEstimator"]
152+
]
153+
154+
intensity_num_shapes_results += [
155+
{
156+
"width": wh,
157+
"height": wh,
158+
"control": obs_causal_test_result.estimator.control_value,
159+
"treatment": obs_causal_test_result.estimator.treatment_value,
160+
"smt_risk_ratio": smt_causal_test_result.test_value.value,
161+
"obs_risk_ratio": obs_causal_test_result.test_value.value[0],
162+
}
163+
for obs_causal_test_result, smt_causal_test_result in zip(
164+
test_suite_results[base_test_case]["LinearRegressionEstimator"],
165+
test_suite_results[base_test_case]["EmpiricalMeanEstimator"],
166+
)
167+
]
168+
intensity_num_shapes_results = pd.DataFrame(intensity_num_shapes_results)
169+
if save:
170+
intensity_num_shapes_results.to_csv("intensity_num_shapes_results_random_1000.csv")
171+
logger.info("%s", intensity_num_shapes_results)
172+
173+
174+
def test_poisson_width_num_shapes(save=False):
175+
base_test_case = BaseTestCase(treatment_variable=width, outcome_variable=num_shapes_unit)
176+
causal_test_case_list = [
177+
CausalTestCase(
178+
base_test_case=base_test_case,
179+
expected_causal_effect=Positive(),
180+
control_value=float(w),
181+
treatment_value=w + 1.0,
182+
estimate_type="ate_calculated",
183+
effect_modifier_configuration={"intensity": i},
184+
)
185+
for i in range(17)
186+
for w in range(1, 10)
187+
]
188+
test_suite = CausalTestSuite()
189+
test_suite.add_test_object(
190+
base_test_case,
191+
causal_test_case_list=causal_test_case_list,
192+
estimators=[LinearRegressionEstimator],
193+
)
194+
test_suite_results = test_suite.execute_test_suite(
195+
causal_specification, pd.read_csv(observational_data_path, index_col=0).astype(float)
196+
)
197+
width_num_shapes_results = [
198+
{
199+
"control": causal_test_result.estimator.control_value,
200+
"treatment": causal_test_result.estimator.treatment_value,
201+
"intensity": causal_test_result.effect_modifier_configuration["intensity"],
202+
"ate": causal_test_result.test_value.value[0],
203+
"ci_low": causal_test_result.confidence_intervals[0][0],
204+
"ci_high": causal_test_result.confidence_intervals[1][0],
205+
}
206+
for causal_test_result in test_suite_results[base_test_case]["LinearRegressionEstimator"]
207+
]
208+
width_num_shapes_results = pd.DataFrame(width_num_shapes_results)
209+
if save:
210+
width_num_shapes_results.to_csv("width_num_shapes_results_random_1000.csv")
211+
logger.info("%s", width_num_shapes_results)
212+
213+
214+
if __name__ == "__main__":
215+
test_poisson_intensity_num_shapes(save=False)
216+
# test_poisson_width_num_shapes(save=True)

0 commit comments

Comments
 (0)