|
| 1 | +import os |
| 2 | +import logging |
| 3 | + |
| 4 | +import pandas as pd |
| 5 | + |
| 6 | +from causal_testing.specification.causal_dag import CausalDAG |
| 7 | +from causal_testing.specification.scenario import Scenario |
| 8 | +from causal_testing.specification.variable import Input, Output |
| 9 | +from causal_testing.specification.causal_specification import CausalSpecification |
| 10 | +from causal_testing.testing.causal_test_case import CausalTestCase |
| 11 | +from causal_testing.testing.causal_test_outcome import ExactValue, Positive |
| 12 | +from causal_testing.estimation.linear_regression_estimator import LinearRegressionEstimator |
| 13 | +from causal_testing.estimation.abstract_estimator import Estimator |
| 14 | +from causal_testing.testing.base_test_case import BaseTestCase |
| 15 | +from causal_testing.testing.causal_test_suite import CausalTestSuite |
| 16 | + |
| 17 | + |
| 18 | +logger = logging.getLogger(__name__) |
| 19 | +logging.basicConfig(level=logging.DEBUG, format="%(message)s") |
| 20 | + |
| 21 | + |
| 22 | +class EmpiricalMeanEstimator(Estimator): |
| 23 | + def add_modelling_assumptions(self): |
| 24 | + """ |
| 25 | + Add modelling assumptions to the estimator. This is a list of strings which list the modelling assumptions that |
| 26 | + must hold if the resulting causal inference is to be considered valid. |
| 27 | + """ |
| 28 | + self.modelling_assumptions += "The data must contain runs with the exact configuration of interest." |
| 29 | + |
| 30 | + def estimate_ate(self) -> float: |
| 31 | + """Estimate the outcomes under control and treatment. |
| 32 | + :return: The empirical average treatment effect. |
| 33 | + """ |
| 34 | + control_results = self.df.where(self.df[self.treatment] == self.control_value)[self.outcome].dropna() |
| 35 | + treatment_results = self.df.where(self.df[self.treatment] == self.treatment_value)[self.outcome].dropna() |
| 36 | + return treatment_results.mean() - control_results.mean(), None |
| 37 | + |
| 38 | + def estimate_risk_ratio(self) -> float: |
| 39 | + """Estimate the outcomes under control and treatment. |
| 40 | + :return: The empirical average treatment effect. |
| 41 | + """ |
| 42 | + control_results = self.df.where(self.df[self.treatment] == self.control_value)[self.outcome].dropna() |
| 43 | + treatment_results = self.df.where(self.df[self.treatment] == self.treatment_value)[self.outcome].dropna() |
| 44 | + return treatment_results.mean() / control_results.mean(), None |
| 45 | + |
| 46 | + |
| 47 | +# 1. Read in the Causal DAG |
| 48 | +ROOT = os.path.realpath(os.path.dirname(__file__)) |
| 49 | +causal_dag = CausalDAG(f"{ROOT}/dag.dot") |
| 50 | + |
| 51 | +# 2. Create variables |
| 52 | +width = Input("width", float) |
| 53 | +height = Input("height", float) |
| 54 | +intensity = Input("intensity", float) |
| 55 | + |
| 56 | +num_lines_abs = Output("num_lines_abs", float) |
| 57 | +num_lines_unit = Output("num_lines_unit", float) |
| 58 | +num_shapes_abs = Output("num_shapes_abs", float) |
| 59 | +num_shapes_unit = Output("num_shapes_unit", float) |
| 60 | + |
| 61 | +# 3. Create scenario |
| 62 | +scenario = Scenario( |
| 63 | + variables={ |
| 64 | + width, |
| 65 | + height, |
| 66 | + intensity, |
| 67 | + num_lines_abs, |
| 68 | + num_lines_unit, |
| 69 | + num_shapes_abs, |
| 70 | + num_shapes_unit, |
| 71 | + } |
| 72 | +) |
| 73 | + |
| 74 | +# 4. Construct a causal specification from the scenario and causal DAG |
| 75 | +causal_specification = CausalSpecification(scenario, causal_dag) |
| 76 | + |
| 77 | +observational_data_path = f"{ROOT}/data/random/data_random_1000.csv" |
| 78 | + |
| 79 | + |
| 80 | +def causal_test_intensity_num_shapes( |
| 81 | + observational_data_path, |
| 82 | + causal_test_case, |
| 83 | + square_terms=[], |
| 84 | + inverse_terms=[], |
| 85 | + empirical=False, |
| 86 | +): |
| 87 | + # 8. Set up an estimator |
| 88 | + data = pd.read_csv(observational_data_path, index_col=0).astype(float) |
| 89 | + |
| 90 | + treatment = causal_test_case.treatment_variable.name |
| 91 | + outcome = causal_test_case.outcome_variable.name |
| 92 | + |
| 93 | + estimator = None |
| 94 | + if empirical: |
| 95 | + estimator = EmpiricalMeanEstimator( |
| 96 | + treatment=[treatment], |
| 97 | + control_value=causal_test_case.control_value, |
| 98 | + treatment_value=causal_test_case.treatment_value, |
| 99 | + adjustment_set=set(), |
| 100 | + outcome=[outcome], |
| 101 | + df=data, |
| 102 | + effect_modifiers=causal_test_case.effect_modifier_configuration, |
| 103 | + ) |
| 104 | + else: |
| 105 | + square_terms = [f"I({t} ** 2)" for t in square_terms] |
| 106 | + inverse_terms = [f"I({t} ** -1)" for t in inverse_terms] |
| 107 | + estimator = LinearRegressionEstimator( |
| 108 | + treatment=treatment, |
| 109 | + control_value=causal_test_case.control_value, |
| 110 | + treatment_value=causal_test_case.treatment_value, |
| 111 | + adjustment_set=set(), |
| 112 | + outcome=outcome, |
| 113 | + df=data, |
| 114 | + effect_modifiers=causal_test_case.effect_modifier_configuration, |
| 115 | + formula=f"{outcome} ~ {treatment} + {'+'.join(square_terms + inverse_terms + list([e for e in causal_test_case.effect_modifier_configuration]))} -1", |
| 116 | + ) |
| 117 | + |
| 118 | + # 9. Execute the test |
| 119 | + causal_test_result = causal_test_case.execute_test(estimator) |
| 120 | + |
| 121 | + return causal_test_result |
| 122 | + |
| 123 | + |
| 124 | +def test_poisson_intensity_num_shapes(save=False): |
| 125 | + intensity_num_shapes_results = [] |
| 126 | + base_test_case = BaseTestCase(treatment_variable=intensity, outcome_variable=num_shapes_unit) |
| 127 | + for wh in range(1, 11): |
| 128 | + smt_data_path = f"{ROOT}/data/smt_100/data_smt_wh{wh}_100.csv" |
| 129 | + causal_test_case_list = [ |
| 130 | + CausalTestCase( |
| 131 | + base_test_case=base_test_case, |
| 132 | + expected_causal_effect=ExactValue(4, atol=0.5), |
| 133 | + treatment_value=treatment_value, |
| 134 | + control_value=control_value, |
| 135 | + estimate_type="risk_ratio", |
| 136 | + ) |
| 137 | + for control_value, treatment_value in [(1, 2), (2, 4), (4, 8), (8, 16)] |
| 138 | + ] |
| 139 | + test_suite = CausalTestSuite() |
| 140 | + test_suite.add_test_object( |
| 141 | + base_test_case, |
| 142 | + causal_test_case_list=causal_test_case_list, |
| 143 | + estimators=[LinearRegressionEstimator, EmpiricalMeanEstimator], |
| 144 | + ) |
| 145 | + test_suite_results = test_suite.execute_test_suite( |
| 146 | + causal_specification, pd.read_csv(smt_data_path, index_col=0).astype(float) |
| 147 | + ) |
| 148 | + |
| 149 | + smt_risk_ratios = [ |
| 150 | + causal_test_result.test_value.value |
| 151 | + for causal_test_result in test_suite_results[base_test_case]["EmpiricalMeanEstimator"] |
| 152 | + ] |
| 153 | + |
| 154 | + intensity_num_shapes_results += [ |
| 155 | + { |
| 156 | + "width": wh, |
| 157 | + "height": wh, |
| 158 | + "control": obs_causal_test_result.estimator.control_value, |
| 159 | + "treatment": obs_causal_test_result.estimator.treatment_value, |
| 160 | + "smt_risk_ratio": smt_causal_test_result.test_value.value, |
| 161 | + "obs_risk_ratio": obs_causal_test_result.test_value.value[0], |
| 162 | + } |
| 163 | + for obs_causal_test_result, smt_causal_test_result in zip( |
| 164 | + test_suite_results[base_test_case]["LinearRegressionEstimator"], |
| 165 | + test_suite_results[base_test_case]["EmpiricalMeanEstimator"], |
| 166 | + ) |
| 167 | + ] |
| 168 | + intensity_num_shapes_results = pd.DataFrame(intensity_num_shapes_results) |
| 169 | + if save: |
| 170 | + intensity_num_shapes_results.to_csv("intensity_num_shapes_results_random_1000.csv") |
| 171 | + logger.info("%s", intensity_num_shapes_results) |
| 172 | + |
| 173 | + |
| 174 | +def test_poisson_width_num_shapes(save=False): |
| 175 | + base_test_case = BaseTestCase(treatment_variable=width, outcome_variable=num_shapes_unit) |
| 176 | + causal_test_case_list = [ |
| 177 | + CausalTestCase( |
| 178 | + base_test_case=base_test_case, |
| 179 | + expected_causal_effect=Positive(), |
| 180 | + control_value=float(w), |
| 181 | + treatment_value=w + 1.0, |
| 182 | + estimate_type="ate_calculated", |
| 183 | + effect_modifier_configuration={"intensity": i}, |
| 184 | + ) |
| 185 | + for i in range(17) |
| 186 | + for w in range(1, 10) |
| 187 | + ] |
| 188 | + test_suite = CausalTestSuite() |
| 189 | + test_suite.add_test_object( |
| 190 | + base_test_case, |
| 191 | + causal_test_case_list=causal_test_case_list, |
| 192 | + estimators=[LinearRegressionEstimator], |
| 193 | + ) |
| 194 | + test_suite_results = test_suite.execute_test_suite( |
| 195 | + causal_specification, pd.read_csv(observational_data_path, index_col=0).astype(float) |
| 196 | + ) |
| 197 | + width_num_shapes_results = [ |
| 198 | + { |
| 199 | + "control": causal_test_result.estimator.control_value, |
| 200 | + "treatment": causal_test_result.estimator.treatment_value, |
| 201 | + "intensity": causal_test_result.effect_modifier_configuration["intensity"], |
| 202 | + "ate": causal_test_result.test_value.value[0], |
| 203 | + "ci_low": causal_test_result.confidence_intervals[0][0], |
| 204 | + "ci_high": causal_test_result.confidence_intervals[1][0], |
| 205 | + } |
| 206 | + for causal_test_result in test_suite_results[base_test_case]["LinearRegressionEstimator"] |
| 207 | + ] |
| 208 | + width_num_shapes_results = pd.DataFrame(width_num_shapes_results) |
| 209 | + if save: |
| 210 | + width_num_shapes_results.to_csv("width_num_shapes_results_random_1000.csv") |
| 211 | + logger.info("%s", width_num_shapes_results) |
| 212 | + |
| 213 | + |
| 214 | +if __name__ == "__main__": |
| 215 | + test_poisson_intensity_num_shapes(save=False) |
| 216 | + # test_poisson_width_num_shapes(save=True) |
0 commit comments