@@ -34,129 +34,6 @@ def add_modelling_assumptions(self):
34
34
self .modelling_assumptions .append ("The outcome must be binary." )
35
35
self .modelling_assumptions .append ("Independently and identically distributed errors." )
36
36
37
- def _predict (self , data = None , adjustment_config : dict = None ) -> pd .DataFrame :
38
- """Estimate the outcomes under control and treatment.
39
-
40
- :param data: The data to use, defaults to `self.df`. Controllable for boostrap sampling.
41
- :param: adjustment_config: The values of the adjustment variables to use.
42
-
43
- :return: The estimated outcome under control and treatment, with confidence intervals in the form of a
44
- dataframe with columns "predicted", "se", "ci_lower", and "ci_upper".
45
- """
46
- if adjustment_config is None :
47
- adjustment_config = {}
48
- if set (self .adjustment_set ) != set (adjustment_config ):
49
- raise ValueError (
50
- f"Invalid adjustment configuration { adjustment_config } . Must specify values for { self .adjustment_set } "
51
- )
52
-
53
- return super ()._predict (data , adjustment_config )
54
-
55
- def estimate_control_treatment (
56
- self , adjustment_config : dict = None , bootstrap_size : int = 100
57
- ) -> tuple [pd .Series , pd .Series ]:
58
- """Estimate the outcomes under control and treatment.
59
-
60
- :return: The estimated control and treatment values and their confidence
61
- intervals in the form ((ci_low, control, ci_high), (ci_low, treatment, ci_high)).
62
- """
63
- if adjustment_config is None :
64
- adjustment_config = {}
65
- y = self ._predict (self .df , adjustment_config = adjustment_config )["predicted" ]
66
-
67
- try :
68
- bootstrap_samples = [
69
- self ._predict (self .df .sample (len (self .df ), replace = True ), adjustment_config = adjustment_config )[
70
- "predicted"
71
- ]
72
- for _ in range (bootstrap_size )
73
- ]
74
- control , treatment = zip (* [(x .iloc [1 ], x .iloc [0 ]) for x in bootstrap_samples ])
75
- except PerfectSeparationError :
76
- logger .warning (
77
- "Perfect separation detected, results not available. Cannot calculate confidence intervals for such "
78
- "a small dataset."
79
- )
80
- return (y .iloc [1 ], None ), (y .iloc [0 ], None )
81
- except np .linalg .LinAlgError :
82
- logger .warning ("Singular matrix detected. Confidence intervals not available. Try with a larger data set" )
83
- return (y .iloc [1 ], None ), (y .iloc [0 ], None )
84
-
85
- # Delta method confidence intervals from
86
- # https://stackoverflow.com/questions/47414842/confidence-interval-of-probability-prediction-from-logistic-regression-statsmode
87
- # cov = model.cov_params()
88
- # gradient = (y * (1 - y) * x.T).T # matrix of gradients for each observation
89
- # std_errors = np.array([np.sqrt(np.dot(np.dot(g, cov), g)) for g in gradient.to_numpy()])
90
- # c = 1.96 # multiplier for confidence interval
91
- # upper = np.maximum(0, np.minimum(1, y + std_errors * c))
92
- # lower = np.maximum(0, np.minimum(1, y - std_errors * c))
93
-
94
- return (y .iloc [1 ], np .array (control )), (y .iloc [0 ], np .array (treatment ))
95
-
96
- def estimate_ate (self , adjustment_config : dict = None , bootstrap_size : int = 100 ) -> float :
97
- """Estimate the ate effect of the treatment on the outcome. That is, the change in outcome caused
98
- by changing the treatment variable from the control value to the treatment value. Here, we actually
99
- calculate the expected outcomes under control and treatment and take one away from the other. This
100
- allows for custom terms to be put in such as squares, inverses, products, etc.
101
-
102
- :return: The estimated average treatment effect and 95% confidence intervals
103
- """
104
- if adjustment_config is None :
105
- adjustment_config = {}
106
- (control_outcome , control_bootstraps ), (
107
- treatment_outcome ,
108
- treatment_bootstraps ,
109
- ) = self .estimate_control_treatment (bootstrap_size = bootstrap_size , adjustment_config = adjustment_config )
110
- estimate = treatment_outcome - control_outcome
111
-
112
- if control_bootstraps is None or treatment_bootstraps is None :
113
- return estimate , (None , None )
114
-
115
- bootstraps = sorted (list (treatment_bootstraps - control_bootstraps ))
116
- bound = int ((bootstrap_size * self .alpha ) / 2 )
117
- ci_low = bootstraps [bound ]
118
- ci_high = bootstraps [bootstrap_size - bound ]
119
-
120
- logger .info (
121
- f"Changing { self .treatment } from { self .control_value } to { self .treatment_value } gives an estimated "
122
- f"ATE of { ci_low } < { estimate } < { ci_high } "
123
- )
124
- assert ci_low < estimate < ci_high , f"Expecting { ci_low } < { estimate } < { ci_high } "
125
-
126
- return estimate , (ci_low , ci_high )
127
-
128
- def estimate_risk_ratio (self , adjustment_config : dict = None , bootstrap_size : int = 100 ) -> float :
129
- """Estimate the ate effect of the treatment on the outcome. That is, the change in outcome caused
130
- by changing the treatment variable from the control value to the treatment value. Here, we actually
131
- calculate the expected outcomes under control and treatment and divide one by the other. This
132
- allows for custom terms to be put in such as squares, inverses, products, etc.
133
-
134
- :return: The estimated risk ratio and 95% confidence intervals.
135
- """
136
- if adjustment_config is None :
137
- adjustment_config = {}
138
- (control_outcome , control_bootstraps ), (
139
- treatment_outcome ,
140
- treatment_bootstraps ,
141
- ) = self .estimate_control_treatment (bootstrap_size = bootstrap_size , adjustment_config = adjustment_config )
142
- estimate = treatment_outcome / control_outcome
143
-
144
- if control_bootstraps is None or treatment_bootstraps is None :
145
- return estimate , (None , None )
146
-
147
- bootstraps = sorted (list (treatment_bootstraps / control_bootstraps ))
148
- bound = ceil ((bootstrap_size * self .alpha ) / 2 )
149
- ci_low = bootstraps [bound ]
150
- ci_high = bootstraps [bootstrap_size - bound ]
151
-
152
- logger .info (
153
- f"Changing { self .treatment } from { self .control_value } to { self .treatment_value } gives an estimated "
154
- f"risk ratio of { ci_low } < { estimate } < { ci_high } "
155
- )
156
- assert ci_low < estimate < ci_high , f"Expecting { ci_low } < { estimate } < { ci_high } "
157
-
158
- return estimate , (ci_low , ci_high )
159
-
160
37
def estimate_unit_odds_ratio (self ) -> float :
161
38
"""Estimate the odds ratio of increasing the treatment by one. In logistic regression, this corresponds to the
162
39
coefficient of the treatment of interest.
0 commit comments