Skip to content

Commit 5d6c8eb

Browse files
committed
doctest clean and added to github actions
1 parent 2220ed0 commit 5d6c8eb

File tree

7 files changed: +124 additions, −57 deletions

.github/workflows/ci.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,10 @@ jobs:
3434
uses: actions/setup-python@v3
3535
with:
3636
python-version: ${{ matrix.python-version }}
37+
- name: Run doctests
38+
run: |
39+
pip install -e .[test]
40+
pytest --doctest-modules causalpy/
3741
- name: Run tests
3842
run: |
3943
pip install -e .[test]

causalpy/data/simulate_data.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,9 @@ def generate_synthetic_control_data(
5656
5757
Example
5858
--------
59+
>>> from causalpy.data.simulate_data import generate_synthetic_control_data
5960
>>> df, weightings_true = generate_synthetic_control_data(
60-
... treatment_time=treatment_time
61+
... treatment_time=70
6162
... )
6263
"""
6364

@@ -196,6 +197,7 @@ def generate_did():
196197
197198
Example
198199
--------
200+
>>> from causalpy.data.simulate_data import generate_did
199201
>>> df = generate_did()
200202
"""
201203
# true parameters
@@ -249,6 +251,7 @@ def generate_regression_discontinuity_data(
249251
Example
250252
--------
251253
>>> import pathlib
254+
>>> from causalpy.data.simulate_data import generate_regression_discontinuity_data
252255
>>> df = generate_regression_discontinuity_data(true_treatment_threshold=0.5)
253256
>>> df.to_csv(pathlib.Path.cwd() / 'regression_discontinuity.csv', index=False)
254257
"""
@@ -278,9 +281,10 @@ def generate_ancova_data(
278281
Example
279282
--------
280283
>>> import pathlib
284+
>>> from causalpy.data.simulate_data import generate_ancova_data
281285
>>> df = generate_ancova_data(
282286
... N=200,
283-
... pre_treatment_threshold=np.array([10, 12]),
287+
... pre_treatment_means=np.array([10, 12]),
284288
... treatment_effect=2,
285289
... sigma=1
286290
... )

causalpy/pymc_experiments.py

Lines changed: 65 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,7 @@ def summary(self) -> None:
359359
... ),
360360
... )
361361
>>> result.summary()
362-
===============================Synthetic Control===============================
362+
==================================Pre-Post Fit==================================
363363
Formula: actual ~ 0 + a + b + c + d + e + f + g
364364
Model coefficients:
365365
a 0.33, 94% HDI [0.30, 0.38]
@@ -757,7 +757,7 @@ def _plot_causal_impact_arrow(self, ax):
757757
def _causal_impact_summary_stat(self) -> str:
758758
"""Computes the mean and 94% credible interval bounds for the causal impact."""
759759
percentiles = self.causal_impact.quantile([0.03, 1 - 0.03]).values
760-
ci = r"$CI_{94\%}$" + f"[{percentiles[0]:.2f}, {percentiles[1]:.2f}]"
760+
ci = "$CI_{94%}$" + f"[{percentiles[0]:.2f}, {percentiles[1]:.2f}]"
761761
causal_impact = f"{self.causal_impact.mean():.2f}, "
762762
return f"Causal impact = {causal_impact + ci}"
763763

@@ -767,16 +767,31 @@ def summary(self) -> None:
767767
768768
Example
769769
--------
770-
Assuming `result` is a DiD experiment
771-
770+
>>> import causalpy as cp
771+
>>> df = cp.load_data("did")
772+
>>> seed = 42
773+
>>> result = cp.pymc_experiments.DifferenceInDifferences(
774+
... df,
775+
... formula="y ~ 1 + group*post_treatment",
776+
... time_variable_name="t",
777+
... group_variable_name="group",
778+
... model=cp.pymc_models.LinearRegression(
779+
... sample_kwargs={
780+
... "target_accept": 0.95,
781+
... "random_seed": seed,
782+
... "progressbar": False,
783+
... }
784+
... )
785+
... )
772786
>>> result.summary()
773-
==========================Difference in Differences=========================
787+
===========================Difference in Differences============================
774788
Formula: y ~ 1 + group*post_treatment
789+
<BLANKLINE>
775790
Results:
776791
Causal impact = 0.51, $CI_{94%}$[0.41, 0.61]
777792
Model coefficients:
778793
Intercept 1.08, 94% HDI [1.03, 1.13]
779-
post_treatment[T.True] 0.98, 94% HDI [0.91, 1.06]
794+
post_treatment[T.True] 0.98, 94% HDI [0.92, 1.05]
780795
group 0.16, 94% HDI [0.09, 0.23]
781796
group:post_treatment[T.True] 0.51, 94% HDI [0.41, 0.61]
782797
sigma 0.08, 94% HDI [0.07, 0.10]
@@ -995,19 +1010,35 @@ def summary(self) -> None:
9951010
9961011
Example
9971012
--------
1013+
>>> import causalpy as cp
1014+
>>> df = cp.load_data("rd")
1015+
>>> seed = 42
1016+
>>> result = cp.pymc_experiments.RegressionDiscontinuity(
1017+
... df,
1018+
... formula="y ~ 1 + x + treated + x:treated",
1019+
... model=cp.pymc_models.LinearRegression(
1020+
... sample_kwargs={
1021+
... "target_accept": 0.95,
1022+
... "random_seed": seed,
1023+
... "progressbar": False,
1024+
... },
1025+
... ),
1026+
... treatment_threshold=0.5,
1027+
... )
9981028
>>> result.summary()
999-
============================Regression Discontinuity==========================
1029+
============================Regression Discontinuity============================
10001030
Formula: y ~ 1 + x + treated + x:treated
10011031
Running variable: x
10021032
Threshold on running variable: 0.5
1033+
<BLANKLINE>
10031034
Results:
1004-
Discontinuity at threshold = 0.92
1035+
Discontinuity at threshold = 0.91
10051036
Model coefficients:
1006-
Intercept 0.09, 94% HDI [0.00, 0.17]
1007-
treated[T.True] 2.48, 94% HDI [1.66, 3.27]
1037+
Intercept 0.09, 94% HDI [-0.00, 0.17]
1038+
treated[T.True] 2.45, 94% HDI [1.66, 3.28]
10081039
x 1.32, 94% HDI [1.14, 1.50]
1009-
x:treated[T.True] -3.12, 94% HDI [-4.17, -2.05]
1010-
sigma 0.35, 94% HDI [0.31, 0.41]
1040+
x:treated[T.True] -3.08, 94% HDI [-4.17, -2.05]
1041+
sigma 0.36, 94% HDI [0.31, 0.41]
10111042
"""
10121043

10131044
print(f"{self.expt_type:=^80}")
@@ -1182,7 +1213,7 @@ def plot(self):
11821213
def _causal_impact_summary_stat(self) -> str:
11831214
"""Computes the mean and 94% credible interval bounds for the causal impact."""
11841215
percentiles = self.causal_impact.quantile([0.03, 1 - 0.03]).values
1185-
ci = r"$CI_{94\%}$" + f"[{percentiles[0]:.2f}, {percentiles[1]:.2f}]"
1216+
ci = r"$CI_{94%}$" + f"[{percentiles[0]:.2f}, {percentiles[1]:.2f}]"
11861217
causal_impact = f"{self.causal_impact.mean():.2f}, "
11871218
return f"Causal impact = {causal_impact + ci}"
11881219

@@ -1192,14 +1223,31 @@ def summary(self) -> None:
11921223
11931224
Example
11941225
--------
1226+
>>> import causalpy as cp
1227+
>>> df = cp.load_data("anova1")
1228+
>>> seed = 42
1229+
>>> result = cp.pymc_experiments.PrePostNEGD(
1230+
... df,
1231+
... formula="post ~ 1 + C(group) + pre",
1232+
... group_variable_name="group",
1233+
... pretreatment_variable_name="pre",
1234+
... model=cp.pymc_models.LinearRegression(
1235+
... sample_kwargs={
1236+
... "target_accept": 0.95,
1237+
... "random_seed": seed,
1238+
... "progressbar": False,
1239+
... }
1240+
... )
1241+
... )
11951242
>>> result.summary()
1196-
=================Pretest/posttest Nonequivalent Group Design================
1243+
==================Pretest/posttest Nonequivalent Group Design===================
11971244
Formula: post ~ 1 + C(group) + pre
1245+
<BLANKLINE>
11981246
Results:
1199-
Causal impact = 1.89, $CI_{94%}$[1.70, 2.07]
1247+
Causal impact = 1.88, $CI_{94%}$[1.69, 2.07]
12001248
Model coefficients:
1201-
Intercept -0.46, 94% HDI [-1.17, 0.22]
1202-
C(group)[T.1] 1.89, 94% HDI [1.70, 2.07]
1249+
Intercept -0.47, 94% HDI [-1.16, 0.24]
1250+
C(group)[T.1] 1.88, 94% HDI [1.69, 2.07]
12031251
pre 1.05, 94% HDI [0.98, 1.12]
12041252
sigma 0.51, 94% HDI [0.46, 0.56]
12051253

causalpy/pymc_models.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ def fit(self, X, y, coords: Optional[Dict[str, Any]] = None) -> None:
9999
>>> model = MyToyModel(
100100
... sample_kwargs={"chains": 2, "draws": 2, "progressbar": False}
101101
... )
102-
>>> model.fit(X, y) # doctest: +ELLIPSIS
102+
>>> model.fit(X, y)
103103
Inference ...
104104
"""
105105
self.build_model(X, y, coords)
@@ -139,10 +139,10 @@ def predict(self, X):
139139
>>> model = MyToyModel(
140140
... sample_kwargs={"chains": 2, "draws": 2, "progressbar": False}
141141
... )
142-
>>> model.fit(X, y) # doctest: +ELLIPSIS
142+
>>> model.fit(X, y)
143143
Inference...
144144
>>> X_new = rng.normal(loc=0, scale=1, size=(20,2))
145-
>>> model.predict(X_new) # doctest: +ELLIPSIS
145+
>>> model.predict(X_new)
146146
Inference...
147147
"""
148148

@@ -177,17 +177,16 @@ def score(self, X, y) -> pd.Series:
177177
... mu = pm.Deterministic("mu", pm.math.dot(X_, beta))
178178
... pm.Normal("y_hat", mu=mu, sigma=sigma, observed=y_)
179179
>>> rng = np.random.default_rng(seed=42)
180-
>>> X = rng.normal(loc=0, scale=1, size=(20, 2))
181-
>>> y = rng.normal(loc=0, scale=1, size=(20,))
180+
>>> X = rng.normal(loc=0, scale=1, size=(200, 2))
181+
>>> y = rng.normal(loc=0, scale=1, size=(200,))
182182
>>> model = MyToyModel(
183-
... sample_kwargs={"chains": 2, "draws": 200, "progressbar": False}
183+
... sample_kwargs={"chains": 2, "draws": 2000, "progressbar": False}
184184
... )
185-
>>> model.fit(X, y) # doctest: +ELLIPSIS
185+
>>> model.fit(X, y)
186186
Inference...
187-
>>> model.score(X, y)
188-
Sampling: [y_hat]
189-
r2 0.376489
190-
r2_std 0.081305
187+
>>> round(model.score(X, y),2) # using round() to simplify doctest
188+
r2 0.34
189+
r2_std 0.02
191190
dtype: float64
192191
"""
193192
yhat = self.predict(X)
@@ -223,7 +222,8 @@ class WeightedSumFitter(ModelBuilder):
223222
>>> X = sc[['a', 'b', 'c', 'd', 'e', 'f', 'g']]
224223
>>> y = np.asarray(sc['actual']).reshape((sc.shape[0], 1))
225224
>>> wsf = WeightedSumFitter(sample_kwargs={"progressbar": False})
226-
>>> _ = wsf.fit(X,y)
225+
>>> wsf.fit(X,y)
226+
Inference ...
227227
"""
228228

229229
def build_model(self, X, y, coords):
@@ -279,7 +279,7 @@ class LinearRegression(ModelBuilder):
279279
... 'coeffs': ['x', 'treated'],
280280
... 'obs_indx': np.arange(rd.shape[0])
281281
... },
282-
... ) # doctest: +ELLIPSIS
282+
... )
283283
Inference...
284284
"""
285285

causalpy/skl_experiments.py

Lines changed: 31 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,6 @@ class PrePostFit(ExperimentalDesign):
5959
... formula="actual ~ 0 + a + b + c + d + e + f + g",
6060
... model = cp.skl_models.WeightedProportion()
6161
... )
62-
6362
"""
6463

6564
def __init__(
@@ -181,10 +180,18 @@ def get_coeffs(self):
181180
182181
Example
183182
--------
183+
>>> from sklearn.linear_model import LinearRegression
184+
>>> import causalpy as cp
185+
>>> df = cp.load_data("sc")
186+
>>> treatment_time = 70
187+
>>> result = cp.skl_experiments.PrePostFit(
188+
... df,
189+
... treatment_time,
190+
... formula="actual ~ 0 + a + b + c + d + e + f + g",
191+
... model = cp.skl_models.WeightedProportion()
192+
... )
184193
>>> result.get_coeffs()
185-
array([3.97370896e-01, 1.53881980e-01, 4.48747123e-01, 1.04639857e-16,
186-
0.00000000e+00, 0.00000000e+00, 2.92931287e-16])
187-
194+
array(...)
188195
"""
189196
return np.squeeze(self.model.coef_)
190197

@@ -262,7 +269,6 @@ class SyntheticControl(PrePostFit):
262269
... formula="actual ~ 0 + a + b + c + d + e + f + g",
263270
... model = cp.skl_models.WeightedProportion()
264271
... )
265-
266272
"""
267273

268274
def plot(self, plot_predictors=False, **kwargs):
@@ -293,21 +299,22 @@ class DifferenceInDifferences(ExperimentalDesign):
293299
:param group_variable_name:
294300
Name of the data column for the group variable
295301
:param model:
296-
A PyMC model for difference in differences
302+
An skl model for difference in differences
297303
298304
Example
299305
--------
306+
>>> import causalpy as cp
307+
>>> from sklearn.linear_model import LinearRegression
300308
>>> df = cp.load_data("did")
301309
>>> result = cp.skl_experiments.DifferenceInDifferences(
302-
... data,
310+
... df,
303311
... formula="y ~ 1 + group*post_treatment",
304312
... time_variable_name="t",
305313
... group_variable_name="group",
306314
... treated=1,
307315
... untreated=0,
308316
... model=LinearRegression(),
309317
... )
310-
311318
"""
312319

313320
def __init__(
@@ -497,14 +504,15 @@ class RegressionDiscontinuity(ExperimentalDesign):
497504
498505
Example
499506
--------
507+
>>> import causalpy as cp
508+
>>> from sklearn.linear_model import LinearRegression
500509
>>> data = cp.load_data("rd")
501510
>>> result = cp.skl_experiments.RegressionDiscontinuity(
502511
... data,
503512
... formula="y ~ 1 + x + treated",
504513
... model=LinearRegression(),
505514
... treatment_threshold=0.5,
506515
... )
507-
508516
"""
509517

510518
def __init__(
@@ -640,18 +648,27 @@ def summary(self):
640648
641649
Example
642650
--------
643-
>>> result.summary()
651+
>>> import causalpy as cp
652+
>>> from sklearn.linear_model import LinearRegression
653+
>>> data = cp.load_data("rd")
654+
>>> result = cp.skl_experiments.RegressionDiscontinuity(
655+
... data,
656+
... formula="y ~ 1 + x + treated",
657+
... model=LinearRegression(),
658+
... treatment_threshold=0.5,
659+
... )
660+
>>> result.summary() # doctest: +NORMALIZE_WHITESPACE
644661
Difference in Differences experiment
645662
Formula: y ~ 1 + x + treated
646663
Running variable: x
647664
Threshold on running variable: 0.5
665+
<BLANKLINE>
648666
Results:
649667
Discontinuity at threshold = 0.19
650668
Model coefficients:
651-
Intercept 0.0
652-
treated[T.True] 0.19034196317793994
653-
x 1.229600855360073
654-
669+
Intercept 0.0
670+
treated[T.True] 0.19034196317793994
671+
x 1.229600855360073
655672
"""
656673
print("Difference in Differences experiment")
657674
print(f"Formula: {self.formula}")

Comments (0)