Skip to content

Commit 2220ed0

Browse files
committed
Doctests now pass for pymc_experiments, except for the summaries
1 parent 5c2870e commit 2220ed0

File tree

2 files changed

+92
-111
lines changed

2 files changed

+92
-111
lines changed

causalpy/pymc_experiments.py

Lines changed: 89 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -55,38 +55,22 @@ def idata(self):
5555
5656
Example
5757
--------
58-
If `result` is the result of the Difference in Differences experiment example
59-
60-
>>> result.idata
61-
Inference data with groups:
62-
> posterior
63-
> posterior_predictive
64-
> sample_stats
65-
> prior
66-
> prior_predictive
67-
> observed_data
68-
> constant_data
69-
>>> result.idata.posterior
58+
>>> import causalpy as cp
59+
>>> df = cp.load_data("did")
60+
>>> seed = 42
61+
>>> result = cp.pymc_experiments.DifferenceInDifferences(
62+
... df,
63+
... formula="y ~ 1 + group*post_treatment",
64+
... time_variable_name="t",
65+
... group_variable_name="group",
66+
... model=cp.pymc_models.LinearRegression(
67+
... sample_kwargs={"random_seed": seed, "progressbar": False}),
68+
... )
69+
>>> result.idata # doctest: +ELLIPSIS
70+
Inference data...
71+
>>> result.idata.posterior # doctest: +ELLIPSIS
7072
<xarray.Dataset>
71-
Dimensions: (chain: 4, draw: 1000, coeffs: 4, obs_ind: 40)
72-
Coordinates:
73-
* chain (chain) int64 0 1 2 3
74-
* draw (draw) int64 0 1 2 3 4 5 6 7 8 ... 992 993 994 995 996 997 998
75-
999
76-
* coeffs (coeffs) <U28 'Intercept' ... 'group:post_treatment[T.True]'
77-
* obs_ind (obs_ind) int64 0 1 2 3 4 5 6 7 8 9 ... 31 32 33 34 35 36 37
78-
38 39
79-
Data variables:
80-
beta (chain, draw, coeffs) float64 1.04 1.013 0.173 ... 0.1873 0.5225
81-
sigma (chain, draw) float64 0.09331 0.1031 0.1024 ... 0.0824 0.06907
82-
mu (chain, draw, obs_ind) float64 1.04 2.053 1.213 ... 1.265 2.747
83-
Attributes:
84-
created_at: 2023-08-23T20:03:45.709265
85-
arviz_version: 0.16.1
86-
inference_library: pymc
87-
inference_library_version: 5.7.2
88-
sampling_time: 0.8851289749145508
89-
tuning_steps: 1000
73+
Dimensions...
9074
"""
9175

9276
return self.model.idata
@@ -97,8 +81,17 @@ def print_coefficients(self) -> None:
9781
9882
Example
9983
--------
100-
If `result` is from the Difference in Differences experiment example
101-
84+
>>> import causalpy as cp
85+
>>> df = cp.load_data("did")
86+
>>> seed = 42
87+
>>> result = cp.pymc_experiments.DifferenceInDifferences(
88+
... df,
89+
... formula="y ~ 1 + group*post_treatment",
90+
... time_variable_name="t",
91+
... group_variable_name="group",
92+
... model=cp.pymc_models.LinearRegression(
93+
... sample_kwargs={"random_seed": seed, "progressbar": False}),
94+
... )
10295
>>> result.print_coefficients()
10396
Model coefficients:
10497
Intercept 1.08, 94% HDI [1.03, 1.13]
@@ -140,6 +133,7 @@ class PrePostFit(ExperimentalDesign):
140133
141134
Example
142135
--------
136+
>>> import causalpy as cp
143137
>>> sc = cp.load_data("sc")
144138
>>> treatment_time = 70
145139
>>> seed = 42
@@ -148,20 +142,13 @@ class PrePostFit(ExperimentalDesign):
148142
... treatment_time,
149143
... formula="actual ~ 0 + a + b + c + d + e + f + g",
150144
... model=cp.pymc_models.WeightedSumFitter(
151-
... sample_kwargs={"target_accept": 0.95, "random_seed": seed}
145+
... sample_kwargs={
146+
... "target_accept": 0.95,
147+
... "random_seed": seed,
148+
...             "progressbar": False,
149+
... }
152150
... ),
153151
... )
154-
Auto-assigning NUTS sampler...
155-
Initializing NUTS using jitter+adapt_diag...
156-
Multiprocess sampling (4 chains in 4 jobs)
157-
NUTS: [beta, sigma]
158-
Sampling 4 chains for 1_000 tune and 1_000 draw iterations
159-
(4_000 + 4_000 draws total) took 11 seconds.
160-
Sampling: [beta, sigma, y_hat]
161-
Sampling: [y_hat]
162-
Sampling: [y_hat]
163-
Sampling: [y_hat]
164-
Sampling: [y_hat]
165152
"""
166153

167154
def __init__(
@@ -249,8 +236,7 @@ def plot(self, counterfactual_label="Counterfactual", **kwargs):
249236
250237
Example
251238
--------
252-
>>> result.plot()
253-
239+
>>> result.plot() # doctest: +SKIP
254240
"""
255241
fig, ax = plt.subplots(3, 1, sharex=True, figsize=(7, 8))
256242

@@ -356,6 +342,22 @@ def summary(self) -> None:
356342
357343
Example
358344
---------
345+
>>> import causalpy as cp
346+
>>> sc = cp.load_data("sc")
347+
>>> treatment_time = 70
348+
>>> seed = 42
349+
>>> result = cp.pymc_experiments.PrePostFit(
350+
... sc,
351+
... treatment_time,
352+
... formula="actual ~ 0 + a + b + c + d + e + f + g",
353+
... model=cp.pymc_models.WeightedSumFitter(
354+
... sample_kwargs={
355+
... "target_accept": 0.95,
356+
... "random_seed": seed,
357+
... "progressbar": False,
358+
... }
359+
... ),
360+
... )
359361
>>> result.summary()
360362
===============================Synthetic Control===============================
361363
Formula: actual ~ 0 + a + b + c + d + e + f + g
@@ -391,6 +393,7 @@ class InterruptedTimeSeries(PrePostFit):
391393
392394
Example
393395
--------
396+
>>> import causalpy as cp
394397
>>> df = (
395398
... cp.load_data("its")
396399
... .assign(date=lambda x: pd.to_datetime(x["date"]))
@@ -402,19 +405,14 @@ class InterruptedTimeSeries(PrePostFit):
402405
... df,
403406
... treatment_time,
404407
... formula="y ~ 1 + t + C(month)",
405-
... model=cp.pymc_models.LinearRegression(sample_kwargs={"random_seed": seed}),
408+
... model=cp.pymc_models.LinearRegression(
409+
... sample_kwargs={
410+
... "target_accept": 0.95,
411+
... "random_seed": seed,
412+
... "progressbar": False,
413+
... }
414+
... )
406415
... )
407-
Auto-assigning NUTS sampler...
408-
Initializing NUTS using jitter+adapt_diag...
409-
Multiprocess sampling (4 chains in 4 jobs)
410-
NUTS: [beta, sigma]
411-
Sampling 4 chains for 1_000 tune and 1_000 draw iterations
412-
(4_000 + 4_000 draws total) took 3 seconds.
413-
Sampling: [beta, sigma, y_hat]
414-
Sampling: [y_hat]
415-
Sampling: [y_hat]
416-
Sampling: [y_hat]
417-
Sampling: [y_hat]
418416
"""
419417

420418
expt_type = "Interrupted Time Series"
@@ -434,6 +432,7 @@ class SyntheticControl(PrePostFit):
434432
435433
Example
436434
--------
435+
>>> import causalpy as cp
437436
>>> df = cp.load_data("sc")
438437
>>> treatment_time = 70
439438
>>> seed = 42
@@ -442,20 +441,13 @@ class SyntheticControl(PrePostFit):
442441
... treatment_time,
443442
... formula="actual ~ 0 + a + b + c + d + e + f + g",
444443
... model=cp.pymc_models.WeightedSumFitter(
445-
... sample_kwargs={"target_accept": 0.95, "random_seed": seed}
444+
... sample_kwargs={
445+
... "target_accept": 0.95,
446+
... "random_seed": seed,
447+
... "progressbar": False,
448+
... }
446449
... ),
447450
... )
448-
Auto-assigning NUTS sampler...
449-
Initializing NUTS using jitter+adapt_diag...
450-
Multiprocess sampling (4 chains in 4 jobs)
451-
NUTS: [beta, sigma]
452-
Sampling 4 chains for 1_000 tune and 1_000 draw iterations
453-
(4_000 + 4_000 draws total) took 11 seconds.
454-
Sampling: [beta, sigma, y_hat]
455-
Sampling: [y_hat]
456-
Sampling: [y_hat]
457-
Sampling: [y_hat]
458-
Sampling: [y_hat]
459451
"""
460452

461453
expt_type = "Synthetic Control"
@@ -492,26 +484,22 @@ class DifferenceInDifferences(ExperimentalDesign):
492484
493485
Example
494486
--------
487+
>>> import causalpy as cp
495488
>>> df = cp.load_data("did")
496489
>>> seed = 42
497490
>>> result = cp.pymc_experiments.DifferenceInDifferences(
498491
... df,
499492
... formula="y ~ 1 + group*post_treatment",
500493
... time_variable_name="t",
501494
... group_variable_name="group",
502-
... model=cp.pymc_models.LinearRegression(sample_kwargs={"random_seed": seed}),
495+
... model=cp.pymc_models.LinearRegression(
496+
... sample_kwargs={
497+
... "target_accept": 0.95,
498+
... "random_seed": seed,
499+
... "progressbar": False,
500+
... }
501+
... )
503502
... )
504-
Auto-assigning NUTS sampler...
505-
Initializing NUTS using jitter+adapt_diag...
506-
Multiprocess sampling (4 chains in 4 jobs)
507-
NUTS: [beta, sigma]
508-
Sampling 4 chains for 1_000 tune and 1_000 draw iterations
509-
(4_000 + 4_000 draws total) took 1 seconds.
510-
Sampling: [beta, sigma, y_hat]
511-
Sampling: [y_hat]
512-
Sampling: [y_hat]
513-
Sampling: [y_hat]
514-
Sampling: [y_hat]
515503
"""
516504

517505
def __init__(
@@ -637,7 +625,7 @@ def plot(self):
637625
--------
638626
Assuming `result` is the result of a DiD experiment:
639627
640-
>>> result.plot()
628+
>>> result.plot() # doctest: +SKIP
641629
"""
642630
fig, ax = plt.subplots()
643631

@@ -825,25 +813,21 @@ class RegressionDiscontinuity(ExperimentalDesign):
825813
826814
Example
827815
--------
816+
>>> import causalpy as cp
828817
>>> df = cp.load_data("rd")
829818
>>> seed = 42
830819
>>> result = cp.pymc_experiments.RegressionDiscontinuity(
831820
... df,
832821
... formula="y ~ 1 + x + treated + x:treated",
833-
... model=cp.pymc_models.LinearRegression(sample_kwargs={"random_seed": seed}),
822+
... model=cp.pymc_models.LinearRegression(
823+
... sample_kwargs={
824+
... "target_accept": 0.95,
825+
... "random_seed": seed,
826+
... "progressbar": False,
827+
... },
828+
... ),
834829
... treatment_threshold=0.5,
835830
... )
836-
Auto-assigning NUTS sampler...
837-
Initializing NUTS using jitter+adapt_diag...
838-
Multiprocess sampling (4 chains in 4 jobs)
839-
NUTS: [beta, sigma]
840-
Sampling 4 chains for 1_000 tune and 1_000 draw iterations
841-
(4_000 + 4_000 draws total) took 2 seconds.
842-
Sampling: [beta, sigma, y_hat]
843-
Sampling: [y_hat]
844-
Sampling: [y_hat]
845-
Sampling: [y_hat]
846-
Sampling: [y_hat]
847831
"""
848832

849833
def __init__(
@@ -959,7 +943,7 @@ def plot(self):
959943
960944
Example
961945
--------
962-
>>> result.plot()
946+
>>> result.plot() # doctest: +SKIP
963947
"""
964948
fig, ax = plt.subplots()
965949
# Plot raw data
@@ -1054,25 +1038,22 @@ class PrePostNEGD(ExperimentalDesign):
10541038
10551039
Example
10561040
--------
1041+
>>> import causalpy as cp
10571042
>>> df = cp.load_data("anova1")
10581043
>>> seed = 42
10591044
>>> result = cp.pymc_experiments.PrePostNEGD(
10601045
... df,
10611046
... formula="post ~ 1 + C(group) + pre",
10621047
... group_variable_name="group",
10631048
... pretreatment_variable_name="pre",
1064-
... model=cp.pymc_models.LinearRegression(sample_kwargs={"random_seed": seed}),
1049+
... model=cp.pymc_models.LinearRegression(
1050+
... sample_kwargs={
1051+
... "target_accept": 0.95,
1052+
... "random_seed": seed,
1053+
... "progressbar": False,
1054+
... }
1055+
... )
10651056
... )
1066-
Auto-assigning NUTS sampler...
1067-
Initializing NUTS using jitter+adapt_diag...
1068-
Multiprocess sampling (4 chains in 4 jobs)
1069-
NUTS: [beta, sigma]
1070-
Sampling 4 chains for 1_000 tune and 1_000 draw iterations
1071-
(4_000 + 4_000 draws total) took 3 seconds.
1072-
Sampling: [beta, sigma, y_hat]
1073-
Sampling: [y_hat]
1074-
Sampling: [y_hat]
1075-
Sampling: [y_hat]
10761057
"""
10771058

10781059
def __init__(

causalpy/pymc_models.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -180,14 +180,14 @@ def score(self, X, y) -> pd.Series:
180180
>>> X = rng.normal(loc=0, scale=1, size=(20, 2))
181181
>>> y = rng.normal(loc=0, scale=1, size=(20,))
182182
>>> model = MyToyModel(
183-
... sample_kwargs={"chains": 2, "draws": 2, "progressbar": False}
183+
... sample_kwargs={"chains": 2, "draws": 200, "progressbar": False}
184184
... )
185185
>>> model.fit(X, y) # doctest: +ELLIPSIS
186186
Inference...
187187
>>> model.score(X, y)
188188
Sampling: [y_hat]
189-
r2 0.352251
190-
r2_std 0.051624
189+
r2 0.376489
190+
r2_std 0.081305
191191
dtype: float64
192192
"""
193193
yhat = self.predict(X)

0 commit comments

Comments
 (0)