Skip to content

Commit 493a7b3

Browse files
committed
make submodules visible in __init__.py, changed API, reran notebooks
1 parent eed57a6 commit 493a7b3

File tree

11 files changed

+265
-217
lines changed

11 files changed

+265
-217
lines changed

causalpy/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
import causalpy.pymc_experiments
2+
import causalpy.pymc_models
3+
import causalpy.skl_experiments
4+
import causalpy.skl_models
5+
6+
from .data import load_data

causalpy/data/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
"""Code for loading datasets."""
2+
from .datasets import load_data
3+
4+
__all__ = ["load_data"]

causalpy/data/datasets.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import os
2+
import pathlib
3+
4+
import pandas as pd
5+
6+
import causalpy as cp
7+
8+
# Registry of the bundled example datasets. Each key is a name accepted by
# load_data(); its "filename" entry is the CSV file that load_data() reads
# from the directory returned by get_data_home().
DATASETS = {
9+
"banks": {"filename": "banks.csv"},
10+
"did": {"filename": "did.csv"},
11+
"drinking": {"filename": "drinking.csv"},
12+
"its": {"filename": "its.csv"},
13+
"its simple": {"filename": "its_simple.csv"},
14+
"rd": {"filename": "regression_discontinuity.csv"},
15+
"sc": {"filename": "synthetic_control.csv"},
16+
}
17+
18+
19+
def get_data_home() -> pathlib.Path:
    """Return the path of the directory holding the bundled data files.

    The data files sit in the same directory as this module, so the path is
    taken from this module's own ``__file__``. This avoids importing the
    top-level package just to find its location and does not hard-code the
    package directory name.

    Returns
    -------
    pathlib.Path
        Directory that contains this module and the dataset CSV files.
    """
    return pathlib.Path(__file__).parent
22+
23+
24+
def load_data(dataset: "str | None" = None) -> pd.DataFrame:
    """Load one of the example datasets listed in ``DATASETS``.

    Parameters
    ----------
    dataset : str, optional
        Key of the dataset in ``DATASETS`` (for example ``"did"``).

    Returns
    -------
    pandas.DataFrame
        Contents of the dataset's CSV file, as parsed by ``pandas.read_csv``.

    Raises
    ------
    ValueError
        If ``dataset`` is not a key of ``DATASETS``; this includes the
        default value ``None``.
    """
    try:
        filename = DATASETS[dataset]["filename"]
    except KeyError:
        # Keep the original message as a prefix and add the valid names so
        # the caller can correct a typo without reading the source.
        available = ", ".join(sorted(DATASETS))
        raise ValueError(
            f"Dataset {dataset} not found! Available datasets: {available}"
        ) from None
    return pd.read_csv(get_data_home() / filename)

docs/notebooks/did_pymc.ipynb

Lines changed: 11 additions & 17 deletions
Large diffs are not rendered by default.

docs/notebooks/did_pymc_banks.ipynb

Lines changed: 30 additions & 20 deletions
Large diffs are not rendered by default.

docs/notebooks/did_skl.ipynb

Lines changed: 3 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,7 @@
1313
"metadata": {},
1414
"outputs": [],
1515
"source": [
16-
"import pandas as pd\n",
17-
"import pathlib\n",
16+
"import causalpy as cp\n",
1817
"import arviz as az"
1918
]
2019
},
@@ -27,28 +26,13 @@
2726
"az.style.use(\"arviz-darkgrid\")"
2827
]
2928
},
30-
{
31-
"cell_type": "markdown",
32-
"metadata": {},
33-
"source": [
34-
"## Load data"
35-
]
36-
},
3729
{
3830
"cell_type": "code",
3931
"execution_count": 3,
4032
"metadata": {},
4133
"outputs": [],
4234
"source": [
43-
"did_data_path = pathlib.Path.cwd().parents[1] / \"causalpy\" / \"data\" / \"did.csv\"\n",
44-
"data = pd.read_csv(did_data_path)"
45-
]
46-
},
47-
{
48-
"cell_type": "markdown",
49-
"metadata": {},
50-
"source": [
51-
"## Run the analysis"
35+
"data = cp.load_data(\"did\")"
5236
]
5337
},
5438
{
@@ -57,26 +41,17 @@
5741
"metadata": {},
5842
"outputs": [],
5943
"source": [
60-
"from causalpy.skl_experiments import DifferenceInDifferences\n",
6144
"from sklearn.linear_model import LinearRegression\n",
6245
"\n",
63-
"# NOTE: `treated` is a deterministic function of `t` and `group`. So add this function into the formula.\n",
6446
"\n",
65-
"result = DifferenceInDifferences(\n",
47+
"result = cp.skl_experiments.DifferenceInDifferences(\n",
6648
" data,\n",
6749
" formula=\"y ~ 1 + group + t + treated:group\",\n",
6850
" time_variable_name=\"t\",\n",
6951
" prediction_model=LinearRegression(),\n",
7052
")"
7153
]
7254
},
73-
{
74-
"cell_type": "markdown",
75-
"metadata": {},
76-
"source": [
77-
"## Examine the results"
78-
]
79-
},
8055
{
8156
"cell_type": "code",
8257
"execution_count": 5,

docs/notebooks/rd_pymc.ipynb

Lines changed: 12 additions & 33 deletions
Large diffs are not rendered by default.

docs/notebooks/rd_pymc_drinking.ipynb

Lines changed: 41 additions & 39 deletions
Large diffs are not rendered by default.

docs/notebooks/rd_skl.ipynb

Lines changed: 88 additions & 19 deletions
Large diffs are not rendered by default.

docs/notebooks/rd_skl_drinking.ipynb

Lines changed: 4 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,8 @@
1717
"metadata": {},
1818
"outputs": [],
1919
"source": [
20-
"import pandas as pd\n",
21-
"import pathlib\n",
22-
"import arviz as az"
20+
"import arviz as az\n",
21+
"import causalpy as cp"
2322
]
2423
},
2524
{
@@ -31,45 +30,29 @@
3130
"az.style.use(\"arviz-darkgrid\")"
3231
]
3332
},
34-
{
35-
"cell_type": "markdown",
36-
"metadata": {},
37-
"source": [
38-
"## Load data"
39-
]
40-
},
4133
{
4234
"cell_type": "code",
4335
"execution_count": 3,
4436
"metadata": {},
4537
"outputs": [],
4638
"source": [
47-
"rd_data_path = pathlib.Path.cwd().parents[1] / \"causalpy\" / \"data\" / \"drinking.csv\"\n",
4839
"df = (\n",
49-
" pd.read_csv(rd_data_path)[[\"agecell\", \"all\", \"mva\", \"suicide\"]]\n",
40+
" cp.load_data(\"drinking\")\n",
5041
" .rename(columns={\"agecell\": \"age\"})\n",
5142
" .assign(treated=lambda df_: df_.age > 21)\n",
5243
" .dropna(axis=0)\n",
5344
")"
5445
]
5546
},
56-
{
57-
"cell_type": "markdown",
58-
"metadata": {},
59-
"source": [
60-
"## Linear model"
61-
]
62-
},
6347
{
6448
"cell_type": "code",
6549
"execution_count": 4,
6650
"metadata": {},
6751
"outputs": [],
6852
"source": [
69-
"from causalpy.skl_experiments import RegressionDiscontinuity\n",
7053
"from sklearn.linear_model import LinearRegression\n",
7154
"\n",
72-
"result = RegressionDiscontinuity(\n",
55+
"result = cp.skl_experiments.RegressionDiscontinuity(\n",
7356
" df,\n",
7457
" formula=\"all ~ 1 + age + treated\",\n",
7558
" running_variable_name=\"age\",\n",

0 commit comments

Comments
 (0)