Skip to content

Commit 4a23838

Browse files
authored
Merge pull request #87 from pymc-labs/quickstart-fixes-and-general-improvement
Quickstart fixes and general improvement
2 parents cce4909 + 960c8f2 commit 4a23838

17 files changed

+302
-249
lines changed

CONTRIBUTING.md

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,10 +72,31 @@ If there are autodoc issues/errors in remote builds of the docs, we need to add
7272

7373
## New releases [work in progress]
7474

75+
### Test release to `test.pypi.org` (manual)
76+
7577
1. Bump the release version in `causalpy/version.py`. This is automatically read by `setup.py` and `docs/config.py`.
78+
2. Upload the build to test.pypi.org. _Note that this requires a username and password for test.pypi.org_. In the root directory, run the following:
79+
```bash
80+
rm -rf dist
81+
python setup.py sdist
82+
twine upload --repository testpypi dist/*
83+
```
84+
3. At this point the updated build is available on test.pypi.org. We can test that it works as expected by installing it (into a test environment) from test.pypi.org with:
85+
86+
```bash
87+
conda create -n causalpy-test python
88+
conda activate causalpy-test
89+
python3 -m pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ causalpy
90+
```
91+
92+
4. Now load a python or ipython session and follow the quickstart instructions to confirm things work.
93+
94+
### Actual release to `pypi.org` (manual)
95+
96+
1. If not done in the previous step, bump the release version in `causalpy/version.py`. This is automatically read by `setup.py` and `docs/config.py`.
7697
2. Update on pypi.org. In the root directory:
7798
- `python setup.py sdist`
78-
- update to pypi.org with `twine upload dist/*`
99+
- update to pypi.org with `twine upload dist/*`. Note that this requires a username and password for pypi.org.
79100
3. Readthedocs:
80101
- Docs should be built remotely every time there is a pull request
81102
- See here https://docs.readthedocs.io/en/stable/tutorial/#versioning-documentation for versioning the docs

README.md

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -37,27 +37,23 @@ pip install git+https://github.com/pymc-labs/CausalPy.git
3737
## Quickstart
3838

3939
```python
40-
from causalpy.pymc_experiments import RegressionDiscontinuity
41-
from causalpy.pymc_models import LinearRegression
42-
import pandas as pd
43-
import pathlib
40+
import causalpy as cp
4441

4542

4643
# Import and process data
47-
rd_data_path = pathlib.Path.cwd().parents[1] / "causalpy" / "data" / "drinking.csv"
4844
df = (
49-
pd.read_csv(rd_data_path)[["agecell", "all", "mva", "suicide"]]
45+
cp.load_data("drinking")
5046
.rename(columns={"agecell": "age"})
5147
.assign(treated=lambda df_: df_.age > 21)
5248
.dropna(axis=0)
5349
)
5450

5551
# Run the analysis
56-
result = RegressionDiscontinuity(
52+
result = cp.pymc_experiments.RegressionDiscontinuity(
5753
df,
5854
formula="all ~ 1 + age + treated",
5955
running_variable_name="age",
60-
prediction_model=LinearRegression(),
56+
prediction_model=cp.pymc_models.LinearRegression(),
6157
treatment_threshold=21,
6258
)
6359

causalpy/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
import causalpy.pymc_experiments
2+
import causalpy.pymc_models
3+
import causalpy.skl_experiments
4+
import causalpy.skl_models
5+
6+
from .data import load_data

causalpy/data/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
"""Code for loading datasets."""
2+
from .datasets import load_data
3+
4+
__all__ = ["load_data"]

causalpy/data/datasets.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import os
2+
import pathlib
3+
4+
import pandas as pd
5+
6+
import causalpy as cp
7+
8+
# Registry of bundled datasets: maps the name accepted by `load_data` to a
# record whose "filename" entry is the CSV file read from the data directory.
DATASETS = {
9+
"banks": {"filename": "banks.csv"},
10+
"did": {"filename": "did.csv"},
11+
"drinking": {"filename": "drinking.csv"},
12+
"its": {"filename": "its.csv"},
13+
"its simple": {"filename": "its_simple.csv"},
14+
"rd": {"filename": "regression_discontinuity.csv"},
15+
"sc": {"filename": "synthetic_control.csv"},
16+
}
17+
18+
19+
def get_data_home() -> pathlib.Path:
    """Return the path of the data directory.

    The directory is the one containing this module (``causalpy/data``), which
    is the same location the bundled CSV files are read from. It is derived
    from this module's own ``__file__`` so that the lookup does not depend on
    importing the parent package from inside one of its own submodules.

    Returns
    -------
    pathlib.Path
        Directory that holds the bundled dataset files.
    """
    return pathlib.Path(__file__).parent
22+
23+
24+
def load_data(dataset: str = None):
    """Load one of the bundled example datasets.

    Parameters
    ----------
    dataset : str
        Name of the dataset; must be one of the keys of ``DATASETS``.

    Returns
    -------
    The result of ``pandas.read_csv`` on the matching file in the data
    directory.

    Raises
    ------
    ValueError
        If ``dataset`` is not a key of ``DATASETS`` (this includes the
        default value ``None``).
    """
    # Guard clause: reject unknown names before touching the filesystem.
    if dataset not in DATASETS:
        raise ValueError(f"Dataset {dataset} not found!")

    csv_name = DATASETS[dataset]["filename"]
    return pd.read_csv(get_data_home() / csv_name)

causalpy/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.0.2"
1+
__version__ = "0.0.3"

docs/index.rst

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,27 +29,23 @@ Quickstart
2929

3030
.. code-block:: python
3131
32-
from causalpy.pymc_experiments import RegressionDiscontinuity
33-
from causalpy.pymc_models import LinearRegression
34-
import pandas as pd
35-
import pathlib
32+
import causalpy as cp
3633
3734
3835
# Import and process data
39-
rd_data_path = pathlib.Path.cwd().parents[1] / "causalpy" / "data" / "drinking.csv"
4036
df = (
41-
pd.read_csv(rd_data_path)[["agecell", "all", "mva", "suicide"]]
37+
cp.load_data("drinking")
4238
.rename(columns={"agecell": "age"})
4339
.assign(treated=lambda df_: df_.age > 21)
4440
.dropna(axis=0)
4541
)
4642
4743
# Run the analysis
48-
result = RegressionDiscontinuity(
44+
result = cp.pymc_experiments.RegressionDiscontinuity(
4945
df,
5046
formula="all ~ 1 + age + treated",
5147
running_variable_name="age",
52-
prediction_model=LinearRegression(),
48+
prediction_model=cp.pymc_models.LinearRegression(),
5349
treatment_threshold=21,
5450
)
5551

docs/notebooks/did_pymc.ipynb

Lines changed: 11 additions & 17 deletions
Large diffs are not rendered by default.

docs/notebooks/did_pymc_banks.ipynb

Lines changed: 30 additions & 20 deletions
Large diffs are not rendered by default.

docs/notebooks/did_skl.ipynb

Lines changed: 3 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,7 @@
1313
"metadata": {},
1414
"outputs": [],
1515
"source": [
16-
"import pandas as pd\n",
17-
"import pathlib\n",
16+
"import causalpy as cp\n",
1817
"import arviz as az"
1918
]
2019
},
@@ -27,28 +26,13 @@
2726
"az.style.use(\"arviz-darkgrid\")"
2827
]
2928
},
30-
{
31-
"cell_type": "markdown",
32-
"metadata": {},
33-
"source": [
34-
"## Load data"
35-
]
36-
},
3729
{
3830
"cell_type": "code",
3931
"execution_count": 3,
4032
"metadata": {},
4133
"outputs": [],
4234
"source": [
43-
"did_data_path = pathlib.Path.cwd().parents[1] / \"causalpy\" / \"data\" / \"did.csv\"\n",
44-
"data = pd.read_csv(did_data_path)"
45-
]
46-
},
47-
{
48-
"cell_type": "markdown",
49-
"metadata": {},
50-
"source": [
51-
"## Run the analysis"
35+
"data = cp.load_data(\"did\")"
5236
]
5337
},
5438
{
@@ -57,26 +41,17 @@
5741
"metadata": {},
5842
"outputs": [],
5943
"source": [
60-
"from causalpy.skl_experiments import DifferenceInDifferences\n",
6144
"from sklearn.linear_model import LinearRegression\n",
6245
"\n",
63-
"# NOTE: `treated` is a deterministic function of `t` and `group`. So add this function into the formula.\n",
6446
"\n",
65-
"result = DifferenceInDifferences(\n",
47+
"result = cp.skl_experiments.DifferenceInDifferences(\n",
6648
" data,\n",
6749
" formula=\"y ~ 1 + group + t + treated:group\",\n",
6850
" time_variable_name=\"t\",\n",
6951
" prediction_model=LinearRegression(),\n",
7052
")"
7153
]
7254
},
73-
{
74-
"cell_type": "markdown",
75-
"metadata": {},
76-
"source": [
77-
"## Examine the results"
78-
]
79-
},
8055
{
8156
"cell_type": "code",
8257
"execution_count": 5,

0 commit comments

Comments
 (0)