Skip to content

Commit 10a9c82

Browse files
committed
#76 #44 progress on banking example data processing
1 parent 1831dc4 commit 10a9c82

File tree

1 file changed

+226
-20
lines changed

1 file changed

+226
-20
lines changed

docs/notebooks/did_pymc_banks.ipynb

Lines changed: 226 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
},
1212
{
1313
"cell_type": "code",
14-
"execution_count": 121,
14+
"execution_count": 1,
1515
"metadata": {},
1616
"outputs": [],
1717
"source": [
@@ -22,7 +22,7 @@
2222
},
2323
{
2424
"cell_type": "code",
25-
"execution_count": 122,
25+
"execution_count": 2,
2626
"metadata": {},
2727
"outputs": [],
2828
"source": [
@@ -32,18 +32,9 @@
3232
},
3333
{
3434
"cell_type": "code",
35-
"execution_count": 123,
35+
"execution_count": 3,
3636
"metadata": {},
37-
"outputs": [
38-
{
39-
"name": "stdout",
40-
"output_type": "stream",
41-
"text": [
42-
"The autoreload extension is already loaded. To reload it, use:\n",
43-
" %reload_ext autoreload\n"
44-
]
45-
}
46-
],
37+
"outputs": [],
4738
"source": [
4839
"%load_ext autoreload\n",
4940
"%autoreload 2"
@@ -60,7 +51,7 @@
6051
},
6152
{
6253
"cell_type": "code",
63-
"execution_count": 124,
54+
"execution_count": 4,
6455
"metadata": {},
6556
"outputs": [],
6657
"source": [
@@ -89,7 +80,7 @@
8980
},
9081
{
9182
"cell_type": "code",
92-
"execution_count": 125,
83+
"execution_count": 5,
9384
"metadata": {},
9485
"outputs": [
9586
{
@@ -111,12 +102,12 @@
111102
"cell_type": "markdown",
112103
"metadata": {},
113104
"source": [
114-
"As a final processing step to make the data amenable to analysis, we will convert from wide form into long form."
105+
"Just a few more final processing steps are needed to make the data amenable to analysis. We will convert from wide form into long form. Then we will add a new column `treated` which indicates the observations where treatment has taken place."
115106
]
116107
},
117108
{
118109
"cell_type": "code",
119-
"execution_count": 126,
110+
"execution_count": 6,
120111
"metadata": {},
121112
"outputs": [
122113
{
@@ -238,7 +229,7 @@
238229
"11 1934 Eighth District 110.0"
239230
]
240231
},
241-
"execution_count": 126,
232+
"execution_count": 6,
242233
"metadata": {},
243234
"output_type": "execute_result"
244235
}
@@ -255,11 +246,226 @@
255246
"df_long"
256247
]
257248
},
249+
{
250+
"cell_type": "code",
251+
"execution_count": 7,
252+
"metadata": {},
253+
"outputs": [
254+
{
255+
"data": {
256+
"text/html": [
257+
"<div>\n",
258+
"<style scoped>\n",
259+
" .dataframe tbody tr th:only-of-type {\n",
260+
" vertical-align: middle;\n",
261+
" }\n",
262+
"\n",
263+
" .dataframe tbody tr th {\n",
264+
" vertical-align: top;\n",
265+
" }\n",
266+
"\n",
267+
" .dataframe thead th {\n",
268+
" text-align: right;\n",
269+
" }\n",
270+
"</style>\n",
271+
"<table border=\"1\" class=\"dataframe\">\n",
272+
" <thead>\n",
273+
" <tr style=\"text-align: right;\">\n",
274+
" <th></th>\n",
275+
" <th>year</th>\n",
276+
" <th>district</th>\n",
277+
" <th>bib</th>\n",
278+
" <th>treated</th>\n",
279+
" </tr>\n",
280+
" </thead>\n",
281+
" <tbody>\n",
282+
" <tr>\n",
283+
" <th>0</th>\n",
284+
" <td>1929</td>\n",
285+
" <td>Sixth District</td>\n",
286+
" <td>141.0</td>\n",
287+
" <td>False</td>\n",
288+
" </tr>\n",
289+
" <tr>\n",
290+
" <th>6</th>\n",
291+
" <td>1929</td>\n",
292+
" <td>Eighth District</td>\n",
293+
" <td>170.0</td>\n",
294+
" <td>False</td>\n",
295+
" </tr>\n",
296+
" <tr>\n",
297+
" <th>1</th>\n",
298+
" <td>1930</td>\n",
299+
" <td>Sixth District</td>\n",
300+
" <td>135.0</td>\n",
301+
" <td>False</td>\n",
302+
" </tr>\n",
303+
" <tr>\n",
304+
" <th>7</th>\n",
305+
" <td>1930</td>\n",
306+
" <td>Eighth District</td>\n",
307+
" <td>165.0</td>\n",
308+
" <td>False</td>\n",
309+
" </tr>\n",
310+
" <tr>\n",
311+
" <th>2</th>\n",
312+
" <td>1931</td>\n",
313+
" <td>Sixth District</td>\n",
314+
" <td>121.0</td>\n",
315+
" <td>True</td>\n",
316+
" </tr>\n",
317+
" <tr>\n",
318+
" <th>8</th>\n",
319+
" <td>1931</td>\n",
320+
" <td>Eighth District</td>\n",
321+
" <td>132.0</td>\n",
322+
" <td>False</td>\n",
323+
" </tr>\n",
324+
" <tr>\n",
325+
" <th>3</th>\n",
326+
" <td>1932</td>\n",
327+
" <td>Sixth District</td>\n",
328+
" <td>113.0</td>\n",
329+
" <td>True</td>\n",
330+
" </tr>\n",
331+
" <tr>\n",
332+
" <th>9</th>\n",
333+
" <td>1932</td>\n",
334+
" <td>Eighth District</td>\n",
335+
" <td>120.0</td>\n",
336+
" <td>False</td>\n",
337+
" </tr>\n",
338+
" <tr>\n",
339+
" <th>4</th>\n",
340+
" <td>1933</td>\n",
341+
" <td>Sixth District</td>\n",
342+
" <td>102.0</td>\n",
343+
" <td>True</td>\n",
344+
" </tr>\n",
345+
" <tr>\n",
346+
" <th>10</th>\n",
347+
" <td>1933</td>\n",
348+
" <td>Eighth District</td>\n",
349+
" <td>111.0</td>\n",
350+
" <td>False</td>\n",
351+
" </tr>\n",
352+
" <tr>\n",
353+
" <th>5</th>\n",
354+
" <td>1934</td>\n",
355+
" <td>Sixth District</td>\n",
356+
" <td>102.0</td>\n",
357+
" <td>True</td>\n",
358+
" </tr>\n",
359+
" <tr>\n",
360+
" <th>11</th>\n",
361+
" <td>1934</td>\n",
362+
" <td>Eighth District</td>\n",
363+
" <td>110.0</td>\n",
364+
" <td>False</td>\n",
365+
" </tr>\n",
366+
" </tbody>\n",
367+
"</table>\n",
368+
"</div>"
369+
],
370+
"text/plain": [
371+
" year district bib treated\n",
372+
"0 1929 Sixth District 141.0 False\n",
373+
"6 1929 Eighth District 170.0 False\n",
374+
"1 1930 Sixth District 135.0 False\n",
375+
"7 1930 Eighth District 165.0 False\n",
376+
"2 1931 Sixth District 121.0 True\n",
377+
"8 1931 Eighth District 132.0 False\n",
378+
"3 1932 Sixth District 113.0 True\n",
379+
"9 1932 Eighth District 120.0 False\n",
380+
"4 1933 Sixth District 102.0 True\n",
381+
"10 1933 Eighth District 111.0 False\n",
382+
"5 1934 Sixth District 102.0 True\n",
383+
"11 1934 Eighth District 110.0 False"
384+
]
385+
},
386+
"execution_count": 7,
387+
"metadata": {},
388+
"output_type": "execute_result"
389+
}
390+
],
391+
"source": [
392+
"# df_long.assign(treated=lambda x: True if (x.year>=1931 & x.district==\"Sixth District\") else False)\n",
393+
"\n",
394+
"df_long[\"treated\"] = (df_long.year >= 1931) & (df_long.district == \"Sixth District\")\n",
395+
"df_long"
396+
]
397+
},
258398
{
259399
"cell_type": "markdown",
260400
"metadata": {},
261401
"source": [
262-
"## Run the analysis"
402+
"## Analysis 1\n",
403+
"\n",
404+
"First we'll do an analysis just looking at data from 1930 and 1931. This way we just have a single pre-intervention and a single post-intervention measurement."
405+
]
406+
},
407+
{
408+
"cell_type": "code",
409+
"execution_count": 8,
410+
"metadata": {},
411+
"outputs": [],
412+
"source": [
413+
"# df1 = df_long[df_long.year.isin([1930, 1931])]\n",
414+
"# df1"
415+
]
416+
},
417+
{
418+
"cell_type": "code",
419+
"execution_count": null,
420+
"metadata": {},
421+
"outputs": [],
422+
"source": [
423+
"from causalpy.pymc_experiments import DifferenceInDifferences\n",
424+
"from causalpy.pymc_models import LinearRegression\n",
425+
"\n",
426+
"result = DifferenceInDifferences(\n",
427+
" df_long[df_long.year.isin([1930, 1931])],\n",
428+
" formula=\"bib ~ 1 + district + year + district:treated\",\n",
429+
" time_variable_name=\"year\",\n",
430+
" prediction_model=LinearRegression(),\n",
431+
")"
432+
]
433+
},
434+
{
435+
"cell_type": "code",
436+
"execution_count": null,
437+
"metadata": {},
438+
"outputs": [],
439+
"source": [
440+
"fig, ax = result.plot()"
441+
]
442+
},
443+
{
444+
"cell_type": "code",
445+
"execution_count": null,
446+
"metadata": {},
447+
"outputs": [],
448+
"source": [
449+
"result.summary()"
450+
]
451+
},
452+
{
453+
"cell_type": "code",
454+
"execution_count": null,
455+
"metadata": {},
456+
"outputs": [],
457+
"source": [
458+
"ax = az.plot_posterior(result.causal_impact, ref_val=0)\n",
459+
"ax.set(title=\"Posterior estimate of causal impact\");"
460+
]
461+
},
462+
{
463+
"cell_type": "markdown",
464+
"metadata": {},
465+
"source": [
466+
"## Analysis 2\n",
467+
"\n",
468+
"Now we'll do a difference in differences analysis of the full dataset."
263469
]
264470
},
265471
{
@@ -273,7 +479,7 @@
273479
"\n",
274480
"result = DifferenceInDifferences(\n",
275481
" df_long,\n",
276-
" formula=\"bib ~ 1 + district + year + district:year\",\n",
482+
" formula=\"bib ~ 1 + district + year + district:treated\",\n",
277483
" time_variable_name=\"year\",\n",
278484
" prediction_model=LinearRegression(),\n",
279485
")"

0 commit comments

Comments
 (0)