Skip to content

Commit a213046

Browse files
committed
Merge branch 'dev/datapack'
2 parents f269415 + 7090514 commit a213046

File tree

8 files changed

+9181
-18
lines changed

8 files changed

+9181
-18
lines changed

2024-03-Datapack.py

Lines changed: 412 additions & 0 deletions
Large diffs are not rendered by default.

notebooks/2025-05-01DATAPACKMETRIC.ipynb

Lines changed: 1318 additions & 0 deletions
Large diffs are not rendered by default.

notebooks/experiment-datapack.ipynb

Lines changed: 5507 additions & 0 deletions
Large diffs are not rendered by default.

notebooks/experiment-template.ipynb

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"cells": [
33
{
44
"cell_type": "code",
5-
"execution_count": 2,
5+
"execution_count": 1,
66
"id": "fd2d413a-666f-49d4-b86c-7d6898a496a5",
77
"metadata": {},
88
"outputs": [],
@@ -26,7 +26,7 @@
2626
},
2727
{
2828
"cell_type": "code",
29-
"execution_count": 3,
29+
"execution_count": 30,
3030
"id": "be959c79",
3131
"metadata": {},
3232
"outputs": [],
@@ -43,6 +43,34 @@
4343
" delimiter=';'\n",
4444
" )"
4545
]
46+
},
47+
{
48+
"cell_type": "code",
49+
"execution_count": null,
50+
"id": "434a0e1a",
51+
"metadata": {},
52+
"outputs": [
53+
{
54+
"data": {
55+
"text/plain": [
56+
"(559,)"
57+
]
58+
},
59+
"execution_count": 38,
60+
"metadata": {},
61+
"output_type": "execute_result"
62+
}
63+
],
64+
"source": [
65+
"si_drf = si.loc[:, ['service_id', 'fiscal_yr', 'program_id', 'org_id']]\n",
66+
"si_drf['program_id'] = si_drf['program_id'].str.split(',')\n",
67+
"si_drf = si_drf.explode('program_id')\n",
68+
"si_drf = si_drf[si_drf['program_id'].notna()]\n",
69+
"\n",
70+
"# si_drf = si_drf.loc[si_drf['fiscal_yr']=='2023-2024']\n",
71+
"\n",
72+
"si_drf['program_id'].unique().shape"
73+
]
4674
}
4775
],
4876
"metadata": {

notebooks/experiments-drf.ipynb

Lines changed: 47 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,22 @@
1010
},
1111
{
1212
"cell_type": "code",
13-
"execution_count": null,
13+
"execution_count": 5,
1414
"id": "a65fa6c7-24f9-4fb6-8382-096f3c487d10",
1515
"metadata": {},
16-
"outputs": [],
16+
"outputs": [
17+
{
18+
"ename": "ModuleNotFoundError",
19+
"evalue": "No module named 'src'",
20+
"output_type": "error",
21+
"traceback": [
22+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
23+
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
24+
"Cell \u001b[0;32mIn[5], line 10\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01msys\u001b[39;00m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;66;03m# sys.path.append(\"/home/jovyan/shared/service-data\")\u001b[39;00m\n\u001b[0;32m---> 10\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msrc\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mclean\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m clean_percentage, normalize_string, standardize_column_names, clean_fiscal_yr\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msrc\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mload\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m load_csv_from_raw\n\u001b[1;32m 12\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msrc\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mexport\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m export_to_csv\n",
25+
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'src'"
26+
]
27+
}
28+
],
1729
"source": [
1830
"import pandas as pd\n",
1931
"import numpy as np\n",
@@ -32,16 +44,19 @@
3244
},
3345
{
3446
"cell_type": "code",
35-
"execution_count": 43,
47+
"execution_count": 4,
3648
"id": "048e8db3-f8e2-45f0-a9de-5ca5bc0fe351",
3749
"metadata": {},
3850
"outputs": [
3951
{
40-
"name": "stdout",
41-
"output_type": "stream",
42-
"text": [
43-
"Exported dept.csv to /home/jovyan/shared/service-data/outputs/utils\n",
44-
"Exported si.csv to /home/jovyan/shared/service-data/outputs\n"
52+
"ename": "NameError",
53+
"evalue": "name 'merge_si' is not defined",
54+
"output_type": "error",
55+
"traceback": [
56+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
57+
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
58+
"Cell \u001b[0;32mIn[4], line 12\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# File paths for outputs\u001b[39;00m\n\u001b[1;32m 6\u001b[0m data_files \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 7\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrbpo\u001b[39m\u001b[38;5;124m\"\u001b[39m: parent_dir \u001b[38;5;241m/\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minputs\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m/\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrbpo.csv\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 8\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124morg_var\u001b[39m\u001b[38;5;124m\"\u001b[39m: parent_dir \u001b[38;5;241m/\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minputs\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m/\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124morg_var.csv\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 9\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mserv_prog\u001b[39m\u001b[38;5;124m\"\u001b[39m: parent_dir \u001b[38;5;241m/\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minputs\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m/\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mserv_prog.csv\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 10\u001b[0m }\n\u001b[0;32m---> 12\u001b[0m si \u001b[38;5;241m=\u001b[39m \u001b[43mmerge_si\u001b[49m()\n\u001b[1;32m 13\u001b[0m rbpo \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mread_csv(data_files[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrbpo\u001b[39m\u001b[38;5;124m\"\u001b[39m])\n\u001b[1;32m 14\u001b[0m serv_prog \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mread_csv(data_files[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mserv_prog\u001b[39m\u001b[38;5;124m\"\u001b[39m])\n",
59+
"\u001b[0;31mNameError\u001b[0m: name 'merge_si' is not defined"
4560
]
4661
}
4762
],
@@ -65,6 +80,28 @@
6580
"rbpo['fiscal_yr'] = rbpo['fiscal_yr'].apply(clean_fiscal_yr)"
6681
]
6782
},
83+
{
84+
"cell_type": "code",
85+
"execution_count": 3,
86+
"id": "9a18f4e3",
87+
"metadata": {},
88+
"outputs": [
89+
{
90+
"ename": "NameError",
91+
"evalue": "name 'si' is not defined",
92+
"output_type": "error",
93+
"traceback": [
94+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
95+
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
96+
"Cell \u001b[0;32mIn[3], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43msi\u001b[49m\n",
97+
"\u001b[0;31mNameError\u001b[0m: name 'si' is not defined"
98+
]
99+
}
100+
],
101+
"source": [
102+
"si"
103+
]
104+
},
68105
{
69106
"cell_type": "markdown",
70107
"id": "77e12a73-f357-4834-a99f-eab56fcc87dd",
@@ -192,7 +229,7 @@
192229
],
193230
"metadata": {
194231
"kernelspec": {
195-
"display_name": "Python 3 (ipykernel)",
232+
"display_name": "Python 3",
196233
"language": "python",
197234
"name": "python3"
198235
},
@@ -206,7 +243,7 @@
206243
"name": "python",
207244
"nbconvert_exporter": "python",
208245
"pygments_lexer": "ipython3",
209-
"version": "3.12.8"
246+
"version": "3.12.1"
210247
}
211248
},
212249
"nbformat": 4,

0 commit comments

Comments
 (0)