Skip to content

Commit 4250a96

Browse files
committed
new notebooks
1 parent 9ab6960 commit 4250a96

File tree

2 files changed

+349
-19
lines changed

2 files changed

+349
-19
lines changed

notebooks/experiments.ipynb

Lines changed: 16 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -10,26 +10,23 @@
1010
},
1111
{
1212
"cell_type": "code",
13-
"execution_count": 8,
14-
"id": "4f7537bf-c938-4a4c-90a0-6d876e8a69f4",
13+
"execution_count": null,
14+
"id": "a65fa6c7-24f9-4fb6-8382-096f3c487d10",
15+
"metadata": {},
16+
"outputs": [],
17+
"source": [
18+
"import pandas as pd\n",
19+
"import numpy as np\n",
20+
"import re\n",
21+
"import pytz\n",
22+
"import os\n",
23+
"from pathlib import Path"
24+
]
25+
},
26+
{
27+
"cell_type": "markdown",
28+
"id": "8d800581-765b-4343-a688-a7b2ef7e679b",
1529
"metadata": {},
16-
"outputs": [
17-
{
18-
"name": "stdin",
19-
"output_type": "stream",
20-
"text": [
21-
"Enter the directory path containing CSV files: /home/jovyan/shared/service-data\n",
22-
"Enter the output file path (e.g., headers_output.csv): headers_output.csv\n"
23-
]
24-
},
25-
{
26-
"name": "stdout",
27-
"output_type": "stream",
28-
"text": [
29-
"Headers saved to headers_output.csv\n"
30-
]
31-
}
32-
],
3330
"source": [
3431
"import os\n",
3532
"import csv\n",
Lines changed: 333 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,333 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"id": "5a32a1a1-383a-4a4d-8670-a814b2ad7cfb",
7+
"metadata": {},
8+
"outputs": [],
9+
"source": [
10+
"import pandas as pd\n",
11+
"import numpy as np\n",
12+
"import re\n",
13+
"import pytz\n",
14+
"import os\n",
15+
"from pathlib import Path"
16+
]
17+
},
18+
{
19+
"cell_type": "code",
20+
"execution_count": 2,
21+
"id": "a59ab835-50b1-4bfd-b032-51b2557db47f",
22+
"metadata": {},
23+
"outputs": [],
24+
"source": [
25+
"# Define the base directory\n",
26+
"base_dir = Path.cwd()\n",
27+
"parent_dir = base_dir.parent\n",
28+
"\n",
29+
"# File paths for outputs\n",
30+
"data_files = {\n",
31+
" \"si\": parent_dir / \"outputs\" / \"si.csv\",\n",
32+
" \"ss\": parent_dir / \"outputs\" / \"ss.csv\",\n",
33+
" \"qa_issues\": parent_dir / \"src\" / \"qa_issues_descriptions.csv\",\n",
34+
" \"dept\": parent_dir / \"outputs\" / \"utils\" / \"dept.csv\"\n",
35+
"}\n",
36+
"\n",
37+
"# File paths for inputs\n",
38+
"data_files.update({\n",
39+
" \"rbpo\": parent_dir / \"inputs\" / \"rbpo.csv\",\n",
40+
" \"org_var\": parent_dir / \"inputs\" / \"org_var.csv\",\n",
41+
" \"ifoi_en\": parent_dir / \"inputs\" / \"ifoi_en.csv\",\n",
42+
" \"ifoi_fr\": parent_dir / \"inputs\" / \"ifoi_fr.csv\"\n",
43+
"})\n",
44+
"\n",
45+
"# Load data into dataframes\n",
46+
"si = pd.read_csv(data_files[\"si\"], sep=';', na_values=[], keep_default_na=False)\n",
47+
"ss = pd.read_csv(data_files[\"ss\"], sep=';', na_values=[], keep_default_na=False)"
48+
]
49+
},
50+
{
51+
"cell_type": "code",
52+
"execution_count": 101,
53+
"id": "dea6c0f0-bc3b-4c3b-85b0-1a3fa66f87bf",
54+
"metadata": {},
55+
"outputs": [
56+
{
57+
"data": {
58+
"text/html": [
59+
"<div>\n",
60+
"<style scoped>\n",
61+
" .dataframe tbody tr th:only-of-type {\n",
62+
" vertical-align: middle;\n",
63+
" }\n",
64+
"\n",
65+
" .dataframe tbody tr th {\n",
66+
" vertical-align: top;\n",
67+
" }\n",
68+
"\n",
69+
" .dataframe thead th {\n",
70+
" text-align: right;\n",
71+
" }\n",
72+
"</style>\n",
73+
"<table border=\"1\" class=\"dataframe\">\n",
74+
" <thead>\n",
75+
" <tr style=\"text-align: right;\">\n",
76+
" <th>fiscal_yr</th>\n",
77+
" <th>org_id</th>\n",
78+
" <th>department_en</th>\n",
79+
" <th>service_id</th>\n",
80+
" <th>2018-2019</th>\n",
81+
" <th>2019-2020</th>\n",
82+
" <th>2020-2021</th>\n",
83+
" <th>2021-2022</th>\n",
84+
" <th>2022-2023</th>\n",
85+
" <th>2023-2024</th>\n",
86+
" <th>diff_latest</th>\n",
87+
" </tr>\n",
88+
" </thead>\n",
89+
" <tbody>\n",
90+
" <tr>\n",
91+
" <th>2211</th>\n",
92+
" <td>46</td>\n",
93+
" <td>Canada Revenue Agency</td>\n",
94+
" <td>3728</td>\n",
95+
" <td>0.0</td>\n",
96+
" <td>0.0</td>\n",
97+
" <td>0.0</td>\n",
98+
" <td>0.0</td>\n",
99+
" <td>0.0</td>\n",
100+
" <td>45126410.0</td>\n",
101+
" <td>45126410.0</td>\n",
102+
" </tr>\n",
103+
" <tr>\n",
104+
" <th>1706</th>\n",
105+
" <td>26</td>\n",
106+
" <td>Canada Border Services Agency</td>\n",
107+
" <td>669</td>\n",
108+
" <td>57653256.0</td>\n",
109+
" <td>55801982.0</td>\n",
110+
" <td>8678302.0</td>\n",
111+
" <td>23381642.0</td>\n",
112+
" <td>70361874.0</td>\n",
113+
" <td>89154252.0</td>\n",
114+
" <td>18792378.0</td>\n",
115+
" </tr>\n",
116+
" <tr>\n",
117+
" <th>1609</th>\n",
118+
" <td>228</td>\n",
119+
" <td>National Research Council Canada</td>\n",
120+
" <td>1677</td>\n",
121+
" <td>0.0</td>\n",
122+
" <td>0.0</td>\n",
123+
" <td>0.0</td>\n",
124+
" <td>0.0</td>\n",
125+
" <td>57000000.0</td>\n",
126+
" <td>69000000.0</td>\n",
127+
" <td>12000000.0</td>\n",
128+
" </tr>\n",
129+
" <tr>\n",
130+
" <th>2173</th>\n",
131+
" <td>46</td>\n",
132+
" <td>Canada Revenue Agency</td>\n",
133+
" <td>1110</td>\n",
134+
" <td>1525.0</td>\n",
135+
" <td>1035.0</td>\n",
136+
" <td>16908.0</td>\n",
137+
" <td>60202.0</td>\n",
138+
" <td>131499.0</td>\n",
139+
" <td>6142697.0</td>\n",
140+
" <td>6011198.0</td>\n",
141+
" </tr>\n",
142+
" <tr>\n",
143+
" <th>1724</th>\n",
144+
" <td>26</td>\n",
145+
" <td>Canada Border Services Agency</td>\n",
146+
" <td>728</td>\n",
147+
" <td>21739228.0</td>\n",
148+
" <td>20207714.0</td>\n",
149+
" <td>19892542.0</td>\n",
150+
" <td>21910803.0</td>\n",
151+
" <td>22976516.0</td>\n",
152+
" <td>25317100.0</td>\n",
153+
" <td>2340584.0</td>\n",
154+
" </tr>\n",
155+
" <tr>\n",
156+
" <th>...</th>\n",
157+
" <td>...</td>\n",
158+
" <td>...</td>\n",
159+
" <td>...</td>\n",
160+
" <td>...</td>\n",
161+
" <td>...</td>\n",
162+
" <td>...</td>\n",
163+
" <td>...</td>\n",
164+
" <td>...</td>\n",
165+
" <td>...</td>\n",
166+
" <td>...</td>\n",
167+
" </tr>\n",
168+
" <tr>\n",
169+
" <th>2174</th>\n",
170+
" <td>46</td>\n",
171+
" <td>Canada Revenue Agency</td>\n",
172+
" <td>1111</td>\n",
173+
" <td>43107477.0</td>\n",
174+
" <td>39999259.0</td>\n",
175+
" <td>42481757.0</td>\n",
176+
" <td>39880293.0</td>\n",
177+
" <td>40803035.0</td>\n",
178+
" <td>39760412.0</td>\n",
179+
" <td>-1042623.0</td>\n",
180+
" </tr>\n",
181+
" <tr>\n",
182+
" <th>1848</th>\n",
183+
" <td>282</td>\n",
184+
" <td>Public Service Commission of Canada</td>\n",
185+
" <td>1195</td>\n",
186+
" <td>1000000.0</td>\n",
187+
" <td>488504.0</td>\n",
188+
" <td>1242967.0</td>\n",
189+
" <td>1128947.0</td>\n",
190+
" <td>1128947.0</td>\n",
191+
" <td>10234.0</td>\n",
192+
" <td>-1118713.0</td>\n",
193+
" </tr>\n",
194+
" <tr>\n",
195+
" <th>1494</th>\n",
196+
" <td>151</td>\n",
197+
" <td>Financial Consumer Agency of Canada</td>\n",
198+
" <td>1726</td>\n",
199+
" <td>0.0</td>\n",
200+
" <td>0.0</td>\n",
201+
" <td>10243558.0</td>\n",
202+
" <td>9830975.0</td>\n",
203+
" <td>10104531.0</td>\n",
204+
" <td>2504.0</td>\n",
205+
" <td>-10102027.0</td>\n",
206+
" </tr>\n",
207+
" <tr>\n",
208+
" <th>2224</th>\n",
209+
" <td>46</td>\n",
210+
" <td>Canada Revenue Agency</td>\n",
211+
" <td>SRV03577</td>\n",
212+
" <td>0.0</td>\n",
213+
" <td>0.0</td>\n",
214+
" <td>0.0</td>\n",
215+
" <td>0.0</td>\n",
216+
" <td>32992344.0</td>\n",
217+
" <td>0.0</td>\n",
218+
" <td>-32992344.0</td>\n",
219+
" </tr>\n",
220+
" <tr>\n",
221+
" <th>2171</th>\n",
222+
" <td>46</td>\n",
223+
" <td>Canada Revenue Agency</td>\n",
224+
" <td>1108</td>\n",
225+
" <td>38273778.0</td>\n",
226+
" <td>38891905.0</td>\n",
227+
" <td>20783178.0</td>\n",
228+
" <td>40192111.0</td>\n",
229+
" <td>49732240.0</td>\n",
230+
" <td>238248.0</td>\n",
231+
" <td>-49493992.0</td>\n",
232+
" </tr>\n",
233+
" </tbody>\n",
234+
"</table>\n",
235+
"<p>2519 rows × 10 columns</p>\n",
236+
"</div>"
237+
],
238+
"text/plain": [
239+
"fiscal_yr org_id department_en service_id 2018-2019 \\\n",
240+
"2211 46 Canada Revenue Agency 3728 0.0 \n",
241+
"1706 26 Canada Border Services Agency 669 57653256.0 \n",
242+
"1609 228 National Research Council Canada 1677 0.0 \n",
243+
"2173 46 Canada Revenue Agency 1110 1525.0 \n",
244+
"1724 26 Canada Border Services Agency 728 21739228.0 \n",
245+
"... ... ... ... ... \n",
246+
"2174 46 Canada Revenue Agency 1111 43107477.0 \n",
247+
"1848 282 Public Service Commission of Canada 1195 1000000.0 \n",
248+
"1494 151 Financial Consumer Agency of Canada 1726 0.0 \n",
249+
"2224 46 Canada Revenue Agency SRV03577 0.0 \n",
250+
"2171 46 Canada Revenue Agency 1108 38273778.0 \n",
251+
"\n",
252+
"fiscal_yr 2019-2020 2020-2021 2021-2022 2022-2023 2023-2024 \\\n",
253+
"2211 0.0 0.0 0.0 0.0 45126410.0 \n",
254+
"1706 55801982.0 8678302.0 23381642.0 70361874.0 89154252.0 \n",
255+
"1609 0.0 0.0 0.0 57000000.0 69000000.0 \n",
256+
"2173 1035.0 16908.0 60202.0 131499.0 6142697.0 \n",
257+
"1724 20207714.0 19892542.0 21910803.0 22976516.0 25317100.0 \n",
258+
"... ... ... ... ... ... \n",
259+
"2174 39999259.0 42481757.0 39880293.0 40803035.0 39760412.0 \n",
260+
"1848 488504.0 1242967.0 1128947.0 1128947.0 10234.0 \n",
261+
"1494 0.0 10243558.0 9830975.0 10104531.0 2504.0 \n",
262+
"2224 0.0 0.0 0.0 32992344.0 0.0 \n",
263+
"2171 38891905.0 20783178.0 40192111.0 49732240.0 238248.0 \n",
264+
"\n",
265+
"fiscal_yr diff_latest \n",
266+
"2211 45126410.0 \n",
267+
"1706 18792378.0 \n",
268+
"1609 12000000.0 \n",
269+
"2173 6011198.0 \n",
270+
"1724 2340584.0 \n",
271+
"... ... \n",
272+
"2174 -1042623.0 \n",
273+
"1848 -1118713.0 \n",
274+
"1494 -10102027.0 \n",
275+
"2224 -32992344.0 \n",
276+
"2171 -49493992.0 \n",
277+
"\n",
278+
"[2519 rows x 10 columns]"
279+
]
280+
},
281+
"execution_count": 101,
282+
"metadata": {},
283+
"output_type": "execute_result"
284+
}
285+
],
286+
"source": [
287+
"# biggest swings in service volumes\n",
288+
"sv = si.loc[:,['fiscal_yr', 'org_id','department_en', 'service_id', 'num_applications_total']]\n",
289+
"sv = sv.iloc[:-1] #cut last row, is timestamp\n",
290+
"\n",
291+
"sv['num_applications_total'] = pd.to_numeric(sv['num_applications_total'])\n",
292+
"\n",
293+
"sv = sv.pivot_table(\n",
294+
" values='num_applications_total', \n",
295+
" index=['org_id','department_en', 'service_id'], \n",
296+
" columns='fiscal_yr',\n",
297+
" aggfunc='sum')\n",
298+
"\n",
299+
"sv = sv.fillna(0).reset_index()\n",
300+
"\n",
301+
"sv_diff = sv.iloc[:,:-1]\n",
302+
"\n",
303+
"sv_diff['diff_latest'] = sv_diff.iloc[:, -1]-sv_diff.iloc[:, -2]\n",
304+
"\n",
305+
"sv_diff = sv_diff.sort_values(by='diff_latest', ascending=False)\n",
306+
"\n",
307+
"\n",
308+
"sv_diff"
309+
]
310+
}
311+
],
312+
"metadata": {
313+
"kernelspec": {
314+
"display_name": "Python 3 (ipykernel)",
315+
"language": "python",
316+
"name": "python3"
317+
},
318+
"language_info": {
319+
"codemirror_mode": {
320+
"name": "ipython",
321+
"version": 3
322+
},
323+
"file_extension": ".py",
324+
"mimetype": "text/x-python",
325+
"name": "python",
326+
"nbconvert_exporter": "python",
327+
"pygments_lexer": "ipython3",
328+
"version": "3.12.8"
329+
}
330+
},
331+
"nbformat": 4,
332+
"nbformat_minor": 5
333+
}

0 commit comments

Comments
 (0)