Skip to content

Commit bf2eaed

Browse files
committed
updated drf to exclude incomplete actual yrs
1 parent 34c91aa commit bf2eaed

File tree

4 files changed

+71
-304
lines changed

4 files changed

+71
-304
lines changed

notebooks/drf-yrs.ipynb

Lines changed: 41 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"cells": [
33
{
44
"cell_type": "code",
5-
"execution_count": null,
5+
"execution_count": 2,
66
"id": "fd2d413a-666f-49d4-b86c-7d6898a496a5",
77
"metadata": {},
88
"outputs": [],
@@ -29,7 +29,7 @@
2929
},
3030
{
3131
"cell_type": "code",
32-
"execution_count": null,
32+
"execution_count": 3,
3333
"id": "45467185",
3434
"metadata": {},
3535
"outputs": [],
@@ -53,21 +53,8 @@
5353
},
5454
{
5555
"cell_type": "code",
56-
"execution_count": null,
57-
"id": "2373cc17",
58-
"metadata": {},
59-
"outputs": [],
60-
"source": [
61-
"drf.loc[(drf['program_id']=='BRB01')]\n",
62-
"# drf['valid_plan'] = (drf['report_yr'] > latest_si_fy)\n",
63-
"\n",
64-
"# drf[(drf['org_id']==130) & (drf['program_id']=='ISS02') & (drf['planned_actual']=='planned') & (drf['report_yr'] > latest_si_fy)]\n"
65-
]
66-
},
67-
{
68-
"cell_type": "code",
69-
"execution_count": null,
70-
"id": "a5c0a639",
56+
"execution_count": 4,
57+
"id": "e9dde87f",
7158
"metadata": {},
7259
"outputs": [],
7360
"source": [
@@ -76,6 +63,8 @@
7663
"drf = standardize_column_names(drf)\n",
7764
"drf['fiscal_yr'] = drf['fiscal_yr'].apply(clean_fiscal_yr)\n",
7865
"\n",
66+
"si['org_id']=pd.to_numeric(si['org_id'].astype(int), errors='coerce')\n",
67+
"\n",
7968
"# Define columns related to planned and actual measures: spending and FTEs\n",
8069
"# These columns will be unpivoted / melted\n",
8170
"fte_spend_cols = [\n",
@@ -111,44 +100,60 @@
111100
"\n",
112101
"# Latest SI fiscal year per org (end year as int)\n",
113102
"si_latest = (si.assign(lat_end=pd.to_numeric(si['fiscal_yr'].str.split('-').str[-1], errors='coerce'))\n",
114-
" .groupby('org_id', as_index=False)['lat_end'].max()\n",
115-
" .rename(columns={'lat_end':'latest_si_yr'}))\n",
103+
" .groupby('org_id', as_index=False)['lat_end'].max()\n",
104+
" .rename(columns={'lat_end':'latest_si_yr'}))\n",
116105
"\n",
117106
"drf = drf.merge(si_latest, on='org_id', how='left')\n",
118107
"\n",
119108
"\n",
120109
"# Split planned vs actual; only drop blank measures\n",
121110
"drf_actuals = drf[drf['planned_actual']=='actual'].dropna(subset=['measure']).copy()\n",
122111
"drf_planned = drf[drf['planned_actual']=='planned'].dropna(subset=['measure']).copy()\n",
123-
"\n",
112+
"\n"
113+
]
114+
},
115+
{
116+
"cell_type": "code",
117+
"execution_count": 5,
118+
"id": "3cf6e679",
119+
"metadata": {},
120+
"outputs": [],
121+
"source": [
124122
"# Drop any actuals from the fiscal year in progress\n",
125-
"# TODO: Turn this into a function that looks at the current datetime\n",
126-
"current_yr = 2026\n",
127-
"drf_actuals = drf_actuals[drf_actuals['measure_yr']<current_yr]\n",
128-
"\n",
129-
"# Determine the highest measure year for actuals\n",
130-
"latest_actuals = (drf_actuals\n",
131-
" .groupby(['org_id', 'program_id', 'spending_fte'], as_index=False)['report_yr']\n",
132-
" .max()\n",
133-
" .rename(columns={'report_yr':'report_yr_actuals'})\n",
134-
")\n",
123+
"# The fiscal year in progress will be indicated with a \".\" in the ftes field\n",
124+
"# Determine the highest measure/report year for actuals, i.e. max without a \".\" in the ftes field\n",
125+
"latest_actuals = drf_actuals[(drf_actuals['spending_fte'] == 'ftes') & (drf_actuals['measure'] != '.')] \\\n",
126+
" .groupby(['org_id', 'program_id'], as_index=False) \\\n",
127+
" .agg(latest_report_yr_actuals=('report_yr', 'max'))\n",
128+
"\n",
129+
"# Merge the latest actual report year (latest_report_yr_actuals) into the actuals table\n",
130+
"drf_actuals = drf_actuals.merge(latest_actuals, \n",
131+
" on=['org_id', 'program_id'],\n",
132+
" how='left') \n",
133+
"\n",
134+
"# Only keep actual years that are less than or equal to the latest actual report year\n",
135+
"# fillna(0) does NOT keep actuals that lack a latest actual report year: measure_yr <= 0 is False for real fiscal years, so those rows are dropped (NOTE(review): confirm this exclusion is intended)\n",
136+
"drf_actuals = drf_actuals[\n",
137+
" drf_actuals['measure_yr'] <= (drf_actuals['latest_report_yr_actuals'].fillna(0))\n",
138+
"]\n",
139+
"\n",
135140
"\n",
136141
"# Merge the latest actual report year into the planned table\n",
137142
"drf_planned = drf_planned.merge(latest_actuals, \n",
138-
" on=['org_id', 'program_id', 'spending_fte'],\n",
143+
" on=['org_id', 'program_id'],\n",
139144
" how='left') \n",
140145
"\n",
141146
"# Only keep planned years that are greater than the latest actual report year\n",
142-
"# fillna(-np.inf) assures that all planned values are included, even if there are not associated actual report years\n",
147+
"# fillna(0) ensures that all planned values are kept when there is no associated actual report year (measure_yr > 0 is always True)\n",
143148
"drf_planned = drf_planned[\n",
144-
" drf_planned['measure_yr'] > (drf_planned['report_yr_actuals'].fillna(0))\n",
149+
" drf_planned['measure_yr'] > (drf_planned['latest_report_yr_actuals'].fillna(0))\n",
145150
"]\n",
146151
"\n",
147152
"# # # Each report year has 3 measure years for planned values.\n",
148153
"# # Only keep records that have the highest report year for that given program, measure type, and measure year\n",
149154
"idx = (drf_planned\n",
150-
" .groupby(['org_id', 'program_id', 'spending_fte', 'measure_yr'])['report_yr']\n",
151-
" .idxmax())\n",
155+
" .groupby(['org_id', 'program_id', 'spending_fte', 'measure_yr'])['report_yr']\n",
156+
" .idxmax())\n",
152157
"drf_planned = drf_planned.loc[idx]\n",
153158
"\n",
154159
"# # # Concatenate actuals and planned entries\n",
@@ -176,16 +181,8 @@
176181
"drf['report_yr'] = (drf['report_yr']-1).apply(str) +\"-\"+ (drf['report_yr']).apply(str)\n",
177182
"drf['measure_yr'] = (drf['measure_yr']-1).apply(str) +\"-\"+ (drf['measure_yr']).apply(str)\n",
178183
"drf['si_link_yr'] = (drf['si_link_yr']-1).apply(str) +\"-\"+ (drf['si_link_yr']).apply(str)\n",
179-
"drf['latest_si_yr'] = (drf['latest_si_yr']-1).apply(str) +\"-\"+ (drf['latest_si_yr']).apply(str)"
184+
"drf['latest_si_yr'] = (drf['latest_si_yr']-1).apply(str) +\"-\"+ (drf['latest_si_yr']).apply(str)\n"
180185
]
181-
},
182-
{
183-
"cell_type": "code",
184-
"execution_count": null,
185-
"id": "dacefaa6",
186-
"metadata": {},
187-
"outputs": [],
188-
"source": []
189186
}
190187
],
191188
"metadata": {

notebooks/experiment-template.ipynb

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"cells": [
33
{
44
"cell_type": "code",
5-
"execution_count": 1,
5+
"execution_count": null,
66
"id": "fd2d413a-666f-49d4-b86c-7d6898a496a5",
77
"metadata": {},
88
"outputs": [],
@@ -28,8 +28,6 @@
2828
"import pytz\n",
2929
"from pathlib import Path\n",
3030
"\n",
31-
"\n",
32-
"\n",
3331
"base_dir = Path.cwd()\n",
3432
"parent_dir = base_dir.parent\n",
3533
"config = get_config()"

0 commit comments

Comments
 (0)