Skip to content

Commit 0c9bd0b

Browse files
committed
remove partial actual yrs from drf
1 parent 9d129f2 commit 0c9bd0b

File tree

2 files changed

+11
-99
lines changed

2 files changed

+11
-99
lines changed

notebooks/drf-yrs.ipynb

Lines changed: 6 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -51,112 +51,14 @@
5151
"si = pd.read_csv(si_path, keep_default_na=False, na_values='', delimiter=';', engine='python', skipfooter=2)"
5252
]
5353
},
54-
{
55-
"cell_type": "code",
56-
"execution_count": null,
57-
"id": "4071d216",
58-
"metadata": {},
59-
"outputs": [],
60-
"source": [
61-
"drf = load_csv('rbpo.csv', config, False)\n",
62-
"drf = standardize_column_names(drf)\n",
63-
"drf['fiscal_yr'] = drf['fiscal_yr'].apply(clean_fiscal_yr)\n",
64-
"\n",
65-
"# Define columns related to planned and actual measures: spending and FTEs \n",
66-
"fte_spend_cols = [\n",
67-
" 'planned_spending_1', \n",
68-
" 'actual_spending', \n",
69-
" 'planned_spending_2', \n",
70-
" 'planned_spending_3',\n",
71-
" 'planned_ftes_1', \n",
72-
" 'actual_ftes', \n",
73-
" 'planned_ftes_2', \n",
74-
" 'planned_ftes_3'\n",
75-
"]\n",
76-
"\n",
77-
"# Melt (unpivot) the DataFrame to long format\n",
78-
"drf = pd.melt(\n",
79-
" drf, \n",
80-
" id_vars=['fiscal_yr', 'org_id', 'program_id'], \n",
81-
" value_vars=fte_spend_cols, \n",
82-
" var_name='plan_actual_spendfte_yr', \n",
83-
" value_name='measure'\n",
84-
")\n",
85-
"\n",
86-
"# Split 'plan_actual_yr' into separate columns for planned/actual, spending/FTEs, and year adjustment\n",
87-
"drf[['planned_actual', 'spending_fte', 'yr_adjust']] = drf['plan_actual_spendfte_yr'].str.split('_', expand=True)\n",
88-
"drf['yr_adjust'] = drf['yr_adjust'].fillna('1').astype(int) - 1\n",
89-
"\n",
90-
"\n",
91-
"# Calculate 4-digit 'measure_yr' and 'report_yr' from 'fiscal_yr' and 'yr_adjust'\n",
92-
"drf['measure_yr'] = drf['fiscal_yr'].str.split('-').str[1].astype(int) + drf['yr_adjust']\n",
93-
"drf['report_yr'] = drf['fiscal_yr'].str.split('-').str[1].astype(int)\n",
94-
"\n",
95-
"latest_si_fy_by_org = si.groupby('org_id')['fiscal_yr'].max()\n",
96-
"\n",
97-
"drf = pd.merge(drf, latest_si_fy_by_org, on='org_id', how='left', suffixes=['', '_si'])\n",
98-
"drf['fiscal_yr_si'] = drf['fiscal_yr_si'].fillna('0-0')\n",
99-
"drf['latest_si_yr'] = drf['fiscal_yr_si'].str.split('-').str[1].astype(int)\n",
100-
"\n",
101-
"drf_actuals = drf[\n",
102-
" (drf['planned_actual'] == 'actual')\n",
103-
"].dropna()\n",
104-
"\n",
105-
"drf_planned = drf[\n",
106-
" (drf['planned_actual'] == 'planned')\n",
107-
"].dropna()\n",
108-
"\n",
109-
"# Determine the highest measure year for actuals\n",
110-
"latest_actuals = drf_actuals.groupby(['org_id', 'program_id', 'spending_fte'])['report_yr'].max().reset_index()\n",
111-
"\n",
112-
"# Merge in the highest measure year for actuals in the planned table\n",
113-
"drf_planned = pd.merge(left=drf_planned, right=latest_actuals, how='left', on=['org_id', 'program_id', 'spending_fte'], suffixes=['', '_actuals']) \n",
114-
"\n",
115-
"# Only keep planned years that are greater than the latest actual report year\n",
116-
"drf_planned = drf_planned[drf_planned['measure_yr']>drf_planned['report_yr_actuals']]\n",
117-
"\n",
118-
"# # # Each report year has 3 measure years for planned values.\n",
119-
"# # Only keep records that have the highest report year for that given program, measure type, and measure year\n",
120-
"idx = drf_planned.groupby(['org_id', 'program_id', 'spending_fte', 'measure_yr'])['report_yr'].idxmax()\n",
121-
"drf_planned = drf_planned.loc[idx]\n",
122-
"\n",
123-
"# # # Concatenate actuals and planned entries\n",
124-
"drf = pd.concat([drf_actuals, drf_planned])\n",
125-
"\n",
126-
"drf = drf[[\n",
127-
" 'org_id', \n",
128-
" 'latest_si_yr', \n",
129-
" 'program_id', \n",
130-
" 'report_yr', \n",
131-
" 'measure_yr', \n",
132-
" 'planned_actual', \n",
133-
" 'spending_fte',\n",
134-
" 'measure']].reset_index(drop=True)\n",
135-
"\n",
136-
"# # Set up si_link_yr: a fiscal year column to be able to include years \n",
137-
"# # beyond the service inventory when joining by service id and fy.\n",
138-
"# # if measure year > latest service fy, = latest service fy, else use measure_yr\n",
139-
"drf.loc[drf['measure_yr']>drf['latest_si_yr'], 'si_link_yr'] = drf['latest_si_yr']\n",
140-
"drf.loc[drf['measure_yr']<=drf['latest_si_yr'], 'si_link_yr'] = drf['measure_yr']\n",
141-
"drf['si_link_yr'] = drf['si_link_yr'].astype(int)\n",
142-
"\n",
143-
"# # # Return years to fiscal year YYYY-YYYY format\n",
144-
"drf['report_yr'] = (drf['report_yr']-1).apply(str) +\"-\"+ (drf['report_yr']).apply(str)\n",
145-
"drf['measure_yr'] = (drf['measure_yr']-1).apply(str) +\"-\"+ (drf['measure_yr']).apply(str)\n",
146-
"drf['si_link_yr'] = (drf['si_link_yr']-1).apply(str) +\"-\"+ (drf['si_link_yr']).apply(str)\n",
147-
"drf['latest_si_yr'] = (drf['latest_si_yr']-1).apply(str) +\"-\"+ (drf['latest_si_yr']).apply(str)\n",
148-
"\n",
149-
"\n"
150-
]
151-
},
15254
{
15355
"cell_type": "code",
15456
"execution_count": null,
15557
"id": "2373cc17",
15658
"metadata": {},
15759
"outputs": [],
15860
"source": [
159-
"drf.loc[(drf['org_id']==130)&(drf['program_id']=='ISS02')]\n",
61+
"drf.loc[(drf['program_id']=='BRB01')]\n",
16062
"# drf['valid_plan'] = (drf['report_yr'] > latest_si_fy)\n",
16163
"\n",
16264
"# drf[(drf['org_id']==130) & (drf['program_id']=='ISS02') & (drf['planned_actual']=='planned') & (drf['report_yr'] > latest_si_fy)]\n"
@@ -219,6 +121,11 @@
219121
"drf_actuals = drf[drf['planned_actual']=='actual'].dropna(subset=['measure']).copy()\n",
220122
"drf_planned = drf[drf['planned_actual']=='planned'].dropna(subset=['measure']).copy()\n",
221123
"\n",
124+
"# Drop any actuals from the fiscal year in progress\n",
125+
"# TODO: Turn this into a function that looks at the current datetime\n",
126+
"current_yr = 2026\n",
127+
"drf_actuals = drf_actuals[drf_actuals['measure_yr']<current_yr]\n",
128+
"\n",
222129
"# Determine the highest measure year for actuals\n",
223130
"latest_actuals = (drf_actuals\n",
224131
" .groupby(['org_id', 'program_id', 'spending_fte'], as_index=False)['report_yr']\n",

src/utils.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,11 @@ def build_drf(si, config, snapshot=False):
155155
drf_actuals = drf[drf['planned_actual']=='actual'].dropna(subset=['measure']).copy()
156156
drf_planned = drf[drf['planned_actual']=='planned'].dropna(subset=['measure']).copy()
157157

158+
# Drop any actuals from the fiscal year in progress
159+
# TODO: Turn this into a function that looks at the current datetime
160+
current_yr = 2026
161+
drf_actuals = drf_actuals[drf_actuals['measure_yr']<current_yr]
162+
158163
# Determine the highest report year for actuals
159164
latest_actuals = (drf_actuals
160165
.groupby(['org_id', 'program_id', 'spending_fte'], as_index=False)['report_yr']

0 commit comments

Comments
 (0)