Skip to content

Commit 77bec14

Browse files
committed
bug fix on drf actuals
1 parent bf2eaed commit 77bec14

File tree

2 files changed

+44
-46
lines changed

2 files changed

+44
-46
lines changed

notebooks/drf-yrs.ipynb

Lines changed: 23 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"cells": [
33
{
44
"cell_type": "code",
5-
"execution_count": 2,
5+
"execution_count": 1,
66
"id": "fd2d413a-666f-49d4-b86c-7d6898a496a5",
77
"metadata": {},
88
"outputs": [],
@@ -29,7 +29,7 @@
2929
},
3030
{
3131
"cell_type": "code",
32-
"execution_count": 3,
32+
"execution_count": 2,
3333
"id": "45467185",
3434
"metadata": {},
3535
"outputs": [],
@@ -53,12 +53,20 @@
5353
},
5454
{
5555
"cell_type": "code",
56-
"execution_count": 4,
56+
"execution_count": null,
5757
"id": "e9dde87f",
5858
"metadata": {},
5959
"outputs": [],
6060
"source": [
61-
"# Load and normalize\n",
61+
"def fy_to_num(fiscal_yr): # Returns the year in which the fiscal year ends, as a number.\n",
62+
" return pd.to_numeric(fiscal_yr.split('-')[-1], errors='coerce')# Load and normalize\n",
63+
"\n",
64+
"def num_to_fy(number): # Returns the fiscal year in YYYY-YYYY format\n",
65+
" if (number > 999): \n",
66+
" return f'{number-1}-{number}'\n",
67+
" else:\n",
68+
" return np.nan\n",
69+
"\n",
6270
"drf = load_csv('rbpo.csv', config, False)\n",
6371
"drf = standardize_column_names(drf)\n",
6472
"drf['fiscal_yr'] = drf['fiscal_yr'].apply(clean_fiscal_yr)\n",
@@ -95,8 +103,11 @@
95103
"fy_end = pd.to_numeric(drf['fiscal_yr'].str.split('-').str[-1].astype(int), errors='coerce')\n",
96104
"\n",
97105
"# Calculate 4-digit 'measure_yr' and 'report_yr' from 'fiscal_yr' and 'yr_adjust'\n",
98-
"drf['report_yr'] = fy_end.astype('Int64')\n",
99-
"drf['measure_yr'] = (fy_end+ drf['yr_adjust']).astype('Int64')\n",
106+
"drf['report_yr'] = drf['fiscal_yr'].apply(fy_to_num)\n",
107+
"drf['measure_yr'] = (drf['fiscal_yr'].apply(fy_to_num)+drf['yr_adjust'])\n",
108+
"\n",
109+
"# drf['report_yr'] = fy_end.astype('Int64')\n",
110+
"# drf['measure_yr'] = (fy_end+ drf['yr_adjust']).astype('Int64')\n",
100111
"\n",
101112
"# Latest SI fiscal year per org (end year as int)\n",
102113
"si_latest = (si.assign(lat_end=pd.to_numeric(si['fiscal_yr'].str.split('-').str[-1], errors='coerce'))\n",
@@ -109,16 +120,7 @@
109120
"# Split planned vs actual; only drop blank measures\n",
110121
"drf_actuals = drf[drf['planned_actual']=='actual'].dropna(subset=['measure']).copy()\n",
111122
"drf_planned = drf[drf['planned_actual']=='planned'].dropna(subset=['measure']).copy()\n",
112-
"\n"
113-
]
114-
},
115-
{
116-
"cell_type": "code",
117-
"execution_count": 5,
118-
"id": "3cf6e679",
119-
"metadata": {},
120-
"outputs": [],
121-
"source": [
123+
"\n",
122124
"# Drop any actuals from the fiscal year in progress\n",
123125
"# The fiscal year in progress will be indicated with a \".\" in the ftes field\n",
124126
"# Determine the highest measure/report year for actuals, i.e. max without a \".\" in the ftes field\n",
@@ -136,6 +138,7 @@
136138
"drf_actuals = drf_actuals[\n",
137139
" drf_actuals['measure_yr'] <= (drf_actuals['latest_report_yr_actuals'].fillna(0))\n",
138140
"]\n",
141+
"drf_actuals['measure'] = drf_actuals['measure'].replace('.', 0)\n",
139142
"\n",
140143
"\n",
141144
"# Merge in the highest measure year for actuals in the planned table\n",
@@ -178,10 +181,10 @@
178181
"drf['si_link_yr'] = drf['si_link_yr'].astype('Int64')\n",
179182
"\n",
180183
"# # # Return years to fiscal year YYYY-YYYY format\n",
181-
"drf['report_yr'] = (drf['report_yr']-1).apply(str) +\"-\"+ (drf['report_yr']).apply(str)\n",
182-
"drf['measure_yr'] = (drf['measure_yr']-1).apply(str) +\"-\"+ (drf['measure_yr']).apply(str)\n",
183-
"drf['si_link_yr'] = (drf['si_link_yr']-1).apply(str) +\"-\"+ (drf['si_link_yr']).apply(str)\n",
184-
"drf['latest_si_yr'] = (drf['latest_si_yr']-1).apply(str) +\"-\"+ (drf['latest_si_yr']).apply(str)\n"
184+
"drf['report_yr'] = drf['report_yr'].apply(num_to_fy)\n",
185+
"drf['measure_yr'] = drf['measure_yr'].apply(num_to_fy)\n",
186+
"drf['si_link_yr'] = drf['si_link_yr'].apply(num_to_fy)\n",
187+
"drf['latest_si_yr'] = drf['latest_si_yr'].apply(num_to_fy)\n"
185188
]
186189
}
187190
],

src/utils.py

Lines changed: 21 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import pandas as pd
2+
import numpy as np
23
import json
34
import logging
45
logger = logging.getLogger(__name__)
@@ -111,8 +112,15 @@ def build_drf(si, config, snapshot=False):
111112
"""
112113
Load and clean DRF data (i.e. RBPO). Refer to snapshot if necessary!
113114
"""
115+
def fy_to_num(fiscal_yr):
    """Return the year in which the fiscal year ends, as a number.

    Args:
        fiscal_yr: Fiscal year label such as 'YYYY-YYYY'. Only the part
            after the last '-' is parsed, so a bare 'YYYY' also works.

    Returns:
        The numeric end year, or NaN when the value is missing (not a
        string) or its last segment cannot be parsed as a number.
    """
    # Missing values arrive as None/NaN rather than str; calling .split on
    # them would raise AttributeError, so map them to NaN explicitly.
    if not isinstance(fiscal_yr, str):
        return float('nan')
    end_part = fiscal_yr.split('-')[-1].strip()
    # errors='coerce' turns unparseable text into NaN instead of raising.
    return pd.to_numeric(end_part, errors='coerce')
117+
118+
def num_to_fy(number):
    """Return the fiscal year ending in ``number`` in 'YYYY-YYYY' format.

    Args:
        number: End year of the fiscal year (int, float, or a missing
            value such as NaN / pd.NA).

    Returns:
        A string '<number-1>-<number>' when ``number`` is greater than 999,
        otherwise NaN (missing values and years below 1000 are treated as
        "no fiscal year").
    """
    # pd.NA cannot be used in a boolean context (``pd.NA > 999`` is NA and
    # ``if NA`` raises TypeError), so test for missing values first.
    if pd.isna(number) or not number > 999:
        return np.nan
    # Cast to int so float years (e.g. 2024.0 from a column that contained
    # NaN) render as '2023-2024' rather than '2023.0-2024.0'.
    end_yr = int(number)
    return f'{end_yr - 1}-{end_yr}'
114123
try:
115-
# Load and normalize
116124
drf = load_csv('rbpo.csv', config, False)
117125
drf = standardize_column_names(drf)
118126
drf['fiscal_yr'] = drf['fiscal_yr'].apply(clean_fiscal_yr)
@@ -145,12 +153,9 @@ def build_drf(si, config, snapshot=False):
145153
drf[['planned_actual', 'spending_fte', 'yr_adjust']] = drf['plan_actual_spendfte_yr'].str.split('_', n=2, expand=True)
146154
drf['yr_adjust'] = drf['yr_adjust'].fillna('1').astype(int) - 1
147155

148-
# Parse fiscal year end (YYYY-YYYY -> second part)
149-
fy_end = pd.to_numeric(drf['fiscal_yr'].str.split('-').str[-1].astype(int), errors='coerce')
150-
151156
# Calculate 4-digit 'measure_yr' and 'report_yr' from 'fiscal_yr' and 'yr_adjust'
152-
drf['report_yr'] = fy_end.astype('Int64')
153-
drf['measure_yr'] = (fy_end+ drf['yr_adjust']).astype('Int64')
157+
drf['report_yr'] = drf['fiscal_yr'].apply(fy_to_num)
158+
drf['measure_yr'] = (drf['fiscal_yr'].apply(fy_to_num)+drf['yr_adjust'])
154159

155160
# Latest SI fiscal year per org (end year as int)
156161
si_latest = (si.assign(lat_end=pd.to_numeric(si['fiscal_yr'].str.split('-').str[-1], errors='coerce'))
@@ -159,23 +164,11 @@ def build_drf(si, config, snapshot=False):
159164

160165
drf = drf.merge(si_latest, on='org_id', how='left')
161166

162-
163167
# Split planned vs actual; only drop blank measures
164168
drf_actuals = drf[drf['planned_actual']=='actual'].dropna(subset=['measure']).copy()
165169
drf_planned = drf[drf['planned_actual']=='planned'].dropna(subset=['measure']).copy()
166170

167171
# Drop any actuals from the fiscal year in progress
168-
# # TODO: Turn this into a function that doesn't need manual intevention
169-
# current_yr = 2026
170-
# drf_actuals = drf_actuals[drf_actuals['measure_yr']<current_yr]
171-
172-
# # Determine the highest measure year for actuals
173-
# latest_actuals = (drf_actuals
174-
# .groupby(['org_id', 'program_id', 'spending_fte'], as_index=False)['report_yr']
175-
# .max()
176-
# .rename(columns={'report_yr':'report_yr_actuals'})
177-
# )
178-
179172
# The fiscal year in progress will be indicated with a "." in the ftes field
180173
# Determine the highest measure/report year for actuals, i.e. max without a "." in the ftes field
181174
latest_actuals = drf_actuals[(drf_actuals['spending_fte'] == 'ftes') & (drf_actuals['measure'] != '.')] \
@@ -192,14 +185,16 @@ def build_drf(si, config, snapshot=False):
192185
drf_actuals = drf_actuals[
193186
drf_actuals['measure_yr'] <= (drf_actuals['latest_report_yr_actuals'].fillna(0))
194187
]
188+
drf_actuals['measure'] = drf_actuals['measure'].replace('.', 0)
189+
195190

196191
# Merge in the highest measure year for actuals in the planned table
197192
drf_planned = drf_planned.merge(latest_actuals,
198193
on=['org_id', 'program_id'],
199194
how='left')
200195

201196
# Only keep planned years that are greater than the latest actual report year
202-
# fillna(-np.inf) assures that all planned values are included, even if there are not associated actual report years
197+
# fillna(0) ensures that all planned values are included, even when there is no associated actual report year
203198
drf_planned = drf_planned[
204199
drf_planned['measure_yr'] > (drf_planned['latest_report_yr_actuals'].fillna(0))
205200
]
@@ -233,10 +228,11 @@ def build_drf(si, config, snapshot=False):
233228
drf['si_link_yr'] = drf['si_link_yr'].astype('Int64')
234229

235230
# # # Return years to fiscal year YYYY-YYYY format
236-
drf['report_yr'] = (drf['report_yr']-1).apply(str) +"-"+ (drf['report_yr']).apply(str)
237-
drf['measure_yr'] = (drf['measure_yr']-1).apply(str) +"-"+ (drf['measure_yr']).apply(str)
238-
drf['si_link_yr'] = (drf['si_link_yr']-1).apply(str) +"-"+ (drf['si_link_yr']).apply(str)
239-
drf['latest_si_yr'] = (drf['latest_si_yr']-1).apply(str) +"-"+ (drf['latest_si_yr']).apply(str)
231+
drf['report_yr'] = drf['report_yr'].apply(num_to_fy)
232+
drf['measure_yr'] = drf['measure_yr'].apply(num_to_fy)
233+
drf['si_link_yr'] = drf['si_link_yr'].apply(num_to_fy)
234+
drf['latest_si_yr'] = drf['latest_si_yr'].apply(num_to_fy)
235+
240236

241237
if snapshot:
242238
OUTPUT_DIR = config['output_dir'] / 'snapshots' / snapshot
@@ -251,9 +247,8 @@ def build_drf(si, config, snapshot=False):
251247
)
252248

253249
return drf
254-
255-
except Exception as e:
256-
logger.error("Error: %s", e, exc_info=True)
250+
251+
except Exception as e: logger.error("Error: %s", e, exc_info=True)
257252

258253
def build_ifoi(config):
259254
try:

0 commit comments

Comments
 (0)