11import pandas as pd
2+ import numpy as np
23import json
34import logging
45logger = logging .getLogger (__name__ )
@@ -111,8 +112,15 @@ def build_drf(si, config, snapshot=False):
111112 """
112113 Load and clean DRF data (i.e. RBPO). Refer to snapshot if necessary!
113114 """
def fy_to_num(fiscal_yr):
    """Return the year in which a fiscal year ends, as a number.

    Args:
        fiscal_yr: Fiscal-year label whose last '-'-separated part is the
            ending year, e.g. '2023-2024'.

    Returns:
        The ending year as a number (2024 for '2023-2024'), or NaN when the
        label is missing or its last part is not numeric.
    """
    # Missing values (None, float NaN, pd.NA) are not strings and have no
    # .split(); map them to NaN so Series.apply() does not abort on them.
    if not isinstance(fiscal_yr, str):
        return float('nan')
    end_part = fiscal_yr.split('-')[-1]
    # errors='coerce' turns a non-numeric tail into NaN instead of raising.
    return pd.to_numeric(end_part, errors='coerce')
117+
def num_to_fy(number):
    """Return the fiscal year ending in ``number`` in YYYY-YYYY format.

    Args:
        number: Ending year of the fiscal year (int, float or NumPy/pandas
            scalar). May be missing (None, NaN or pd.NA).

    Returns:
        A string such as '2023-2024' for 2024, or NaN when ``number`` is
        missing or not greater than 999 (i.e. not a 4-digit year).
    """
    # Check for missing values first: `pd.NA > 999` evaluates to pd.NA, and
    # using that in an `if` raises TypeError (nullable Int64 columns yield
    # pd.NA for missing entries).
    if pd.isna(number) or number <= 999:
        return np.nan
    # Columns that contain NaN are upcast to float; cast back to int so the
    # label reads '2023-2024' rather than '2023.0-2024.0'.
    end_yr = int(number)
    return f'{end_yr - 1}-{end_yr}'
114123 try :
115- # Load and normalize
116124 drf = load_csv ('rbpo.csv' , config , False )
117125 drf = standardize_column_names (drf )
118126 drf ['fiscal_yr' ] = drf ['fiscal_yr' ].apply (clean_fiscal_yr )
@@ -145,12 +153,9 @@ def build_drf(si, config, snapshot=False):
145153 drf [['planned_actual' , 'spending_fte' , 'yr_adjust' ]] = drf ['plan_actual_spendfte_yr' ].str .split ('_' , n = 2 , expand = True )
146154 drf ['yr_adjust' ] = drf ['yr_adjust' ].fillna ('1' ).astype (int ) - 1
147155
148- # Parse fiscal year end (YYYY-YYYY -> second part)
149- fy_end = pd .to_numeric (drf ['fiscal_yr' ].str .split ('-' ).str [- 1 ].astype (int ), errors = 'coerce' )
150-
151156 # Calculate 4-digit 'measure_yr' and 'report_yr' from 'fiscal_yr' and 'yr_adjust'
152- drf ['report_yr' ] = fy_end . astype ( 'Int64' )
153- drf ['measure_yr' ] = (fy_end + drf ['yr_adjust' ]). astype ( 'Int64' )
157+ drf ['report_yr' ] = drf [ 'fiscal_yr' ]. apply ( fy_to_num )
158+ drf ['measure_yr' ] = (drf ['fiscal_yr' ]. apply ( fy_to_num ) + drf [ 'yr_adjust' ] )
154159
155160 # Latest SI fiscal year per org (end year as int)
156161 si_latest = (si .assign (lat_end = pd .to_numeric (si ['fiscal_yr' ].str .split ('-' ).str [- 1 ], errors = 'coerce' ))
@@ -159,23 +164,11 @@ def build_drf(si, config, snapshot=False):
159164
160165 drf = drf .merge (si_latest , on = 'org_id' , how = 'left' )
161166
162-
163167 # Split planned vs actual; only drop blank measures
164168 drf_actuals = drf [drf ['planned_actual' ]== 'actual' ].dropna (subset = ['measure' ]).copy ()
165169 drf_planned = drf [drf ['planned_actual' ]== 'planned' ].dropna (subset = ['measure' ]).copy ()
166170
167171 # Drop any actuals from the fiscal year in progress
168- # # TODO: Turn this into a function that doesn't need manual intevention
169- # current_yr = 2026
170- # drf_actuals = drf_actuals[drf_actuals['measure_yr']<current_yr]
171-
172- # # Determine the highest measure year for actuals
173- # latest_actuals = (drf_actuals
174- # .groupby(['org_id', 'program_id', 'spending_fte'], as_index=False)['report_yr']
175- # .max()
176- # .rename(columns={'report_yr':'report_yr_actuals'})
177- # )
178-
179172 # The fiscal year in progress will be indicated with a "." in the ftes field
180173 # Determine the highest measure/report year for actuals, i.e. max without a "." in the ftes field
181174 latest_actuals = drf_actuals [(drf_actuals ['spending_fte' ] == 'ftes' ) & (drf_actuals ['measure' ] != '.' )] \
@@ -192,14 +185,16 @@ def build_drf(si, config, snapshot=False):
192185 drf_actuals = drf_actuals [
193186 drf_actuals ['measure_yr' ] <= (drf_actuals ['latest_report_yr_actuals' ].fillna (0 ))
194187 ]
188+ drf_actuals ['measure' ] = drf_actuals ['measure' ].replace ('.' , 0 )
189+
195190
196191 # Merge in the highest measure year for actuals in the planned table
197192 drf_planned = drf_planned .merge (latest_actuals ,
198193 on = ['org_id' , 'program_id' ],
199194 how = 'left' )
200195
201196 # Only keep planned years that are greater than the latest actual report year
202- # fillna(-np.inf ) assures that all planned values are included, even if there are not associated actual report years
197+ # fillna(0 ) assures that all planned values are included, even if there are not associated actual report years
203198 drf_planned = drf_planned [
204199 drf_planned ['measure_yr' ] > (drf_planned ['latest_report_yr_actuals' ].fillna (0 ))
205200 ]
@@ -233,10 +228,11 @@ def build_drf(si, config, snapshot=False):
233228 drf ['si_link_yr' ] = drf ['si_link_yr' ].astype ('Int64' )
234229
235230 # # # Return years to fiscal year YYYY-YYYY format
236- drf ['report_yr' ] = (drf ['report_yr' ]- 1 ).apply (str ) + "-" + (drf ['report_yr' ]).apply (str )
237- drf ['measure_yr' ] = (drf ['measure_yr' ]- 1 ).apply (str ) + "-" + (drf ['measure_yr' ]).apply (str )
238- drf ['si_link_yr' ] = (drf ['si_link_yr' ]- 1 ).apply (str ) + "-" + (drf ['si_link_yr' ]).apply (str )
239- drf ['latest_si_yr' ] = (drf ['latest_si_yr' ]- 1 ).apply (str ) + "-" + (drf ['latest_si_yr' ]).apply (str )
231+ drf ['report_yr' ] = drf ['report_yr' ].apply (num_to_fy )
232+ drf ['measure_yr' ] = drf ['measure_yr' ].apply (num_to_fy )
233+ drf ['si_link_yr' ] = drf ['si_link_yr' ].apply (num_to_fy )
234+ drf ['latest_si_yr' ] = drf ['latest_si_yr' ].apply (num_to_fy )
235+
240236
241237 if snapshot :
242238 OUTPUT_DIR = config ['output_dir' ] / 'snapshots' / snapshot
@@ -251,9 +247,8 @@ def build_drf(si, config, snapshot=False):
251247 )
252248
253249 return drf
254-
255- except Exception as e :
256- logger .error ("Error: %s" , e , exc_info = True )
250+
251+ except Exception as e : logger .error ("Error: %s" , e , exc_info = True )
257252
258253def build_ifoi (config ):
259254 try :
0 commit comments