|
51 | 51 | "si = pd.read_csv(si_path, keep_default_na=False, na_values='', delimiter=';', engine='python', skipfooter=2)" |
52 | 52 | ] |
53 | 53 | }, |
54 | | - { |
55 | | - "cell_type": "code", |
56 | | - "execution_count": null, |
57 | | - "id": "4071d216", |
58 | | - "metadata": {}, |
59 | | - "outputs": [], |
60 | | - "source": [ |
61 | | - "drf = load_csv('rbpo.csv', config, False)\n", |
62 | | - "drf = standardize_column_names(drf)\n", |
63 | | - "drf['fiscal_yr'] = drf['fiscal_yr'].apply(clean_fiscal_yr)\n", |
64 | | - "\n", |
65 | | - "# Define columns related to planned and actual measures: spending and FTEs \n", |
66 | | - "fte_spend_cols = [\n", |
67 | | - " 'planned_spending_1', \n", |
68 | | - " 'actual_spending', \n", |
69 | | - " 'planned_spending_2', \n", |
70 | | - " 'planned_spending_3',\n", |
71 | | - " 'planned_ftes_1', \n", |
72 | | - " 'actual_ftes', \n", |
73 | | - " 'planned_ftes_2', \n", |
74 | | - " 'planned_ftes_3'\n", |
75 | | - "]\n", |
76 | | - "\n", |
77 | | - "# Melt (unpivot) the DataFrame to long format\n", |
78 | | - "drf = pd.melt(\n", |
79 | | - " drf, \n", |
80 | | - " id_vars=['fiscal_yr', 'org_id', 'program_id'], \n", |
81 | | - " value_vars=fte_spend_cols, \n", |
82 | | - " var_name='plan_actual_spendfte_yr', \n", |
83 | | - " value_name='measure'\n", |
84 | | - ")\n", |
85 | | - "\n", |
86 | | - "# Split 'plan_actual_spendfte_yr' into separate columns for planned/actual, spending/FTEs, and year adjustment\n", |
87 | | - "drf[['planned_actual', 'spending_fte', 'yr_adjust']] = drf['plan_actual_spendfte_yr'].str.split('_', expand=True)\n", |
88 | | - "drf['yr_adjust'] = drf['yr_adjust'].fillna('1').astype(int) - 1\n", |
89 | | - "\n", |
90 | | - "\n", |
91 | | - "# Calculate 4-digit 'measure_yr' and 'report_yr' from 'fiscal_yr' and 'yr_adjust'\n", |
92 | | - "drf['measure_yr'] = drf['fiscal_yr'].str.split('-').str[1].astype(int) + drf['yr_adjust']\n", |
93 | | - "drf['report_yr'] = drf['fiscal_yr'].str.split('-').str[1].astype(int)\n", |
94 | | - "\n", |
95 | | - "latest_si_fy_by_org = si.groupby('org_id')['fiscal_yr'].max()\n", |
96 | | - "\n", |
97 | | - "drf = pd.merge(drf, latest_si_fy_by_org, on='org_id', how='left', suffixes=['', '_si'])\n", |
98 | | - "drf['fiscal_yr_si'] = drf['fiscal_yr_si'].fillna('0-0')\n", |
99 | | - "drf['latest_si_yr'] = drf['fiscal_yr_si'].str.split('-').str[1].astype(int)\n", |
100 | | - "\n", |
101 | | - "drf_actuals = drf[\n", |
102 | | - " (drf['planned_actual'] == 'actual')\n", |
103 | | - "].dropna()\n", |
104 | | - "\n", |
105 | | - "drf_planned = drf[\n", |
106 | | - " (drf['planned_actual'] == 'planned')\n", |
107 | | - "].dropna()\n", |
108 | | - "\n", |
109 | | - "# Determine the highest measure year for actuals\n", |
110 | | - "latest_actuals = drf_actuals.groupby(['org_id', 'program_id', 'spending_fte'])['report_yr'].max().reset_index()\n", |
111 | | - "\n", |
112 | | - "# Merge in the highest measure year for actuals in the planned table\n", |
113 | | - "drf_planned = pd.merge(left=drf_planned, right=latest_actuals, how='left', on=['org_id', 'program_id', 'spending_fte'], suffixes=['', '_actuals']) \n", |
114 | | - "\n", |
115 | | - "# Only keep planned years that are greater than the latest actual report year\n", |
116 | | - "drf_planned = drf_planned[drf_planned['measure_yr']>drf_planned['report_yr_actuals']]\n", |
117 | | - "\n", |
118 | | - "# # # Each report year has 3 measure years for planned values.\n", |
119 | | - "# # Only keep records that have the highest report year for that given program, measure type, and measure year\n", |
120 | | - "idx = drf_planned.groupby(['org_id', 'program_id', 'spending_fte', 'measure_yr'])['report_yr'].idxmax()\n", |
121 | | - "drf_planned = drf_planned.loc[idx]\n", |
122 | | - "\n", |
123 | | - "# # # Concatenate actuals and planned entries\n", |
124 | | - "drf = pd.concat([drf_actuals, drf_planned])\n", |
125 | | - "\n", |
126 | | - "drf = drf[[\n", |
127 | | - " 'org_id', \n", |
128 | | - " 'latest_si_yr', \n", |
129 | | - " 'program_id', \n", |
130 | | - " 'report_yr', \n", |
131 | | - " 'measure_yr', \n", |
132 | | - " 'planned_actual', \n", |
133 | | - " 'spending_fte',\n", |
134 | | - " 'measure']].reset_index(drop=True)\n", |
135 | | - "\n", |
136 | | - "# # Set up si_link_yr: a fiscal year column to be able to include years \n", |
137 | | - "# # beyond the service inventory when joining by service id and fy.\n", |
138 | | - "# # If measure_yr > latest_si_yr, use latest_si_yr; otherwise use measure_yr\n", |
139 | | - "drf.loc[drf['measure_yr']>drf['latest_si_yr'], 'si_link_yr'] = drf['latest_si_yr']\n", |
140 | | - "drf.loc[drf['measure_yr']<=drf['latest_si_yr'], 'si_link_yr'] = drf['measure_yr']\n", |
141 | | - "drf['si_link_yr'] = drf['si_link_yr'].astype(int)\n", |
142 | | - "\n", |
143 | | - "# # # Return years to fiscal year YYYY-YYYY format\n", |
144 | | - "drf['report_yr'] = (drf['report_yr']-1).apply(str) +\"-\"+ (drf['report_yr']).apply(str)\n", |
145 | | - "drf['measure_yr'] = (drf['measure_yr']-1).apply(str) +\"-\"+ (drf['measure_yr']).apply(str)\n", |
146 | | - "drf['si_link_yr'] = (drf['si_link_yr']-1).apply(str) +\"-\"+ (drf['si_link_yr']).apply(str)\n", |
147 | | - "drf['latest_si_yr'] = (drf['latest_si_yr']-1).apply(str) +\"-\"+ (drf['latest_si_yr']).apply(str)\n", |
148 | | - "\n", |
149 | | - "\n" |
150 | | - ] |
151 | | - }, |
152 | 54 | { |
153 | 55 | "cell_type": "code", |
154 | 56 | "execution_count": null, |
155 | 57 | "id": "2373cc17", |
156 | 58 | "metadata": {}, |
157 | 59 | "outputs": [], |
158 | 60 | "source": [ |
159 | | - "drf.loc[(drf['org_id']==130)&(drf['program_id']=='ISS02')]\n", |
| 61 | + "drf.loc[(drf['program_id']=='BRB01')]\n", |
160 | 62 | "# drf['valid_plan'] = (drf['report_yr'] > latest_si_fy)\n", |
161 | 63 | "\n", |
162 | 64 | "# drf[(drf['org_id']==130) & (drf['program_id']=='ISS02') & (drf['planned_actual']=='planned') & (drf['report_yr'] > latest_si_fy)]\n" |
|
219 | 121 | "drf_actuals = drf[drf['planned_actual']=='actual'].dropna(subset=['measure']).copy()\n", |
220 | 122 | "drf_planned = drf[drf['planned_actual']=='planned'].dropna(subset=['measure']).copy()\n", |
221 | 123 | "\n", |
| 124 | + "# Drop any actuals from the fiscal year in progress\n", |
| 125 | + "# TODO: Turn this into a function that looks at the current datetime\n", |
| 126 | + "current_yr = 2026\n", |
| 127 | + "drf_actuals = drf_actuals[drf_actuals['measure_yr']<current_yr]\n", |
| 128 | + "\n", |
222 | 129 | "# Determine the highest report year for actuals\n", |
223 | 130 | "latest_actuals = (drf_actuals\n", |
224 | 131 | " .groupby(['org_id', 'program_id', 'spending_fte'], as_index=False)['report_yr']\n", |
|
0 commit comments