|
2 | 2 | "cells": [ |
3 | 3 | { |
4 | 4 | "cell_type": "code", |
5 | | - "execution_count": null, |
| 5 | + "execution_count": 2, |
6 | 6 | "id": "fd2d413a-666f-49d4-b86c-7d6898a496a5", |
7 | 7 | "metadata": {}, |
8 | 8 | "outputs": [], |
|
29 | 29 | }, |
30 | 30 | { |
31 | 31 | "cell_type": "code", |
32 | | - "execution_count": null, |
| 32 | + "execution_count": 3, |
33 | 33 | "id": "45467185", |
34 | 34 | "metadata": {}, |
35 | 35 | "outputs": [], |
|
53 | 53 | }, |
54 | 54 | { |
55 | 55 | "cell_type": "code", |
56 | | - "execution_count": null, |
57 | | - "id": "2373cc17", |
58 | | - "metadata": {}, |
59 | | - "outputs": [], |
60 | | - "source": [ |
61 | | - "drf.loc[(drf['program_id']=='BRB01')]\n", |
62 | | - "# drf['valid_plan'] = (drf['report_yr'] > latest_si_fy)\n", |
63 | | - "\n", |
64 | | - "# drf[(drf['org_id']==130) & (drf['program_id']=='ISS02') & (drf['planned_actual']=='planned') & (drf['report_yr'] > latest_si_fy)]\n" |
65 | | - ] |
66 | | - }, |
67 | | - { |
68 | | - "cell_type": "code", |
69 | | - "execution_count": null, |
70 | | - "id": "a5c0a639", |
| 56 | + "execution_count": 4, |
| 57 | + "id": "e9dde87f", |
71 | 58 | "metadata": {}, |
72 | 59 | "outputs": [], |
73 | 60 | "source": [ |
|
76 | 63 | "drf = standardize_column_names(drf)\n", |
77 | 64 | "drf['fiscal_yr'] = drf['fiscal_yr'].apply(clean_fiscal_yr)\n", |
78 | 65 | "\n", |
| 66 | + "si['org_id']=pd.to_numeric(si['org_id'].astype(int), errors='coerce')\n", |
| 67 | + "\n", |
79 | 68 | "# Define columns related to planned and actual measures: spending and FTEs\n", |
80 | 69 | "# These columns will be unpivoted / melted\n", |
81 | 70 | "fte_spend_cols = [\n", |
|
111 | 100 | "\n", |
112 | 101 | "# Latest SI fiscal year per org (end year as int)\n", |
113 | 102 | "si_latest = (si.assign(lat_end=pd.to_numeric(si['fiscal_yr'].str.split('-').str[-1], errors='coerce'))\n", |
114 | | - " .groupby('org_id', as_index=False)['lat_end'].max()\n", |
115 | | - " .rename(columns={'lat_end':'latest_si_yr'}))\n", |
| 103 | + " .groupby('org_id', as_index=False)['lat_end'].max()\n", |
| 104 | + " .rename(columns={'lat_end':'latest_si_yr'}))\n", |
116 | 105 | "\n", |
117 | 106 | "drf = drf.merge(si_latest, on='org_id', how='left')\n", |
118 | 107 | "\n", |
119 | 108 | "\n", |
120 | 109 | "# Split planned vs actual; only drop blank measures\n", |
121 | 110 | "drf_actuals = drf[drf['planned_actual']=='actual'].dropna(subset=['measure']).copy()\n", |
122 | 111 | "drf_planned = drf[drf['planned_actual']=='planned'].dropna(subset=['measure']).copy()\n", |
123 | | - "\n", |
| 112 | + "\n" |
| 113 | + ] |
| 114 | + }, |
| 115 | + { |
| 116 | + "cell_type": "code", |
| 117 | + "execution_count": 5, |
| 118 | + "id": "3cf6e679", |
| 119 | + "metadata": {}, |
| 120 | + "outputs": [], |
| 121 | + "source": [ |
124 | 122 | "# Drop any actuals from the fiscal year in progress\n", |
125 | | - "# TODO: Turn this into a function that looks at the current datetime\n", |
126 | | - "current_yr = 2026\n", |
127 | | - "drf_actuals = drf_actuals[drf_actuals['measure_yr']<current_yr]\n", |
128 | | - "\n", |
129 | | - "# Determine the highest measure year for actuals\n", |
130 | | - "latest_actuals = (drf_actuals\n", |
131 | | - " .groupby(['org_id', 'program_id', 'spending_fte'], as_index=False)['report_yr']\n", |
132 | | - " .max()\n", |
133 | | - " .rename(columns={'report_yr':'report_yr_actuals'})\n", |
134 | | - ")\n", |
| 123 | + "# The fiscal year in progress will be indicated with a \".\" in the ftes field\n", |
| 124 | + "# Determine the highest measure/report year for actuals, i.e. max without a \".\" in the ftes field\n", |
| 125 | + "latest_actuals = drf_actuals[(drf_actuals['spending_fte'] == 'ftes') & (drf_actuals['measure'] != '.')] \\\n", |
| 126 | + " .groupby(['org_id', 'program_id'], as_index=False) \\\n", |
| 127 | + " .agg(latest_report_yr_actuals=('report_yr', 'max'))\n", |
| 128 | + "\n", |
| 129 | + "# Merge the latest actual report year (per org_id and program_id) into the actuals table\n", |
| 130 | + "drf_actuals = drf_actuals.merge(latest_actuals, \n", |
| 131 | + " on=['org_id', 'program_id'],\n", |
| 132 | + " how='left') \n", |
| 133 | + "\n", |
| 134 | + "# Only keep actual years that are less than or equal to the latest actual report year\n", |
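| 135 | + "# NOTE(review): fillna(0) makes this filter drop every actual row that has no latest actual report year (measure_yr <= 0 is false for any positive year); use fillna(float('inf')) instead if those rows are meant to be kept\n", |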
| 136 | + "drf_actuals = drf_actuals[\n", |
| 137 | + " drf_actuals['measure_yr'] <= (drf_actuals['latest_report_yr_actuals'].fillna(0))\n", |
| 138 | + "]\n", |
| 139 | + "\n", |
135 | 140 | "\n", |
136 | 141 | "# Merge the latest actual report year (per org_id and program_id) into the planned table\n", |
137 | 142 | "drf_planned = drf_planned.merge(latest_actuals, \n", |
138 | | - " on=['org_id', 'program_id', 'spending_fte'],\n", |
| 143 | + " on=['org_id', 'program_id'],\n", |
139 | 144 | " how='left') \n", |
140 | 145 | "\n", |
141 | 146 | "# Only keep planned years that are greater than the latest actual report year\n", |
142 | | - "# fillna(-np.inf) assures that all planned values are included, even if there are not associated actual report years\n", |
| 147 | + "# fillna(0) ensures that all planned values are kept when there is no associated latest actual report year (measure_yr > 0 holds for any positive year)\n", |
143 | 148 | "drf_planned = drf_planned[\n", |
144 | | - " drf_planned['measure_yr'] > (drf_planned['report_yr_actuals'].fillna(0))\n", |
| 149 | + " drf_planned['measure_yr'] > (drf_planned['latest_report_yr_actuals'].fillna(0))\n", |
145 | 150 | "]\n", |
146 | 151 | "\n", |
147 | 152 | "# # # Each report year has 3 measure years for planned values.\n", |
148 | 153 | "# # Only keep records that have the highest report year for that given program, measure type, and measure year\n", |
149 | 154 | "idx = (drf_planned\n", |
150 | | - " .groupby(['org_id', 'program_id', 'spending_fte', 'measure_yr'])['report_yr']\n", |
151 | | - " .idxmax())\n", |
| 155 | + " .groupby(['org_id', 'program_id', 'spending_fte', 'measure_yr'])['report_yr']\n", |
| 156 | + " .idxmax())\n", |
152 | 157 | "drf_planned = drf_planned.loc[idx]\n", |
153 | 158 | "\n", |
154 | 159 | "# # # Concatenate actuals and planned entries\n", |
|
176 | 181 | "drf['report_yr'] = (drf['report_yr']-1).apply(str) +\"-\"+ (drf['report_yr']).apply(str)\n", |
177 | 182 | "drf['measure_yr'] = (drf['measure_yr']-1).apply(str) +\"-\"+ (drf['measure_yr']).apply(str)\n", |
178 | 183 | "drf['si_link_yr'] = (drf['si_link_yr']-1).apply(str) +\"-\"+ (drf['si_link_yr']).apply(str)\n", |
179 | | - "drf['latest_si_yr'] = (drf['latest_si_yr']-1).apply(str) +\"-\"+ (drf['latest_si_yr']).apply(str)" |
| 184 | + "drf['latest_si_yr'] = (drf['latest_si_yr']-1).apply(str) +\"-\"+ (drf['latest_si_yr']).apply(str)\n" |
180 | 185 | ] |
181 | | - }, |
182 | | - { |
183 | | - "cell_type": "code", |
184 | | - "execution_count": null, |
185 | | - "id": "dacefaa6", |
186 | | - "metadata": {}, |
187 | | - "outputs": [], |
188 | | - "source": [] |
189 | 186 | } |
190 | 187 | ], |
191 | 188 | "metadata": { |
|
0 commit comments