|
6 | 6 | from logging_logic import log_execution_time |
7 | 7 | from templates.utils.models import columns_in_formula |
8 | 8 |
|
9 | | - |
10 | | -@orca.step("education_model") |
11 | | -def education_model( |
| 9 | +STEP_NAME = "education" |
| 10 | +REQUIRED_COLUMNS = [ |
| 11 | + "persons.edu", |
| 12 | + "persons.student", |
| 13 | +] |
| 14 | + |
| 15 | +@orca.step(STEP_NAME) |
| 16 | +def education( |
12 | 17 | persons, edu_highschool_proportion, edu_highschool_grads_proportion, year |
13 | 18 | ): |
14 | 19 | """ |
15 | | - Run the education model and update the persons table |
16 | | -
|
17 | | - Modifies State Variables: |
18 | | - - persons.edu |
19 | | - - persons.student |
20 | | -
|
21 | | - Args: |
22 | | - persons (DataFrameWrapper): DataFrameWrapper of the persons table |
23 | | -
|
24 | | - Returns: |
25 | | - None |
| 20 | + Simulate educational attainment and student status transitions. |
| 21 | +
|
| 22 | + This step applies the education model to eligible persons (age > 15 and currently students) |
| 23 | + to determine who drops out. It advances students through grades and degrees, maintains |
| 24 | + proportions of high school and GED graduates, and updates the persons table in place. |
| 25 | +
|
| 26 | + Parameters |
| 27 | + ---------- |
| 28 | + persons : orca.Table |
| 29 | + The persons table containing individual-level attributes. |
| 30 | + edu_highschool_proportion : pandas.Series |
| 31 | + Relative share of 11th- versus 12th-grade persons, as produced by the `edu_highschool_proportion` injectable. |
| 32 | + edu_highschool_grads_proportion : pandas.Series |
| 33 | + Relative share of GED versus high school diploma holders, as produced by the `edu_highschool_grads_proportion` injectable. |
| 34 | + year : int |
| 35 | + The current simulation year. |
| 36 | +
|
| 37 | + Returns |
| 38 | + ------- |
| 39 | + None |
| 40 | +
|
| 41 | + Notes |
| 42 | + ----- |
| 43 | + - Modifies `persons.edu` and `persons.student` in place. |
| 44 | + - Only persons older than 15 and currently students are considered for dropout modeling. |
| 45 | + - Proportions for transitions (e.g., GED vs. diploma) are maintained using observed data. |
| 46 | + - Some transitions use random assignment based on empirical proportions. |
26 | 47 | """ |
27 | 48 | start_time = time.time() |
28 | 49 |
|
@@ -88,16 +109,57 @@ def education_model( |
88 | 109 |
|
89 | 110 | @orca.injectable(name="edu_highschool_proportion") |
90 | 111 | def edu_highschool_proportion(data="persons.edu"): |
| 112 | + """ |
| 113 | + Calculate the relative share of 11th- versus 12th-grade persons. |
| 114 | +
|
| 115 | + Parameters |
| 116 | + ---------- |
| 117 | + data : pandas.Series |
| 118 | + The `edu` column from the persons table. |
| 119 | +
|
| 120 | + Returns |
| 121 | + ------- |
| 122 | + pandas.Series |
| 123 | + Relative frequency of edu codes 15 (11th grade) and 16 (12th grade), normalized over only the persons holding one of those two codes. |
| 124 | + """ |
91 | 125 | return data[data.isin([15, 16])].value_counts(normalize=True) |
92 | 126 |
|
93 | 127 |
|
94 | 128 | @orca.injectable(name="edu_highschool_grads_proportion") |
95 | 129 | def edu_highschool_grads_proportion(data="persons.edu"): |
| 130 | + """ |
| 131 | + Calculate the relative share of GED versus high school diploma holders. |
| 132 | +
|
| 133 | + Parameters |
| 134 | + ---------- |
| 135 | + data : pandas.Series |
| 136 | + The `edu` column from the persons table. |
| 137 | +
|
| 138 | + Returns |
| 139 | + ------- |
| 140 | + pandas.Series |
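| 141 | + Relative frequency of edu codes 16 (GED) and 17 (high school diploma), normalized over only the persons holding one of those two codes. NOTE(review): code 16 is labelled 12th grade in `edu_highschool_proportion`; confirm the code mapping. |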
| 142 | + """ |
96 | 143 | return data[data.isin([16, 17])].value_counts(normalize=True) |
97 | 144 |
|
98 | 145 |
|
99 | 146 | @orca.column(table_name="persons") |
100 | 147 | def education_group(data="persons.edu"): |
| 148 | + """ |
| 149 | + Assign each person to an education group. |
| 150 | +
|
| 151 | + Categorizes persons into predefined education intervals for use in modeling and reporting. |
| 152 | +
|
| 153 | + Parameters |
| 154 | + ---------- |
| 155 | + data : pandas.Series |
| 156 | + The `edu` column from the persons table. |
| 157 | +
|
| 158 | + Returns |
| 159 | + ------- |
| 160 | + pandas.Series |
| 161 | + Categorical education group labels as strings. |
| 162 | + """ |
101 | 163 | education_intervals = [0, 18, 22, 200] |
102 | 164 | education_labels = ["lte17", "18-21", "gte22"] |
103 | 165 | return pd.cut( |
|
0 commit comments