1212 StratumConstraint ,
1313 Target ,
1414)
15+ from policyengine_us_data .utils .census import get_census_docs , pull_acs_table
1516
1617
1718LABEL_TO_SHORT = {
3233 "Estimate!!Total!!Total population!!AGE!!70 to 74 years" : "70-74" ,
3334 "Estimate!!Total!!Total population!!AGE!!75 to 79 years" : "75-79" ,
3435 "Estimate!!Total!!Total population!!AGE!!80 to 84 years" : "80-84" ,
35- "Estimate!!Total!!Total population!!AGE!!85 years and over" : "85-inf " ,
36+ "Estimate!!Total!!Total population!!AGE!!85 years and over" : "85-999 " ,
3637}
3738AGE_COLS = list (LABEL_TO_SHORT .values ())
3839
3940
def extract_docs(year=2023, timeout=30):
    """Fetch the ACS 1-year subject-table variable documentation.

    Args:
        year: ACS vintage; interpolated into the Census API URL.
        timeout: Seconds to wait on the Census API before giving up.
            Without a timeout ``requests.get`` can block indefinitely on a
            stalled connection, which would hang the whole ETL run.

    Returns:
        The decoded JSON payload with an extra ``"year"`` key set to
        ``year``.

    Raises:
        requests.exceptions.RequestException: if the request fails, times
            out, or returns an error status (reported, then re-raised).
        Exception: any other failure while decoding or tagging the payload
            (reported, then re-raised).
    """
    docs_url = (
        f"https://api.census.gov/data/{year}/acs/acs1/subject/variables.json"
    )

    try:
        docs_response = requests.get(docs_url, timeout=timeout)
        docs_response.raise_for_status()

        docs = docs_response.json()
        # Tag the payload with its vintage so downstream steps can read it.
        docs["year"] = year

    except requests.exceptions.RequestException as e:
        print(f"Error during API request: {e}")
        raise
    except Exception as e:
        print(f"An error occurred: {e}")
        raise
    return docs
59-
60-
def extract_age_data(geo, year=2023, timeout=30):
    """Download ACS 1-year subject table S0101 for one geography level.

    Args:
        geo: One of ``"National"``, ``"State"`` or ``"District"``.
        year: ACS vintage; interpolated into the Census API URL.
        timeout: Seconds to wait on the Census API before giving up.
            Without a timeout ``requests.get`` can block indefinitely on a
            stalled connection.

    Returns:
        A ``pandas.DataFrame`` whose columns are the first row of the API
        response and whose rows are the remaining response rows.

    Raises:
        ValueError: if ``geo`` is not one of the three supported levels.
            Raised before any network access.
        requests.exceptions.RequestException: if the request fails, times
            out, or returns an error status (reported, then re-raised).
        Exception: any other failure while decoding the response or
            building the frame (reported, then re-raised).
    """
    # Census API "for" clause for each supported geography level.
    for_clauses = {
        "National": "us:*",
        "State": "state:*",
        "District": "congressional+district:*",
    }
    # The isinstance guard keeps unhashable inputs on the ValueError path
    # instead of failing inside the dict lookup with a TypeError.
    for_clause = for_clauses.get(geo) if isinstance(geo, str) else None
    if for_clause is None:
        raise ValueError(
            "geo must be either 'National', 'State', or 'District'"
        )

    base_url = (
        f"https://api.census.gov/data/{year}/acs/acs1/subject?get=group(S0101)"
    )
    url = f"{base_url}&for={for_clause}"

    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

        data = response.json()

        # The first row of the response carries the column names.
        headers = data[0]
        data_rows = data[1:]
        df = pd.DataFrame(data_rows, columns=headers)

    except requests.exceptions.RequestException as e:
        print(f"Error during API request: {e}")
        raise
    except Exception as e:
        print(f"An error occurred: {e}")
        raise
    return df
94-
95-
9641def transform_age_data (age_data , docs ):
9742 df = age_data .copy ()
9843
@@ -131,13 +76,14 @@ def transform_age_data(age_data, docs):
13176 var_name = "age_range" ,
13277 value_name = "value" ,
13378 )
134- age_bounds = df_long ["age_range" ].str .split ("-" , expand = True )
135- df_long ["age_greater_than_or_equal_to" ] = (
136- age_bounds [0 ].str .replace ("+" , "" ).astype (int )
137- )
138- df_long ["age_less_than_or_equal_to" ] = pd .to_numeric (age_bounds [1 ])
79+ age_bounds = df_long ["age_range" ].str .split ("-" , expand = True ).astype (int )
80+ age_bounds .columns = ["ge" , "le" ]
81+ age_bounds [['gt' ]] = age_bounds [["ge" ]] - 1
82+ age_bounds [['lt' ]] = age_bounds [["le" ]] + 1
83+
84+ df_long ["age_greater_than" ] = age_bounds [["gt" ]]
85+ df_long ["age_less_than" ] = age_bounds [["lt" ]]
13986 df_long ["variable" ] = "person_count"
140- df_long ["period" ] = docs ["year" ]
14187 df_long ["reform_id" ] = 0
14288 df_long ["source_id" ] = 1
14389 df_long ["active" ] = True
@@ -149,7 +95,7 @@ def get_parent_geo(geo):
14995 return {"National" : None , "State" : "National" , "District" : "State" }[geo ]
15096
15197
152- def load_age_data (df_long , geo , stratum_lookup = {}):
98+ def load_age_data (df_long , geo , year , stratum_lookup = {}):
15399
154100 # Quick data quality check before loading ----
155101 if geo == "National" :
@@ -192,6 +138,7 @@ def load_age_data(df_long, geo, stratum_lookup={}):
192138 )
193139
194140 # Create constraints and link them to the parent's relationship attribute.
141+ # TODO: greater_than_or_equal_to to just greater than!
195142 new_stratum .constraints_rel = [
196143 StratumConstraint (
197144 constraint_variable = "ucgid_str" ,
@@ -200,26 +147,26 @@ def load_age_data(df_long, geo, stratum_lookup={}):
200147 ),
201148 StratumConstraint (
202149 constraint_variable = "age" ,
203- operation = "greater_than_or_equal " ,
204- value = str (row ["age_greater_than_or_equal_to " ]),
150+ operation = "greater_than " ,
151+ value = str (row ["age_greater_than " ]),
205152 ),
206153 ]
207154
208- age_lt_value = row ["age_less_than_or_equal_to " ]
155+ age_lt_value = row ["age_less_than " ]
209156 if not np .isinf (age_lt_value ):
210157 new_stratum .constraints_rel .append (
211158 StratumConstraint (
212159 constraint_variable = "age" ,
213160 operation = "less_than" ,
214- value = str (age_lt_value + 1 ),
161+ value = str (row [ "age_less_than" ] ),
215162 )
216163 )
217164
218165 # Create the Target and link it to the parent.
219166 new_stratum .targets_rel .append (
220167 Target (
221168 variable = row ["variable" ],
222- period = row [ "period" ] ,
169+ period = year ,
223170 value = row ["value" ],
224171 source_id = row ["source_id" ],
225172 active = row ["active" ],
@@ -243,18 +190,24 @@ def load_age_data(df_long, geo, stratum_lookup={}):
def main(year=2023):
    """Run the age-target ETL for table S0101 at all three geography levels.

    Args:
        year: ACS vintage to pull and to record on every loaded target.

    The load step runs National, then State, then District. Each call
    receives the stratum lookup returned by the level above it, so the
    order must not be changed.
    """
    # ---- Extract ----------
    docs = get_census_docs(year)
    national_df = pull_acs_table("S0101", "National", year)
    state_df = pull_acs_table("S0101", "State", year)
    district_df = pull_acs_table("S0101", "District", year)

    # --- Transform ----------
    long_national_df = transform_age_data(national_df, docs)
    long_state_df = transform_age_data(state_df, docs)
    long_district_df = transform_age_data(district_df, docs)

    # --- Load --------
    national_strata_lku = load_age_data(long_national_df, "National", year)
    state_strata_lku = load_age_data(
        long_state_df, "State", year, national_strata_lku
    )
    # District is the lowest level here, so its lookup is not kept.
    load_age_data(long_district_df, "District", year, state_strata_lku)


if __name__ == "__main__":

    # --- ETL: Extract, Transform, Load ----
    main()
0 commit comments