77import polars
88import re
99from schemas import (
10+ default_field_office ,
1011 facilities_schema ,
1112 facility_schema ,
13+ ice_facility_types ,
1214)
1315import time
1416from typing import Tuple
@@ -261,9 +263,11 @@ def _load_sheet(self) -> dict:
261263 details ["population" ]["male" ]["allowed" ] = True
262264
263265 details ["facility_type" ] = row ["Type Detailed" ]
266+ details ["facility_type_detail" ] = ice_facility_types .get (row ["Type Detailed" ], {})
264267 details ["avg_stay_length" ] = row ["FY25 ALOS" ]
265268 details ["inspection_date" ] = row ["Last Inspection End Date" ]
266269 details ["source_urls" ].append (self .sheet_url )
270+ details ["field_office" ] = default_field_office
267271 results [full_address ] = details
268272 return results
269273
@@ -323,6 +327,8 @@ def scrape_facilities(self):
323327 self .facilities_data ["facilities" ][full_address ] = self ._update_facility (
324328 self .facilities_data ["facilities" ][full_address ], facility
325329 )
330+ if facility ["field_office" ]:
331+ self .facilities_data ["facilities" ][full_address ]["field_office" ] = facility ["field_office" ]
326332 # update to the frequently nicer address from ice.gov
327333 self .facilities_data ["facilities" ][full_address ]["address" ] = addr
328334 # add scraped urls
@@ -331,17 +337,9 @@ def scrape_facilities(self):
331337 if url in self .facilities_data ["facilities" ][full_address ]["source_urls" ]:
332338 continue
333339 self .facilities_data ["facilities" ][full_address ]["source_urls" ].append (url )
334- if not self .facilities_data ["facilities" ][full_address ].get ("field_office" , "" ):
335- self .facilities_data ["facilities" ][full_address ]["field_office" ] = (
336- "(Possibly) Not managed by DHS field office"
337- )
338340 # this is likely to produce _some_ duplicates, but it's a reasonable starting place
339341 else :
340342 self .facilities_data ["facilities" ][facility ["name" ]] = facility
341- if not self .facilities_data ["facilities" ][facility ["name" ]].get ("field_office" , "" ):
342- self .facilities_data ["facilities" ][facility ["name" ]]["field_office" ] = (
343- "(Possibly) Not managed by DHS field office"
344- )
345343
346344 self .facilities_data ["scrape_runtime" ] = time .time () - start_time
347345 logger .info ("Total facilities scraped: %s" , len (self .facilities_data ["facilities" ]))
0 commit comments