@@ -115,14 +115,14 @@ def _clean_street(self, street: str, locality: str = "") -> Tuple[str, bool]:
115115 {"match" : "601 Central Avenue" , "replace" : "601 CENTRAL AVE" , "locality" : "Newport" },
116116 {"match" : "501 E Court Avenue" , "replace" : "501 EAST COURT AVE" , "locality" : "Jeffersonville" },
117117 {"match" : "3200 S. Kings Hwy" , "replace" : "3700 S KINGS HWY" , "locality" : "Cushing" },
118- {"match" : "325 Court Street" , "replace" : "325 COURT STREET" , "locality" : "Sault Ste. Marie" },
119118 {"match" : "301 South Walnut" , "replace" : "301 SOUTH WALNUT STREET" , "locality" : "Cottonwood Falls" },
120119 {"match" : "830 Pine Hill Road" , "replace" : "830 PINEHILL ROAD" , "locality" : "Jena" },
121120 {
122121 "match" : "11093 SW Lewis Memorial Dr" ,
123122 "replace" : "11093 SW LEWIS MEMORIAL DRIVE" ,
124123 "locality" : "Bowling Green" ,
125124 },
125+ {"match" : "58 Pine Mountain Road" , "replace" : "58 PINE MOUNTAIN RD" , "locality" : "McElhattan" },
126126 # a unique one, 'cause the PHONE NUMBER IS IN THE ADDRESS?!
127127 {"match" : "911 PARR BLVD 775 328 3308" , "replace" : "911 E Parr Blvd" , "locality" : "RENO" },
128128 # default matches should come last
@@ -186,6 +186,9 @@ def _repair_locality(self, locality: str, administrative_area: str) -> Tuple[str
186186 if locality == "Cottonwood Falls" and administrative_area == "KS" :
187187 locality = "COTTONWOOD FALL"
188188 cleaned = True
189+ if locality == "Sault Ste. Marie" and administrative_area == "MI" :
190+ locality = "SAULT STE MARIE"
191+ cleaned = True
189192 return locality , cleaned
190193
191194 def _load_sheet (self ) -> dict :
@@ -240,6 +243,7 @@ def _load_sheet(self) -> dict:
240243 details ["facility_type" ] = row ["Type Detailed" ]
241244 details ["avg_stay_length" ] = row ["FY25 ALOS" ]
242245 details ["inspection_date" ] = row ["Last Inspection End Date" ]
246+ details ["source_urls" ].append (self .sheet_url )
243247 results [full_address ] = details
244248 return results
245249
@@ -285,6 +289,7 @@ def scrape_facilities(self):
285289 self .facilities_data ["facilities" ][full_address ] = self ._update_facility (
286290 self .facilities_data ["facilities" ][full_address ], facility
287291 )
292+ self .facilities_data ["facilities" ][full_address ]["address" ] = addr
288293 # this is likely to produce _some_ duplicates, but it's a reasonable starting place
289294 else :
290295 self .facilities_data ["facilities" ][facility ["name" ]] = facility
@@ -466,9 +471,12 @@ def _extract_single_facility(self, element, page_url):
466471 if image_element :
467472 facility ["image_url" ] = f"https://www.ice.gov{ image_element [0 ]['src' ]} "
468473 facility_url_element = element .findAll ("a" )
474+ facility_url = ""
469475 if facility_url_element :
470- facility ["facility_url" ] = f"https://www.ice.gov{ facility_url_element [0 ]['href' ]} "
471- facility ["page_updated_date" ] = self ._scrape_updated (facility .get ("facility_url" , "" ))
476+ facility_url = f"https://www.ice.gov{ facility_url_element [0 ]['href' ]} "
477+ facility ["source_urls" ].append (facility_url )
478+ if facility_url :
479+ facility ["page_updated_date" ] = self ._scrape_updated (facility_url )
472480 # Clean up extracted data
473481 facility = self ._clean_facility_data (facility )
474482
0 commit comments