@@ -77,6 +77,7 @@ def _clean_street(self, street: str, locality: str = "") -> Tuple[str, bool]:
7777 {"match" : "1623 E J Street, Suite 2" , "replace" : "1623 E. J STREET" , "locality" : "Tacoma" },
7878 {"match" : "1805 W 32nd Street" , "replace" : "1805 W 32ND ST" , "locality" : "Baldwin" },
7979 {"match" : "500 Hilbig Road" , "replace" : "500 HILBIG RD" , "locality" : "Conroe" },
80+ {"match" : "806 Hilbig Road" , "replace" : "806 HILBIG RD" , "locality" : "Conroe" },
8081 {"match" : "425 Golden State Avenue" , "replace" : "425 Golden State Ave" , "locality" : "Bakersfield" },
8182 {"match" : "832 East Texas HWY 44" , "replace" : "832 EAST TEXAS STATE HIGHWAY 44" , "locality" : "Encinal" },
8283 {"match" : "18201 SW 12th Street" , "replace" : "18201 SW 12TH ST" , "locality" : "Miami" },
@@ -88,7 +89,7 @@ def _clean_street(self, street: str, locality: str = "") -> Tuple[str, bool]:
8889 {"match" : "1701 North Washington" , "replace" : "1701 NORTH WASHINGTON ST" , "locality" : "Grand Forks" },
8990 {"match" : "611 Frontage Road" , "replace" : "611 FRONTAGE RD" , "locality" : "McFarland" },
9091 {"match" : "12450 Merritt Road" , "replace" : "12450 MERRITT DR" , "locality" : "Chardon" },
91- {"match" : "411 S. Broadway Avenue" , "replace" : "411 SOUTH BROADWAY AVENUE" , "locality" : "Chardon " },
92+ {"match" : "411 S. Broadway Avenue" , "replace" : "411 SOUTH BROADWAY AVENUE" , "locality" : "Albert Lea " },
9293 {"match" : "3424 Hwy 252 E" , "replace" : "3424 HIGHWAY 252 EAST" , "locality" : "Folkston" },
9394 # a unique one, 'cause the PHONE NUMBER IS IN THE ADDRESS?!
9495 {"match" : "911 PARR BLVD 775 328 3308" , "replace" : "911 E Parr Blvd" , "locality" : "RENO" },
@@ -142,6 +143,9 @@ def _repair_locality(self, locality: str, administrative_area: str) -> Tuple[str
142143 if locality == "Leachfield" and administrative_area == "KY" :
143144 locality = "LEITCHFIELD"
144145 cleaned = True
146+ if locality == "Susupe, Saipan" and administrative_area == "MP" :
147+ locality = "SAIPAN"
148+ cleaned = True
145149 return locality , cleaned
146150
147151 def _load_sheet (self ) -> dict :
@@ -199,6 +203,14 @@ def _load_sheet(self) -> dict:
199203 results [full_address ] = details
200204 return results
201205
206+ def _update_facility (self , old : dict , new : dict ) -> dict :
207+ for k , v in new .items ():
208+ if isinstance (v , dict ):
209+ old [k ] = self ._update_facility (old [k ], new [k ])
210+ if not old .get (k , None ):
211+ old [k ] = v
212+ return old
213+
202214 def scrape_facilities (self ):
203215 """Scrape all ICE detention facility data from all 6 pages"""
204216 start_time = time .time ()
@@ -230,10 +242,9 @@ def scrape_facilities(self):
230242 facility ["_repaired_record" ] = True
231243 full_address = "," .join ([street , locality , addr ["administrative_area" ], zcode ]).upper ()
232244 if full_address in self .facilities_data ["facilities" ].keys ():
233- for key , value in facility .items ():
234- if self .facilities_data ["facilities" ][full_address ].get (key , None ):
235- continue
236- self .facilities_data ["facilities" ][full_address ][key ] = value
245+ self .facilities_data ["facilities" ][full_address ] = self ._update_facility (
246+ self .facilities_data ["facilities" ][full_address ], facility
247+ )
237248 # this is likely to produce _some_ duplicates, but it's a reasonable starting place
238249 else :
239250 self .facilities_data ["facilities" ][facility ["name" ]] = facility
0 commit comments