@@ -94,7 +94,6 @@ def _clean_street(self, street: str, locality: str = "") -> Tuple[str, bool]:
9494 {"match" : "27991 Buena Vista Blvd." , "replace" : "27991 BUENA VISTA BOULEVARD" , "locality" : "Los Fresnos" },
9595 {"match" : "175 Pike County Blvd." , "replace" : "175 PIKE COUNTY BOULEVARD" , "locality" : "Lords Valley" },
9696 {"match" : "500 W. 2nd Street" , "replace" : "301 W. 2nd" , "locality" : "Rolla" },
97- {"match" : "307 Saint Joseph St" , "replace" : "300 KANSAS CITY STREET NONE" , "locality" : "Rapid City" },
9897 {"match" : "3405 West Highway 146" , "replace" : "3405 W HWY 146" , "locality" : "LaGrange" },
9998 {"match" : "1623 E J Street, Suite 2" , "replace" : "1623 E. J STREET" , "locality" : "Tacoma" },
10099 {"match" : "1805 W 32nd Street" , "replace" : "1805 W 32ND ST" , "locality" : "Baldwin" },
@@ -106,7 +105,6 @@ def _clean_street(self, street: str, locality: str = "") -> Tuple[str, bool]:
106105 {"match" : "2190 E Mesquite Avenue" , "replace" : "2190 EAST MESQUITE AVENUE" , "locality" : "Pahrump" },
107106 {"match" : "287 Industrial Drive" , "replace" : "327 INDUSTRIAL DRIVE" , "locality" : "Jonesboro" },
108107 {"match" : "1572 Gateway Road" , "replace" : "1572 GATEWAY" , "locality" : "Calexico" },
109- {"match" : "203 Aspinall Avenue" , "replace" : "203 ASPINAL AVE. PO BOX 3236" , "locality" : "Hagatna" },
110108 {"match" : "1199 N Haseltine Road" , "replace" : "1199 N HASELTINE RD" , "locality" : "Springfield" },
111109 {"match" : "1701 North Washington" , "replace" : "1701 NORTH WASHINGTON ST" , "locality" : "Grand Forks" },
112110 {"match" : "611 Frontage Road" , "replace" : "611 FRONTAGE RD" , "locality" : "McFarland" },
@@ -126,7 +124,7 @@ def _clean_street(self, street: str, locality: str = "") -> Tuple[str, bool]:
126124 {"match" : "704 E Broadway Street" , "replace" : "702 E BROADWAY ST" , "locality" : "Eden" },
127125 {"match" : "1300 E Hwy 107" , "replace" : "1330 HIGHWAY 107" , "locality" : "La Villa" },
128126 {"match" : "216 W. Center Street" , "replace" : "215 WEST CENTRAL STREET" , "locality" : "Juneau" },
129- {"match" : "300 El Racho Way " , "replace" : "300 EL RANCHO WAY" , "locality" : "Dilley" },
127+ {"match" : "300 El Rancho Way " , "replace" : "300 EL RANCHO WAY" , "locality" : "Dilley" },
130128 {"match" : "3130 North Oakland Street" , "replace" : "3130 OAKLAND ST" , "locality" : "Aurora" },
131129 {"match" : "03151 Co. Rd. 24.2" , "replace" : "3151 ROAD 2425 ROUTE 1" , "locality" : "Stryker" },
132130 {"match" : "20 Hobo Forks Road" , "replace" : "20 HOBO FORK RD" , "locality" : "Natchez" },
@@ -145,22 +143,37 @@ def _clean_street(self, street: str, locality: str = "") -> Tuple[str, bool]:
145143 "locality" : "Bowling Green" ,
146144 },
147145 {"match" : "58 Pine Mountain Road" , "replace" : "58 PINE MOUNTAIN RD" , "locality" : "McElhattan" },
146+ {
147+ "match" : "Adelanto East 10400 Rancho Road | Adelanto West 10250 Rancho Road" ,
148+ "replace" : "10250 Rancho Road" ,
149+ "locality" : "Adelanto" ,
150+ },
151+ {"match" : "4702 East Saunders" , "replace" : "4702 EAST SAUNDERS STREET" , "locality" : "Laredo" },
152+ {"match" : "9998 S. Highway 98" , "replace" : "9998 SOUTH HIGHWAY 83" , "locality" : "Laredo" },
148153 # a unique one, 'cause the PHONE NUMBER IS IN THE ADDRESS?!
149154 {"match" : "911 PARR BLVD 775 328 3308" , "replace" : "911 E Parr Blvd" , "locality" : "RENO" },
155+ # fix a few shockingly bad addresses in the spreadsheet
156+ {"match" : "DEPARTMENT OF CORRECTIONS 1618 ASH STREET" , "replace" : "1618 Ash Street" , "locality" : "ERIE" },
157+ {"match" : "203 ASPINAL AVE. PO BOX 3236" , "replace" : "203 Aspinall Avenue" , "locality" : "HAGATNA" },
158+ {
159+ "match" : "11866 HASTINGS BRIDGE ROAD P.O. BOX 429" ,
160+ "replace" : "11866 Hastings Bridge Road" ,
161+ "locality" : "LOVEJOY" ,
162+ },
163+ {"match" : "300 KANSAS CITY STREET NONE" , "replace" : "307 Saint Joseph St" , "locality" : "RAPID CITY" },
164+ {"match" : "4909 FM 2826" , "replace" : "4909 Farm to Market Road" , "locality" : "ROBSTOWN" },
165+ {"match" : "6920 DIGITAL RD" , "replace" : "11541 Montana Avenue" , "locality" : "EL PASO" },
150166 # default matches should come last
151167 {"match" : "'s" , "replace" : "" , "locality" : "" },
152168 {"match" : "." , "replace" : "" , "locality" : "" },
153169 {"match" : "," , "replace" : "" , "locality" : "" },
154170 ]
155- stripped_street = street
156171 cleaned = False
157- if any (f ["match" ] in stripped_street for f in street_filters ):
158- cleaned = True
159172 for f in street_filters :
160- if (f ["match" ] in stripped_street ) and ((f ["locality" ] and f ["locality" ] == locality ) or not f ["locality" ]):
161- stripped_street = stripped_street .replace (f ["match" ], f ["replace" ])
173+ if (f ["match" ] in street ) and ((f ["locality" ] and f ["locality" ] == locality ) or not f ["locality" ]):
174+ street = street .replace (f ["match" ], f ["replace" ])
162175 cleaned = True
163- return stripped_street , cleaned
176+ return street , cleaned
164177
165178 def _repair_zip (self , zip_code : int , locality : str ) -> Tuple [str , bool ]:
166179 """
@@ -172,22 +185,21 @@ def _repair_zip(self, zip_code: int, locality: str) -> Tuple[str, bool]:
172185 if len (zcode ) == 4 :
173186 zcode = f"0{ zcode } "
174187 cleaned = True
175- # This address is an absolute mess
176- if zcode == "89512" and locality == "Reno" :
177- zcode = "89506"
178- cleaned = True
179- if zcode == "82901" and locality == "Rock Springs" :
180- zcode = "82935"
181- cleaned = True
182- if zcode == "98421-1615" and locality == "Tacoma" :
183- zcode = "98421"
184- cleaned = True
185- if zcode == "89048" and locality == "Pahrump" :
186- zcode = "89060"
187- cleaned = True
188- if zcode == "85132" and locality == "Florence" :
189- zcode = "85232"
190- cleaned = True
188+ matches = [
189+ {"match" : "89512" , "replace" : "89506" , "locality" : "Reno" },
190+ {"match" : "82901" , "replace" : "82935" , "locality" : "Rock Springs" },
191+ {"match" : "98421-1615" , "replace" : "98421" , "locality" : "Tacoma" },
192+ {"match" : "89048" , "replace" : "89060" , "locality" : "Pahrump" },
193+ {"match" : "85132" , "replace" : "85232" , "locality" : "Florence" },
194+ # Laredo facility addresses are particularly bad...
195+ {"match" : "78041" , "replace" : "78401" , "locality" : "LAREDO" },
196+ {"match" : "78401" , "replace" : "78046" , "locality" : "LAREDO" },
197+ ]
198+ for z in matches :
199+ if z ["match" ] == zcode and z ["locality" ] == locality :
200+ zcode = z ["replace" ]
201+ cleaned = True
202+ break
191203 return zcode , cleaned
192204
193205 def _repair_locality (self , locality : str , administrative_area : str ) -> Tuple [str , bool ]:
@@ -196,21 +208,18 @@ def _repair_locality(self, locality: str, administrative_area: str) -> Tuple[str
196208 How the post office ever successfully delivered a letter is beyond me
197209 """
198210 cleaned = False
199- if locality == "LaGrange" and administrative_area == "KY" :
200- locality = "La Grange"
201- cleaned = True
202- if locality == "Leachfield" and administrative_area == "KY" :
203- locality = "LEITCHFIELD"
204- cleaned = True
205- if locality == "Susupe, Saipan" and administrative_area == "MP" :
206- locality = "SAIPAN"
207- cleaned = True
208- if locality == "Cottonwood Falls" and administrative_area == "KS" :
209- locality = "COTTONWOOD FALL"
210- cleaned = True
211- if locality == "Sault Ste. Marie" and administrative_area == "MI" :
212- locality = "SAULT STE MARIE"
213- cleaned = True
211+ matches = [
212+ {"match" : "LaGrange" , "replace" : "La Grange" , "area" : "KY" },
213+ {"match" : "Leachfield" , "replace" : "LEITCHFIELD" , "area" : "KY" },
214+ {"match" : "SAIPAN" , "replace" : "Susupe, Saipan" , "area" : "MP" },
215+ {"match" : "COTTONWOOD FALL" , "replace" : "Cottonwood Falls" , "area" : "KS" },
216+ {"match" : "Sault Ste. Marie" , "replace" : "SAULT STE MARIE" , "area" : "MI" },
217+ ]
218+ for f in matches :
219+ if f ["match" ] == locality and f ["area" ] == administrative_area :
220+ locality = f ["replace" ]
221+ cleaned = True
222+ break
214223 return locality , cleaned
215224
216225 def _load_sheet (self ) -> dict :
@@ -240,14 +249,14 @@ def _load_sheet(self) -> dict:
240249 if match :
241250 details ["phone" ] = match .group (1 )
242251 details ["_repaired_record" ] = True
243- full_address = "," .join ([street , row ["City" ], row ["State" ], zcode ]).upper ()
244- details ["address" ]["administrative_area" ] = row ["State" ]
245252 locality , cleaned = self ._repair_locality (row ["City" ], row ["State" ])
246253 if cleaned :
247254 details ["_repaired_record" ] = True
248- details ["address" ]["locality" ] = row ["City" ]
249- details ["address" ]["postal_code" ] = row ["Zip" ]
250- details ["address" ]["street" ] = row ["Address" ]
255+ full_address = "," .join ([street , locality , row ["State" ], zcode ]).upper ()
256+ details ["address" ]["administrative_area" ] = row ["State" ]
257+ details ["address" ]["locality" ] = locality
258+ details ["address" ]["postal_code" ] = zcode
259+ details ["address" ]["street" ] = street
251260 details ["name" ] = row ["Name" ]
252261 details ["population" ]["male" ]["criminal" ] = row ["Male Crim" ]
253262 details ["population" ]["male" ]["non_criminal" ] = row ["Male Non-Crim" ]
@@ -316,12 +325,15 @@ def scrape_facilities(self):
316325 addr = facility ["address" ]
317326 street , cleaned = self ._clean_street (addr ["street" ], addr ["locality" ])
318327 if cleaned :
328+ addr ["street" ] = street
319329 facility ["_repaired_record" ] = True
320330 zcode , cleaned = self ._repair_zip (addr ["postal_code" ], addr ["locality" ])
321331 if cleaned :
332+ addr ["postal_code" ] = zcode
322333 facility ["_repaired_record" ] = True
323334 locality , cleaned = self ._repair_locality (addr ["locality" ], addr ["administrative_area" ])
324335 if cleaned :
336+ addr ["locality" ] = locality
325337 facility ["_repaired_record" ] = True
326338 full_address = "," .join ([street , locality , addr ["administrative_area" ], zcode ]).upper ()
327339 if not facility ["address_str" ]:
0 commit comments