@@ -95,7 +95,6 @@ def _clean_street(self, street: str, locality: str = "") -> Tuple[str, bool]:
9595 {"match" : "27991 Buena Vista Blvd." , "replace" : "27991 BUENA VISTA BOULEVARD" , "locality" : "Los Fresnos" },
9696 {"match" : "175 Pike County Blvd." , "replace" : "175 PIKE COUNTY BOULEVARD" , "locality" : "Lords Valley" },
9797 {"match" : "500 W. 2nd Street" , "replace" : "301 W. 2nd" , "locality" : "Rolla" },
98- {"match" : "307 Saint Joseph St" , "replace" : "300 KANSAS CITY STREET NONE" , "locality" : "Rapid City" },
9998 {"match" : "3405 West Highway 146" , "replace" : "3405 W HWY 146" , "locality" : "LaGrange" },
10099 {"match" : "1623 E J Street, Suite 2" , "replace" : "1623 E. J STREET" , "locality" : "Tacoma" },
101100 {"match" : "1805 W 32nd Street" , "replace" : "1805 W 32ND ST" , "locality" : "Baldwin" },
@@ -107,7 +106,6 @@ def _clean_street(self, street: str, locality: str = "") -> Tuple[str, bool]:
107106 {"match" : "2190 E Mesquite Avenue" , "replace" : "2190 EAST MESQUITE AVENUE" , "locality" : "Pahrump" },
108107 {"match" : "287 Industrial Drive" , "replace" : "327 INDUSTRIAL DRIVE" , "locality" : "Jonesboro" },
109108 {"match" : "1572 Gateway Road" , "replace" : "1572 GATEWAY" , "locality" : "Calexico" },
110- {"match" : "203 Aspinall Avenue" , "replace" : "203 ASPINAL AVE. PO BOX 3236" , "locality" : "Hagatna" },
111109 {"match" : "1199 N Haseltine Road" , "replace" : "1199 N HASELTINE RD" , "locality" : "Springfield" },
112110 {"match" : "1701 North Washington" , "replace" : "1701 NORTH WASHINGTON ST" , "locality" : "Grand Forks" },
113111 {"match" : "611 Frontage Road" , "replace" : "611 FRONTAGE RD" , "locality" : "McFarland" },
@@ -127,7 +125,7 @@ def _clean_street(self, street: str, locality: str = "") -> Tuple[str, bool]:
127125 {"match" : "704 E Broadway Street" , "replace" : "702 E BROADWAY ST" , "locality" : "Eden" },
128126 {"match" : "1300 E Hwy 107" , "replace" : "1330 HIGHWAY 107" , "locality" : "La Villa" },
129127 {"match" : "216 W. Center Street" , "replace" : "215 WEST CENTRAL STREET" , "locality" : "Juneau" },
130- {"match" : "300 El Racho Way " , "replace" : "300 EL RANCHO WAY" , "locality" : "Dilley" },
128+ {"match" : "300 El Rancho Way " , "replace" : "300 EL RANCHO WAY" , "locality" : "Dilley" },
131129 {"match" : "3130 North Oakland Street" , "replace" : "3130 OAKLAND ST" , "locality" : "Aurora" },
132130 {"match" : "03151 Co. Rd. 24.2" , "replace" : "3151 ROAD 2425 ROUTE 1" , "locality" : "Stryker" },
133131 {"match" : "20 Hobo Forks Road" , "replace" : "20 HOBO FORK RD" , "locality" : "Natchez" },
@@ -146,22 +144,37 @@ def _clean_street(self, street: str, locality: str = "") -> Tuple[str, bool]:
146144 "locality" : "Bowling Green" ,
147145 },
148146 {"match" : "58 Pine Mountain Road" , "replace" : "58 PINE MOUNTAIN RD" , "locality" : "McElhattan" },
147+ {
148+ "match" : "Adelanto East 10400 Rancho Road | Adelanto West 10250 Rancho Road" ,
149+ "replace" : "10250 Rancho Road" ,
150+ "locality" : "Adelanto" ,
151+ },
152+ {"match" : "4702 East Saunders" , "replace" : "4702 EAST SAUNDERS STREET" , "locality" : "Laredo" },
153+ {"match" : "9998 S. Highway 98" , "replace" : "9998 SOUTH HIGHWAY 83" , "locality" : "Laredo" },
149154 # a unique one, 'cause the PHONE NUMBER IS IN THE ADDRESS?!
150155 {"match" : "911 PARR BLVD 775 328 3308" , "replace" : "911 E Parr Blvd" , "locality" : "RENO" },
156+ # fix a few shockingly bad addresses in spreadsheet
157+ {"match" : "DEPARTMENT OF CORRECTIONS 1618 ASH STREET" , "replace" : "1618 Ash Street" , "locality" : "ERIE" },
158+ {"match" : "203 ASPINAL AVE. PO BOX 3236" , "replace" : "203 Aspinall Avenue" , "locality" : "HAGATNA" },
159+ {
160+ "match" : "11866 HASTINGS BRIDGE ROAD P.O. BOX 429" ,
161+ "replace" : "11866 Hastings Bridge Road" ,
162+ "locality" : "LOVEJOY" ,
163+ },
164+ {"match" : "300 KANSAS CITY STREET NONE" , "replace" : "307 Saint Joseph St" , "locality" : "RAPID CITY" },
165+ {"match" : "4909 FM 2826" , "replace" : "4909 Farm to Market Road" , "locality" : "ROBSTOWN" },
166+ {"match" : "6920 DIGITAL RD" , "replace" : "11541 Montana Avenue" , "locality" : "EL PASO" },
151167 # default matches should come last
152168 {"match" : "'s" , "replace" : "" , "locality" : "" },
153169 {"match" : "." , "replace" : "" , "locality" : "" },
154170 {"match" : "," , "replace" : "" , "locality" : "" },
155171 ]
156- stripped_street = street
157172 cleaned = False
158- if any (f ["match" ] in stripped_street for f in street_filters ):
159- cleaned = True
160173 for f in street_filters :
161- if (f ["match" ] in stripped_street ) and ((f ["locality" ] and f ["locality" ] == locality ) or not f ["locality" ]):
162- stripped_street = stripped_street .replace (f ["match" ], f ["replace" ])
174+ if (f ["match" ] in street ) and ((f ["locality" ] and f ["locality" ] == locality ) or not f ["locality" ]):
175+ street = street .replace (f ["match" ], f ["replace" ])
163176 cleaned = True
164- return stripped_street , cleaned
177+ return street , cleaned
165178
166179 def _repair_zip (self , zip_code : int , locality : str ) -> Tuple [str , bool ]:
167180 """
@@ -173,22 +186,21 @@ def _repair_zip(self, zip_code: int, locality: str) -> Tuple[str, bool]:
173186 if len (zcode ) == 4 :
174187 zcode = f"0{ zcode } "
175188 cleaned = True
176- # This address is an absolute mess
177- elif zcode == "89512" and locality == "Reno" :
178- zcode = "89506"
179- cleaned = True
180- elif zcode == "82901" and locality == "Rock Springs" :
181- zcode = "82935"
182- cleaned = True
183- elif zcode == "98421-1615" and locality == "Tacoma" :
184- zcode = "98421"
185- cleaned = True
186- elif zcode == "89048" and locality == "Pahrump" :
187- zcode = "89060"
188- cleaned = True
189- elif zcode == "85132" and locality == "Florence" :
190- zcode = "85232"
191- cleaned = True
189+ matches = [
190+ {"match" : "89512" , "replace" : "89506" , "locality" : "Reno" },
191+ {"match" : "82901" , "replace" : "82935" , "locality" : "Rock Springs" },
192+ {"match" : "98421-1615" , "replace" : "98421" , "locality" : "Tacoma" },
193+ {"match" : "89048" , "replace" : "89060" , "locality" : "Pahrump" },
194+ {"match" : "85132" , "replace" : "85232" , "locality" : "Florence" },
195+ # Laredo facility addresses are particularly bad...
196+ {"match" : "78041" , "replace" : "78401" , "locality" : "LAREDO" },
197+ {"match" : "78401" , "replace" : "78046" , "locality" : "LAREDO" },
198+ ]
199+ for z in matches :
200+ if z ["match" ] == zcode and z ["locality" ] == locality :
201+ zcode = z ["replace" ]
202+ cleaned = True
203+ break
192204 return zcode , cleaned
193205
194206 def _repair_locality (self , locality : str , administrative_area : str ) -> Tuple [str , bool ]:
@@ -197,21 +209,18 @@ def _repair_locality(self, locality: str, administrative_area: str) -> Tuple[str
197209 How the post office ever successfully delivered a letter is beyond me
198210 """
199211 cleaned = False
200- if locality == "LaGrange" and administrative_area == "KY" :
201- locality = "La Grange"
202- cleaned = True
203- elif locality == "Leachfield" and administrative_area == "KY" :
204- locality = "LEITCHFIELD"
205- cleaned = True
206- elif locality == "Susupe, Saipan" and administrative_area == "MP" :
207- locality = "SAIPAN"
208- cleaned = True
209- elif locality == "Cottonwood Falls" and administrative_area == "KS" :
210- locality = "COTTONWOOD FALL"
211- cleaned = True
212- elif locality == "Sault Ste. Marie" and administrative_area == "MI" :
213- locality = "SAULT STE MARIE"
214- cleaned = True
212+ matches = [
213+ {"match" : "LaGrange" , "replace" : "La Grange" , "area" : "KY" },
214+ {"match" : "Leachfield" , "replace" : "LEITCHFIELD" , "area" : "KY" },
215+ {"match" : "SAIPAN" , "replace" : "Susupe, Saipan" , "area" : "MP" },
216+ {"match" : "COTTONWOOD FALL" , "replace" : "Cottonwood Falls" , "area" : "KS" },
217+ {"match" : "Sault Ste. Marie" , "replace" : "SAULT STE MARIE" , "area" : "MI" },
218+ ]
219+ for f in matches :
220+ if f ["match" ] == locality and f ["area" ] == administrative_area :
221+ locality = f ["replace" ]
222+ cleaned = True
223+ break
215224 return locality , cleaned
216225
217226 def _load_sheet (self ) -> dict :
@@ -241,14 +250,14 @@ def _load_sheet(self) -> dict:
241250 if match :
242251 details ["phone" ] = match .group (1 )
243252 details ["_repaired_record" ] = True
244- full_address = "," .join ([street , row ["City" ], row ["State" ], zcode ]).upper ()
245- details ["address" ]["administrative_area" ] = row ["State" ]
246253 locality , cleaned = self ._repair_locality (row ["City" ], row ["State" ])
247254 if cleaned :
248255 details ["_repaired_record" ] = True
249- details ["address" ]["locality" ] = row ["City" ]
250- details ["address" ]["postal_code" ] = row ["Zip" ]
251- details ["address" ]["street" ] = row ["Address" ]
256+ full_address = "," .join ([street , locality , row ["State" ], zcode ]).upper ()
257+ details ["address" ]["administrative_area" ] = row ["State" ]
258+ details ["address" ]["locality" ] = locality
259+ details ["address" ]["postal_code" ] = zcode
260+ details ["address" ]["street" ] = street
252261 details ["name" ] = row ["Name" ]
253262 details ["population" ]["male" ]["criminal" ] = row ["Male Crim" ]
254263 details ["population" ]["male" ]["non_criminal" ] = row ["Male Non-Crim" ]
@@ -325,11 +334,11 @@ def scrape_facilities(self):
325334 facility ["_repaired_record" ] = True
326335 zcode , cleaned = self ._repair_zip (addr ["postal_code" ], addr ["locality" ])
327336 if cleaned :
328- facility ["postal_code" ] = zcode
337+ addr ["postal_code" ] = zcode
329338 facility ["_repaired_record" ] = True
330339 locality , cleaned = self ._repair_locality (addr ["locality" ], addr ["administrative_area" ])
331340 if cleaned :
332- facility ["locality" ] = locality
341+ addr ["locality" ] = locality
333342 facility ["_repaired_record" ] = True
334343 full_address = "," .join ([street , locality , addr ["administrative_area" ], zcode ]).upper ()
335344 if full_address in self .facilities_data ["facilities" ].keys ():
0 commit comments