Skip to content

Commit 1980951

Browse files
committed
more address matching shenanigans
Signed-off-by: John Seekins <[email protected]>
1 parent c3341c1 commit 1980951

File tree

1 file changed

+32
-5
lines changed

1 file changed

+32
-5
lines changed

scraper.py

Lines changed: 32 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -44,9 +44,6 @@ def _download_sheet(self) -> None:
4444
def _clean_street(self, street: str, locality: str = "") -> Tuple[str, bool]:
4545
"""Generally, we'll let the spreadsheet win arguments just to be consistent"""
4646
street_filters = [
47-
{"match": "'s", "replace": "", "locality": ""},
48-
{"match": ".", "replace": "", "locality": ""},
49-
{"match": ",", "replace": "", "locality": ""},
5047
# address mismatch between site and spreadsheet
5148
{"match": "80 29th Street", "replace": "100 29th Street", "locality": "Brooklyn"},
5249
{"match": "2250 Laffoon Trl", "replace": "2250 Lafoon Trail", "locality": "Madisonville"},
@@ -76,9 +73,29 @@ def _clean_street(self, street: str, locality: str = "") -> Tuple[str, bool]:
7673
{"match": "175 Pike County Blvd.", "replace": "175 PIKE COUNTY BOULEVARD", "locality": "Lords Valley"},
7774
{"match": "500 W. 2nd Street", "replace": "301 W. 2nd", "locality": "Rolla"},
7875
{"match": "307 Saint Joseph St", "replace": "300 KANSAS CITY STREET NONE", "locality": "Rapid City"},
79-
{"match": "3405 West Highway 146", "replace": "3405 W HWY 146", "locality": "La Grange"},
76+
{"match": "3405 West Highway 146", "replace": "3405 W HWY 146", "locality": "LaGrange"},
77+
{"match": "1623 E J Street, Suite 2", "replace": "1623 E. J STREET", "locality": "Tacoma"},
78+
{"match": "1805 W 32nd Street", "replace": "1805 W 32ND ST", "locality": "Baldwin"},
79+
{"match": "500 Hilbig Road", "replace": "500 HILBIG RD", "locality": "Conroe"},
80+
{"match": "425 Golden State Avenue", "replace": "425 Golden State Ave", "locality": "Bakersfield"},
81+
{"match": "832 East Texas HWY 44", "replace": "832 EAST TEXAS STATE HIGHWAY 44", "locality": "Encinal"},
82+
{"match": "18201 SW 12th Street", "replace": "18201 SW 12TH ST", "locality": "Miami"},
83+
{"match": "2190 E Mesquite Avenue", "replace": "2190 EAST MESQUITE AVENUE", "locality": "Pahrump"},
84+
{"match": "287 Industrial Drive", "replace": "327 INDUSTRIAL DRIVE", "locality": "Jonesboro"},
85+
{"match": "1572 Gateway Road", "replace": "1572 GATEWAY", "locality": "Calexico"},
86+
{"match": "203 Aspinall Avenue", "replace": "203 ASPINAL AVE. PO BOX 3236", "locality": "Hagatna"},
87+
{"match": "1199 N Haseltine Road", "replace": "1199 N HASELTINE RD", "locality": "Springfield"},
88+
{"match": "1701 North Washington", "replace": "1701 NORTH WASHINGTON ST", "locality": "Grand Forks"},
89+
{"match": "611 Frontage Road", "replace": "611 FRONTAGE RD", "locality": "McFarland"},
90+
{"match": "12450 Merritt Road", "replace": "12450 MERRITT DR", "locality": "Chardon"},
91+
{"match": "411 S. Broadway Avenue", "replace": "411 SOUTH BROADWAY AVENUE", "locality": "Chardon"},
92+
{"match": "3424 Hwy 252 E", "replace": "3424 HIGHWAY 252 EAST", "locality": "Folkston"},
8093
# a unique one, 'cause the PHONE NUMBER IS IN THE ADDRESS?!
8194
{"match": "911 PARR BLVD 775 328 3308", "replace": "911 E Parr Blvd", "locality": "RENO"},
95+
# default matches should come last
96+
{"match": "'s", "replace": "", "locality": ""},
97+
{"match": ".", "replace": "", "locality": ""},
98+
{"match": ",", "replace": "", "locality": ""},
8299
]
83100
stripped_street = street
84101
cleaned = False
@@ -91,7 +108,10 @@ def _clean_street(self, street: str, locality: str = "") -> Tuple[str, bool]:
91108
return stripped_street, cleaned
92109

93110
def _repair_zip(self, zip_code: int, locality: str) -> Tuple[str, bool]:
94-
"""Excel does a cool thing where it strips leading 0s"""
111+
"""
112+
Excel does a cool thing where it strips leading 0s
113+
Also, many zip codes are mysteriously discordant
114+
"""
95115
zcode = str(zip_code)
96116
cleaned = False
97117
if len(zcode) == 4:
@@ -104,6 +124,10 @@ def _repair_zip(self, zip_code: int, locality: str) -> Tuple[str, bool]:
104124
if zcode == "82901" and locality == "Rock Springs":
105125
zcode = "82935"
106126
cleaned = True
127+
if zcode == "98421-1615" and locality == "Tacoma":
128+
zcode = "98421"
129+
if zcode == "89048" and locality == "Pahrump":
130+
zcode = "89060"
107131
return zcode, cleaned
108132

109133
def _repair_locality(self, locality: str, administrative_area: str) -> Tuple[str, bool]:
@@ -115,6 +139,9 @@ def _repair_locality(self, locality: str, administrative_area: str) -> Tuple[str
115139
if locality == "LaGrange" and administrative_area == "KY":
116140
locality = "La Grange"
117141
cleaned = True
142+
if locality == "Leachfield" and administrative_area == "KY":
143+
locality = "LEITCHFIELD"
144+
cleaned = True
118145
return locality, cleaned
119146

120147
def _load_sheet(self) -> dict:

0 commit comments

Comments
 (0)