Skip to content

Commit 385b069

Browse files
committed
set consistent field office name
Signed-off-by: John Seekins <[email protected]>
1 parent e504c12 commit 385b069

File tree

2 files changed

+8
-2
lines changed

2 files changed

+8
-2
lines changed

enricher.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -32,8 +32,6 @@ def enrich_facility_data(self, facilities_data: dict) -> dict:
3232
facility_name = facility["name"]
3333
logger.info("Processing facility %s/%s: %s...", processed + 1, total, facility_name)
3434
enriched_facility = copy.deepcopy(facility)
35-
if not enriched_facility["field_office"]:
36-
enriched_facility["field_office"] = "(Possibly) Not managed by DHS field office"
3735

3836
# Wikipedia search # todo refactor to method
3937
try:

scraper.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -331,9 +331,17 @@ def scrape_facilities(self):
331331
if url in self.facilities_data["facilities"][full_address]["source_urls"]:
332332
continue
333333
self.facilities_data["facilities"][full_address]["source_urls"].append(url)
334+
if not self.facilities_data["facilities"][full_address].get("field_office", ""):
335+
self.facilities_data["facilities"][full_address]["field_office"] = (
336+
"(Possibly) Not managed by DHS field office"
337+
)
334338
# this is likely to produce _some_ duplicates, but it's a reasonable starting place
335339
else:
336340
self.facilities_data["facilities"][facility["name"]] = facility
341+
if not self.facilities_data["facilities"][facility["name"]].get("field_office", ""):
342+
self.facilities_data["facilities"][facility["name"]]["field_office"] = (
343+
"(Possibly) Not managed by DHS field office"
344+
)
337345

338346
self.facilities_data["scrape_runtime"] = time.time() - start_time
339347
logger.info("Total facilities scraped: %s", len(self.facilities_data["facilities"]))

0 commit comments

Comments
 (0)