Skip to content

Commit 5a15b31

Browse files
committed
move all functions into folder
Signed-off-by: John Seekins <[email protected]>
1 parent 30072fc commit 5a15b31

File tree

5 files changed

+24
-19
lines changed

5 files changed

+24
-19
lines changed

enrichers/__init__.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,16 @@
77
session,
88
)
99

10+
OSM_DELAY = 1.0 # 1 second between requests as per OSM policy
11+
WIKIDATA_DELAY = 0.5 # Be respectful to Wikidata
12+
WIKIPEDIA_DELAY = 0.5 # Be respectful to Wikipedia
13+
14+
# default to Washington, D.C.?
15+
default_coords: dict = {
16+
"latitude": 38.89511000,
17+
"longitude": -77.03637000,
18+
}
19+
1020

1121
class Enrichment(object):
1222
required_keys = [
@@ -109,3 +119,6 @@ def _clean_facility_name(self, name: str) -> str:
109119
cleaned = cleaned[: -len(suffix)].strip()
110120
break
111121
return cleaned
122+
123+
124+
from .general import enrich_facility_data # noqa: F401,E402

enricher.py renamed to enrichers/general.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,16 +1,16 @@
11
from concurrent.futures import ProcessPoolExecutor
22
import copy
33
from enrichers import (
4+
default_coords,
5+
OSM_DELAY,
46
openstreetmap,
57
wikidata,
8+
WIKIDATA_DELAY,
69
wikipedia,
10+
WIKIPEDIA_DELAY,
711
)
812
from schemas import (
9-
default_coords,
1013
facilities_schema,
11-
OSM_DELAY,
12-
WIKIDATA_DELAY,
13-
WIKIPEDIA_DELAY,
1414
)
1515
import time
1616
from utils import logger
@@ -25,7 +25,7 @@ def enrich_facility_data(facilities_data: dict, workers: int = 3) -> dict:
2525
processed = 0
2626

2727
with ProcessPoolExecutor(max_workers=workers) as pool:
28-
for res in pool.map(enrich_facility, facilities_data["facilities"].items()):
28+
for res in pool.map(_enrich_facility, facilities_data["facilities"].items()):
2929
enriched_data["facilities"][res[0]] = res[1] # type: ignore [index]
3030
processed += 1
3131
logger.info(" -> Finished %s, %s/%s completed", res[1]["name"], processed, total)
@@ -36,7 +36,7 @@ def enrich_facility_data(facilities_data: dict, workers: int = 3) -> dict:
3636
return enriched_data
3737

3838

39-
def enrich_facility(facility_data: tuple) -> tuple:
39+
def _enrich_facility(facility_data: tuple) -> tuple:
4040
"""enrich a single facility"""
4141
facility_id, facility = facility_data
4242
facility_name = facility["name"]

enrichers/openstreetmap.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,7 @@
1-
from schemas import default_coords
2-
from enrichers import Enrichment
1+
from enrichers import (
2+
default_coords,
3+
Enrichment,
4+
)
35
from utils import logger
46

57

main.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@
2323
import logging
2424
from file_utils import export_to_file, print_summary
2525
import default_data
26-
from enricher import enrich_facility_data
26+
from enrichers import enrich_facility_data
2727
from schemas import supported_output_types
2828
from scraper import ICEGovFacilityScraper
2929
from field_offices import ICEFieldOfficeScraper

schemas.py

Lines changed: 0 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,16 +1,6 @@
11
import copy
22
import datetime
33

4-
OSM_DELAY = 1.0 # 1 second between requests as per OSM policy
5-
WIKIDATA_DELAY = 0.5 # Be respectful to Wikidata
6-
WIKIPEDIA_DELAY = 0.5 # Be respectful to Wikipedia
7-
8-
# default to Washington, D.C.?
9-
default_coords: dict = {
10-
"latitude": 38.89511000,
11-
"longitude": -77.03637000,
12-
}
13-
144
facilities_schema = {
155
"scraped_date": datetime.datetime.now(datetime.UTC),
166
"scrape_runtime": 0,

0 commit comments

Comments
 (0)