Skip to content

Commit 4bef80f

Browse files
committed
move rough schemas to a separate file
Signed-off-by: John Seekins <[email protected]>
1 parent 65e7c30 commit 4bef80f

File tree

5 files changed

+43
-42
lines changed

5 files changed

+43
-42
lines changed

enricher.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
import copy
22
import requests
33
from requests.adapters import HTTPAdapter
4+
from schemas import facilities_schema
45
import time
56
from urllib.parse import quote
67
import urllib3
7-
from utils import logger, facilities_schema
8+
from utils import logger
89

910
# ExternalDataEnricher class for enrichment logic
1011

file_utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
import csv
22
import json
3-
from utils import (
3+
from schemas import (
44
debug_schema,
55
facility_schema,
66
enrichment_schema,
7-
logger,
87
)
8+
from utils import logger
99

1010

1111
def export_to_file(

schemas.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
"""Rough data schemas shared by the scraper, enricher, and file utilities.

These module-level dicts/lists act as shared template objects; callers
should copy them (e.g. ``copy.deepcopy``) before mutating — presumably the
importing modules already do this, TODO confirm against the callers.
"""
import datetime


def _utc_now_naive() -> datetime.datetime:
    """Return the current UTC time as a naive ``datetime``.

    Replacement for ``datetime.datetime.utcnow()``, which is deprecated
    since Python 3.12. The result is kept naive (``tzinfo=None``) so the
    value is backward-compatible with any existing consumers that compare
    or serialize naive timestamps.
    """
    return datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)


# Top-level scrape result document.
# NOTE(review): the two timestamps are evaluated once, at import time — not
# per scrape run. Callers that need a fresh timestamp must overwrite these
# fields themselves; TODO confirm the scraper/enricher already do so.
facilities_schema = {
    "scraped_date": _utc_now_naive(),
    "page_updated_date": _utc_now_naive(),
    "scrape_runtime": 0,
    "enrich_runtime": 0,
    "facilities": [],
}

# Template for a single scraped facility record; every field defaults to
# an empty string.
facility_schema = {
    "address": "",
    "administrative_area": "",
    "country": "",
    "facility_url": "",
    "field_office": "",
    "full_address": "",
    "image_url": "",
    "locality": "",
    "name": "",
    "phone": "",
    "postal_code": "",
    "raw_scrape": "",
    "source_url": "",
}

# Keys added to a facility record by the enrichment step (external URLs).
enrichment_schema = [
    "wikipedia_page_url",
    "wikidata_page_url",
    "osm_result_url",
]

# Keys recorded only in debug output (the raw search queries issued).
debug_schema = [
    "wikipedia_search_query",
    "wikidata_search_query",
    "osm_search_query",
]

scraper.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,13 @@
66
import re
77
import requests
88
from requests.adapters import HTTPAdapter
9-
import time
10-
import urllib3
11-
from utils import (
9+
from schemas import (
1210
facilities_schema,
1311
facility_schema,
14-
logger,
1512
)
13+
import time
14+
import urllib3
15+
from utils import logger
1616

1717

1818
class ICEFacilityScraper(object):

utils.py

Lines changed: 0 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,6 @@
# For general helpers, regexes, or shared logic (e.g. phone/address parsing functions).
import logging

# Shared module-level logger used across the project.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
# Guard against attaching a duplicate StreamHandler if this setup runs more
# than once (e.g. importlib.reload, or the application configuring this
# logger itself) — duplicate handlers would emit every message twice.
if not logger.handlers:
    logger.addHandler(logging.StreamHandler())
8-
9-
facilities_schema = {
10-
"scraped_date": datetime.datetime.utcnow(),
11-
"page_updated_date": datetime.datetime.utcnow(),
12-
"scrape_runtime": 0,
13-
"enrich_runtime": 0,
14-
"facilities": [],
15-
}
16-
17-
facility_schema = {
18-
"address": "",
19-
"administrative_area": "",
20-
"country": "",
21-
"facility_url": "",
22-
"field_office": "",
23-
"full_address": "",
24-
"image_url": "",
25-
"locality": "",
26-
"name": "",
27-
"phone": "",
28-
"postal_code": "",
29-
"raw_scrape": "",
30-
"source_url": "",
31-
}
32-
enrichment_schema = [
33-
"wikipedia_page_url",
34-
"wikidata_page_url",
35-
"osm_result_url",
36-
]
37-
debug_schema = [
38-
"wikipedia_search_query",
39-
"wikidata_search_query",
40-
"osm_search_query",
41-
]

0 commit comments

Comments
 (0)