File tree: 5 files changed, +43 -42 lines changed
Original file line number Diff line number Diff line change
11import copy
22import requests
33from requests .adapters import HTTPAdapter
4+ from schemas import facilities_schema
45import time
56from urllib .parse import quote
67import urllib3
7- from utils import logger , facilities_schema
8+ from utils import logger
89
910# ExternalDataEnricher class for enrichment logic
1011
Original file line number Diff line number Diff line change 11import csv
22import json
3- from utils import (
3+ from schemas import (
44 debug_schema ,
55 facility_schema ,
66 enrichment_schema ,
7- logger ,
87)
8+ from utils import logger
99
1010
1111def export_to_file (
Original file line number Diff line number Diff line change 1+ import datetime
2+
3+ facilities_schema = {
4+ "scraped_date" : datetime .datetime .utcnow (),
5+ "page_updated_date" : datetime .datetime .utcnow (),
6+ "scrape_runtime" : 0 ,
7+ "enrich_runtime" : 0 ,
8+ "facilities" : [],
9+ }
10+
11+ facility_schema = {
12+ "address" : "" ,
13+ "administrative_area" : "" ,
14+ "country" : "" ,
15+ "facility_url" : "" ,
16+ "field_office" : "" ,
17+ "full_address" : "" ,
18+ "image_url" : "" ,
19+ "locality" : "" ,
20+ "name" : "" ,
21+ "phone" : "" ,
22+ "postal_code" : "" ,
23+ "raw_scrape" : "" ,
24+ "source_url" : "" ,
25+ }
26+ enrichment_schema = [
27+ "wikipedia_page_url" ,
28+ "wikidata_page_url" ,
29+ "osm_result_url" ,
30+ ]
31+ debug_schema = [
32+ "wikipedia_search_query" ,
33+ "wikidata_search_query" ,
34+ "osm_search_query" ,
35+ ]
Original file line number Diff line number Diff line change 66import re
77import requests
88from requests .adapters import HTTPAdapter
9- import time
10- import urllib3
11- from utils import (
9+ from schemas import (
1210 facilities_schema ,
1311 facility_schema ,
14- logger ,
1512)
13+ import time
14+ import urllib3
15+ from utils import logger
1616
1717
1818class ICEFacilityScraper (object ):
Original file line number Diff line number Diff line change 11# For general helpers, regexes, or shared logic (e.g. phone/address parsing functions).
2- import datetime
32import logging
43
54logger = logging .getLogger (__name__ )
65logger .setLevel (logging .INFO )
76logger .addHandler (logging .StreamHandler ())
8-
9- facilities_schema = {
10- "scraped_date" : datetime .datetime .utcnow (),
11- "page_updated_date" : datetime .datetime .utcnow (),
12- "scrape_runtime" : 0 ,
13- "enrich_runtime" : 0 ,
14- "facilities" : [],
15- }
16-
17- facility_schema = {
18- "address" : "" ,
19- "administrative_area" : "" ,
20- "country" : "" ,
21- "facility_url" : "" ,
22- "field_office" : "" ,
23- "full_address" : "" ,
24- "image_url" : "" ,
25- "locality" : "" ,
26- "name" : "" ,
27- "phone" : "" ,
28- "postal_code" : "" ,
29- "raw_scrape" : "" ,
30- "source_url" : "" ,
31- }
32- enrichment_schema = [
33- "wikipedia_page_url" ,
34- "wikidata_page_url" ,
35- "osm_result_url" ,
36- ]
37- debug_schema = [
38- "wikipedia_search_query" ,
39- "wikidata_search_query" ,
40- "osm_search_query" ,
41- ]
You can’t perform that action at this time.
0 commit comments