44import time
55from urllib .parse import quote
66import urllib3
7- from utils import logger
7+ from utils import logger , facilities_schema
88
99# ExternalDataEnricher class for enrichment logic
1010
@@ -27,14 +27,14 @@ def __init__(self):
2727 self .session .headers .update ({"User-Agent" : "ICE-Facilities-Research/1.0 (Educational Research Purpose)" })
2828
2929 def enrich_facility_data (self , facilities_data : dict ) -> dict :
30+ start_time = time .time ()
3031 logger .info ("Starting data enrichment with external sources..." )
31- enriched_data = copy .deepcopy (facilities_data )
32- enriched_data ["facilities" ] = []
32+ enriched_data = copy .deepcopy (facilities_schema )
3333 total = len (facilities_data ["facilities" ])
3434
3535 for i , facility in enumerate (facilities_data ["facilities" ]):
3636 logger .info ("Processing facility %s/%s: %s..." , i + 1 , total , facility ["name" ])
37- enriched_facility = facility . copy ( )
37+ enriched_facility = copy . deepcopy ( facility )
3838 base_enrichment = {
3939 "wikipedia_page_url" : "" ,
4040 "wikipedia_search_query" : "" ,
@@ -81,13 +81,11 @@ def enrich_facility_data(self, facilities_data: dict) -> dict:
8181 enriched_facility ["osm_result_url" ] = ""
8282 enriched_facility ["osm_search_query" ] = str (e )
8383
84- enriched_data ["facilities" ].append (enriched_facility )
85-
86- # do we need the "progress bar" if we show the count in the beginning message?
87- # if (i + 1) % 10 == 0:
88- # logger.info(" Progress: %s/%s facilities processed", i + 1, total)
84+ enriched_data ["facilities" ].append (enriched_facility ) # type: ignore [attr-defined]
8985
9086 logger .info ("Data enrichment completed!" )
87+ enriched_data ["enrich_runtime" ] = time .time () - start_time
88+ logger .info (" Completed in %s seconds" , enriched_data ["enrich_runtime" ])
9189 return enriched_data
9290
9391 def _search_wikipedia (self , facility_name : str ) -> dict :
0 commit comments