11import copy
2- import csv
32import json
43from schemas import enrichment_print_schema
54from utils import (
6- _flatdict ,
5+ convert_to_dataframe ,
76 logger ,
87)
8+ import xlsxwriter # type: ignore [import-untyped]
99
1010
1111def export_to_file (
@@ -18,23 +18,20 @@ def export_to_file(
1818 return ""
1919
2020 full_name = f"{ filename } .{ file_type } "
21- csv_filtered_keys = ["raw_scrape" , "wikipedia_search_query" , "wikidata_search_query" , "osm_search_query" ]
22- try :
23- with open (full_name , "w" , newline = "" , encoding = "utf-8" ) as f_out :
24- if file_type == "csv" :
25- flatdata = [_flatdict (f ) for _ , f in facilities_data ["facilities" ].items ()]
26- fieldnames = [k for k in flatdata [0 ].keys () if k not in csv_filtered_keys ]
27-
28- writer = csv .DictWriter (f_out , fieldnames = fieldnames )
29- writer .writeheader ()
30- for facility in flatdata :
31- row_data = {field : facility .get (field , None ) for field in fieldnames }
32- writer .writerow (row_data )
33- elif file_type == "json" :
34- json .dump (facilities_data , f_out , indent = 2 , sort_keys = True , default = str )
35- except Exception as e :
36- logger .error ("Error writing %s file: %s" , file_type , e )
37- return ""
21+ if file_type in ["csv" , "xlsx" , "parquet" ]:
22+ writer = convert_to_dataframe (facilities_data ["facilities" ])
23+ match file_type :
24+ case "xlsx" :
25+ with xlsxwriter .Workbook (full_name , {"remove_timezone" : True }) as wb :
26+ writer .write_excel (workbook = wb , include_header = True , autofit = True )
27+ case "csv" :
28+ with open (full_name , "w" , newline = "" , encoding = "utf-8" ) as f_out :
29+ writer .write_csv (file = f_out , include_header = True )
30+ case "parquet" :
31+ writer .write_parquet (full_name , use_pyarrow = True )
32+ elif file_type == "json" :
33+ with open (full_name , "w" , encoding = "utf-8" ) as f_out :
34+ json .dump (facilities_data , f_out , indent = 2 , sort_keys = True , default = str )
3835
3936 logger .info (
4037 "%s file '%s.%s' created successfully with %s facilities." ,
@@ -61,8 +58,7 @@ def print_summary(facilities_data: dict) -> None:
6158 # Count by field office
6259 field_offices : dict = {}
6360 for facility_id , facility in facilities_data ["facilities" ].items ():
64- office = facility .get ("field_office" , "Unknown" )
65- field_offices [office ] = field_offices .get (office , 0 ) + 1
61+ field_offices [facility ["field_office" ]] = field_offices .get (facility ["field_office" ], 0 ) + 1
6662
6763 logger .info ("\n Facilities by Field Office:" )
6864 for office , count in sorted (field_offices .items (), key = lambda x : x [1 ], reverse = True ):
0 commit comments