11import copy
2- import csv
32import json
43from schemas import enrichment_print_schema
54from utils import (
6- _flatdict ,
5+ convert_to_dataframe ,
76 logger ,
87)
8+ import xlsxwriter # type: ignore [import-untyped]
99
1010
1111def export_to_file (
@@ -18,30 +18,20 @@ def export_to_file(
1818 return ""
1919
2020 full_name = f"{ filename } .{ file_type } "
21- csv_filtered_keys = [
22- "_repaired_record" ,
23- "raw_scrape" ,
24- "wikipedia.search_query" ,
25- "wikidata.search_query" ,
26- "osm.search_query" ,
27- "source_urls" ,
28- ]
29- try :
30- with open (full_name , "w" , newline = "" , encoding = "utf-8" ) as f_out :
31- if file_type == "csv" :
32- flatdata = [_flatdict (f ) for _ , f in facilities_data ["facilities" ].items ()]
33- fieldnames = [k for k in flatdata [0 ].keys () if k not in csv_filtered_keys ]
34-
35- writer = csv .DictWriter (f_out , fieldnames = fieldnames )
36- writer .writeheader ()
37- for facility in flatdata :
38- row_data = {field : facility .get (field , None ) for field in fieldnames }
39- writer .writerow (row_data )
40- elif file_type == "json" :
41- json .dump (facilities_data , f_out , indent = 2 , sort_keys = True , default = str )
42- except Exception as e :
43- logger .error ("Error writing %s file: %s" , file_type , e )
44- return ""
21+ if file_type in ["csv" , "xlsx" , "parquet" ]:
22+ writer = convert_to_dataframe (facilities_data ["facilities" ])
23+ match file_type :
24+ case "xlsx" :
25+ with xlsxwriter .Workbook (full_name , {"remove_timezone" : True }) as wb :
26+ writer .write_excel (workbook = wb , include_header = True , autofit = True )
27+ case "csv" :
28+ with open (full_name , "w" , newline = "" , encoding = "utf-8" ) as f_out :
29+ writer .write_csv (file = f_out , include_header = True )
30+ case "parquet" :
31+ writer .write_parquet (full_name , use_pyarrow = True )
32+ elif file_type == "json" :
33+ with open (full_name , "w" , encoding = "utf-8" ) as f_out :
34+ json .dump (facilities_data , f_out , indent = 2 , sort_keys = True , default = str )
4535
4636 logger .info (
4737 "%s file '%s.%s' created successfully with %s facilities." ,
@@ -68,8 +58,7 @@ def print_summary(facilities_data: dict) -> None:
6858 # Count by field office
6959 field_offices : dict = {}
7060 for facility_id , facility in facilities_data ["facilities" ].items ():
71- office = facility .get ("field_office" , "Unknown" )
72- field_offices [office ] = field_offices .get (office , 0 ) + 1
61+ field_offices [facility ["field_office" ]] = field_offices .get (facility ["field_office" ], 0 ) + 1
7362
7463 logger .info ("\n Facilities by Field Office:" )
7564 for office , count in sorted (field_offices .items (), key = lambda x : x [1 ], reverse = True ):
0 commit comments