Skip to content

Commit 36ced9c

Browse files
committed
add parquet output as well (potential support for s3-style storage?)
Signed-off-by: John Seekins <[email protected]>
1 parent d727c38 commit 36ced9c

File tree

4 files changed

+21 -15 lines changed

4 files changed

+21 -15 lines changed

file_utils.py

Lines changed: 11 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -18,16 +18,19 @@ def export_to_file(
1818
return ""
1919

2020
full_name = f"{filename}.{file_type}"
21-
if file_type in ["csv", "xlsx"]:
21+
if file_type in ["csv", "xlsx", "parquet"]:
2222
writer = convert_to_dataframe(facilities_data["facilities"])
23-
if file_type == "xlsx":
24-
with xlsxwriter.Workbook(full_name, {"remove_timezone": True}) as wb:
25-
writer.write_excel(workbook=wb, include_header=True, autofit=True)
26-
elif file_type == "csv":
27-
with open(full_name, "w", newline="", encoding="utf-8") as f_out:
28-
writer.write_csv(file=f_out, include_header=True)
23+
match file_type:
24+
case "xlsx":
25+
with xlsxwriter.Workbook(full_name, {"remove_timezone": True}) as wb:
26+
writer.write_excel(workbook=wb, include_header=True, autofit=True)
27+
case "csv":
28+
with open(full_name, "w", newline="", encoding="utf-8") as f_out:
29+
writer.write_csv(file=f_out, include_header=True)
30+
case "parquet":
31+
writer.write_parquet(full_name, use_pyarrow=True)
2932
elif file_type == "json":
30-
with open(full_name, "w", newline="", encoding="utf-8") as f_out:
33+
with open(full_name, "w", encoding="utf-8") as f_out:
3134
json.dump(facilities_data, f_out, indent=2, sort_keys=True, default=str)
3235

3336
logger.info(

main.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@
2424
from file_utils import export_to_file, print_summary
2525
import default_data
2626
from enricher import ExternalDataEnricher
27+
from schemas import supported_output_types
2728
from scraper import ICEGovFacilityScraper
2829
from utils import logger
2930
# CLI, argument parsing, script orchestration
@@ -58,7 +59,7 @@ def main() -> None:
5859
parser.add_argument(
5960
"--file-type",
6061
default="csv",
61-
choices=["csv", "json", "xlsx"],
62+
choices=supported_output_types,
6263
help="type of file to export",
6364
)
6465
parser.add_argument(

schemas.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -106,3 +106,5 @@
106106
}
107107

108108
default_field_office = "(Possibly) Not managed by DHS field office"
109+
110+
supported_output_types = ["csv", "json", "xlsx", "parquet"]

uv.lock

Lines changed: 6 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)