2
2
import json
3
3
import logging
4
4
import os
5
- from typing import Any
5
+ from pathlib import Path
6
+ from typing import Any , Optional
6
7
8
+ import flattentool
7
9
import requests
8
10
from libcoveoc4ids .api import oc4ids_json_output
9
11
@@ -52,8 +54,33 @@ def write_json_to_file(file_name: str, json_data: dict[str, Any]) -> str:
52
54
raise Exception ("Error while writing to JSON file" , e )
53
55
54
56
57
def transform_to_csv_and_xlsx(json_path: str) -> tuple[Optional[str], Optional[str]]:
    """Flatten the JSON file at ``json_path`` with flattentool.

    The output base name is the JSON path with its final extension removed
    (same directory, same stem). On success the function returns
    ``(csv_path, xlsx_path)`` where ``csv_path`` is that base name and
    ``xlsx_path`` is the base name with ``.xlsx`` appended.

    Any exception raised while transforming is logged as a warning and
    ``(None, None)`` is returned instead, so a failed conversion does not
    propagate to the caller.
    """
    logger.info(f"Transforming {json_path}")
    try:
        source = Path(json_path)
        # Every output is named after the source file minus its extension.
        output_base = source.parent / source.stem
        flattentool.flatten(
            json_path,
            output_name=str(output_base),
            root_list_path="projects",
            main_sheet_name="projects",
        )  # type: ignore[no-untyped-call]
        csv_path = str(output_base)
        xlsx_path = f"{output_base}.xlsx"
        logger.info(f"Transformed to CSV at {csv_path}")
        logger.info(f"Transformed to XLSX at {xlsx_path}")
        return csv_path, xlsx_path
    except Exception as e:
        # Best-effort step: report the failure and signal "no outputs".
        logger.warning(f"Failed to transform JSON to CSV and XLSX with error {e}")
        return None, None
75
+
76
+
55
77
def save_dataset_metadata (
56
- dataset_name : str , source_url : str , json_data : dict [str , Any ], json_url : str
78
+ dataset_name : str ,
79
+ source_url : str ,
80
+ json_data : dict [str , Any ],
81
+ json_url : str ,
82
+ csv_url : Optional [str ],
83
+ xlsx_url : Optional [str ],
57
84
) -> None :
58
85
logger .info (f"Saving metadata for dataset { dataset_name } " )
59
86
publisher_name = json_data .get ("publisher" , {}).get ("name" , "" )
@@ -66,6 +93,8 @@ def save_dataset_metadata(
66
93
license_url = license_url ,
67
94
license_name = license_name ,
68
95
json_url = json_url ,
96
+ csv_url = csv_url ,
97
+ xlsx_url = xlsx_url ,
69
98
updated_at = datetime .datetime .now (datetime .UTC ),
70
99
)
71
100
save_dataset (dataset )
@@ -76,12 +105,17 @@ def process_dataset(dataset_name: str, dataset_url: str) -> None:
76
105
try :
77
106
json_data = download_json (dataset_url )
78
107
validate_json (dataset_name , json_data )
79
- json_url = write_json_to_file (f"data/{ dataset_name } .json" , json_data )
108
+ json_path = write_json_to_file (
109
+ f"data/{ dataset_name } /{ dataset_name } .json" , json_data
110
+ )
111
+ csv_path , xlsx_path = transform_to_csv_and_xlsx (json_path )
80
112
save_dataset_metadata (
81
113
dataset_name = dataset_name ,
82
114
source_url = dataset_url ,
83
115
json_data = json_data ,
84
- json_url = json_url ,
116
+ json_url = json_path ,
117
+ csv_url = csv_path ,
118
+ xlsx_url = xlsx_path ,
85
119
)
86
120
logger .info (f"Processed dataset { dataset_name } " )
87
121
except Exception as e :
0 commit comments