@@ -25,7 +25,7 @@ def download_json(url: str) -> Any:
25
25
raise Exception ("Download failed" , e )
26
26
27
27
28
- def validate_json (dataset_name : str , json_data : Any ) -> None :
28
+ def validate_json (dataset_name : str , json_data : dict [ str , Any ] ) -> None :
29
29
logger .info (f"Validating dataset { dataset_name } " )
30
30
try :
31
31
validation_result = oc4ids_json_output (json_data = json_data )
@@ -37,26 +37,28 @@ def validate_json(dataset_name: str, json_data: Any) -> None:
37
37
raise Exception ("Validation failed" , e )
38
38
39
39
40
def write_json_to_file(file_name: str, json_data: dict[str, Any]) -> str:
    """Write ``json_data`` to ``file_name`` as JSON indented by four spaces.

    Missing parent directories are created first. A bare file name with no
    directory component is written relative to the current working
    directory.

    Args:
        file_name: Destination path of the JSON file.
        json_data: JSON-serialisable mapping to write.

    Returns:
        ``file_name``, unchanged.

    Raises:
        Exception: If the directory cannot be created or the file cannot be
            written. The underlying error is available both as the second
            element of ``args`` and as ``__cause__``.
    """
    logger.info(f"Writing dataset to file {file_name}")
    try:
        # os.path.dirname() returns "" for a bare file name and
        # os.makedirs("") raises FileNotFoundError, so only create a
        # directory when the path actually names one.
        directory = os.path.dirname(file_name)
        if directory:
            os.makedirs(directory, exist_ok=True)
        with open(file_name, "w", encoding="utf-8") as file:
            json.dump(json_data, file, indent=4)
    except Exception as e:
        # Keep the original (message, error) args for existing callers and
        # chain the cause explicitly so the traceback shows the root error.
        raise Exception("Error while writing to JSON file", e) from e
    logger.info(f"Finished writing to {file_name}")
    return file_name
49
50
50
51
51
52
def save_dataset_metadata(
    dataset_name: str, source_url: str, json_data: dict[str, Any], json_url: str
) -> None:
    """Build a ``Dataset`` record for a dataset and pass it to ``save_dataset``.

    Args:
        dataset_name: Stored as the record's ``dataset_id``.
        source_url: Stored as the record's ``source_url``.
        json_data: Dataset content; the publisher name is read from
            ``json_data["publisher"]["name"]`` and falls back to ``""``.
        json_url: Stored as the record's ``json_url``.
    """
    logger.info(f"Saving metadata for dataset {dataset_name}")
    # A chained .get("publisher", {}).get("name", "") raises AttributeError
    # when "publisher" is present but null or not an object, and passes None
    # on when "name" is null. Fall back to "" in all of those cases.
    publisher = json_data.get("publisher")
    if isinstance(publisher, dict):
        publisher_name = publisher.get("name") or ""
    else:
        publisher_name = ""
    dataset = Dataset(
        dataset_id=dataset_name,
        source_url=source_url,
        publisher_name=publisher_name,
        json_url=json_url,
        # Timezone-aware UTC timestamp of when the record was built.
        updated_at=datetime.datetime.now(datetime.UTC),
    )
    save_dataset(dataset)
@@ -67,14 +69,12 @@ def process_dataset(dataset_name: str, dataset_url: str) -> None:
67
69
try :
68
70
json_data = download_json (dataset_url )
69
71
validate_json (dataset_name , json_data )
70
- file_name = f"data/{ dataset_name } .json"
71
- write_json_to_file (file_name , json_data )
72
- publisher_name = json_data .get ("publisher" , {}).get ("name" , "" )
72
+ json_url = write_json_to_file (f"data/{ dataset_name } .json" , json_data )
73
73
save_dataset_metadata (
74
74
dataset_name = dataset_name ,
75
75
source_url = dataset_url ,
76
- publisher_name = publisher_name ,
77
- file_name = file_name ,
76
+ json_data = json_data ,
77
+ json_url = json_url ,
78
78
)
79
79
logger .info (f"Processed dataset { dataset_name } " )
80
80
except Exception as e :
0 commit comments