1
+ import datetime
1
2
import json
2
3
import logging
3
4
import os
6
7
import requests
7
8
from libcoveoc4ids .api import oc4ids_json_output
8
9
10
+ from oc4ids_datastore_pipeline .database import Dataset , create_tables , save_dataset
11
+
9
12
logger = logging .getLogger (__name__ )
10
13
11
14
@@ -62,12 +65,34 @@ def write_json_to_file(file_name: str, json_data: Any) -> None:
62
65
raise Exception ("Error while writing to JSON file" , e )
63
66
64
67
68
def save_dataset_metadata(
    dataset_name: str, source_url: str, publisher_name: str, file_name: str
) -> None:
    """Record where a processed dataset was fetched from and stored.

    Builds a ``Dataset`` row keyed by the dataset name, stamped with the
    current UTC time, and hands it to ``save_dataset`` for persistence.
    """
    logger.info(f"Saving metadata for dataset {dataset_name}")
    # Timestamp is timezone-aware (UTC) so stored times are unambiguous.
    save_dataset(
        Dataset(
            dataset_id=dataset_name,
            source_url=source_url,
            publisher_name=publisher_name,
            json_url=file_name,
            updated_at=datetime.datetime.now(datetime.UTC),
        )
    )
80
+
81
+
65
82
def process_dataset(dataset_name: str, dataset_url: str) -> None:
    """Download, validate, write to disk, and record metadata for one dataset.

    Any failure is caught and logged as a warning so that a single broken
    dataset does not abort processing of the remaining datasets.
    """
    logger.info(f"Processing dataset {dataset_name}")
    try:
        json_data = download_json(dataset_url)
        validate_json(dataset_name, json_data)
        file_name = f"data/{dataset_name}.json"
        write_json_to_file(file_name, json_data)
        # Publisher name is optional in the source data; default to "".
        publisher = json_data.get("publisher", {})
        save_dataset_metadata(
            dataset_name=dataset_name,
            source_url=dataset_url,
            publisher_name=publisher.get("name", ""),
            file_name=file_name,
        )
        logger.info(f"Processed dataset {dataset_name}")
    except Exception as e:
        logger.warning(f"Failed to process dataset {dataset_name} with error {e}")
@@ -80,4 +105,5 @@ def process_datasets() -> None:
80
105
81
106
82
107
def run() -> None:
    """Pipeline entry point: ensure database tables exist, then process all datasets."""
    create_tables()
    process_datasets()
0 commit comments