Skip to content

Commit 40117c6

Browse files
refactor: pull metadata out of json_data within function
1 parent d65b946 commit 40117c6

File tree

1 file changed

+9
-9
lines changed

1 file changed

+9
-9
lines changed

oc4ids_datastore_pipeline/pipeline.py

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ def download_json(url: str) -> Any:
2525
raise Exception("Download failed", e)
2626

2727

28-
def validate_json(dataset_name: str, json_data: Any) -> None:
28+
def validate_json(dataset_name: str, json_data: dict[str, Any]) -> None:
2929
logger.info(f"Validating dataset {dataset_name}")
3030
try:
3131
validation_result = oc4ids_json_output(json_data=json_data)
@@ -37,26 +37,28 @@ def validate_json(dataset_name: str, json_data: Any) -> None:
3737
raise Exception("Validation failed", e)
3838

3939

40-
def write_json_to_file(file_name: str, json_data: Any) -> None:
40+
def write_json_to_file(file_name: str, json_data: dict[str, Any]) -> str:
4141
logger.info(f"Writing dataset to file {file_name}")
4242
try:
4343
os.makedirs(os.path.dirname(file_name), exist_ok=True)
4444
with open(file_name, "w") as file:
4545
json.dump(json_data, file, indent=4)
4646
logger.info(f"Finished writing to {file_name}")
47+
return file_name
4748
except Exception as e:
4849
raise Exception("Error while writing to JSON file", e)
4950

5051

5152
def save_dataset_metadata(
52-
dataset_name: str, source_url: str, publisher_name: str, file_name: str
53+
dataset_name: str, source_url: str, json_data: dict[str, Any], json_url: str
5354
) -> None:
5455
logger.info(f"Saving metadata for dataset {dataset_name}")
56+
publisher_name = json_data.get("publisher", {}).get("name", "")
5557
dataset = Dataset(
5658
dataset_id=dataset_name,
5759
source_url=source_url,
5860
publisher_name=publisher_name,
59-
json_url=file_name,
61+
json_url=json_url,
6062
updated_at=datetime.datetime.now(datetime.UTC),
6163
)
6264
save_dataset(dataset)
@@ -67,14 +69,12 @@ def process_dataset(dataset_name: str, dataset_url: str) -> None:
6769
try:
6870
json_data = download_json(dataset_url)
6971
validate_json(dataset_name, json_data)
70-
file_name = f"data/{dataset_name}.json"
71-
write_json_to_file(file_name, json_data)
72-
publisher_name = json_data.get("publisher", {}).get("name", "")
72+
json_url = write_json_to_file(f"data/{dataset_name}.json", json_data)
7373
save_dataset_metadata(
7474
dataset_name=dataset_name,
7575
source_url=dataset_url,
76-
publisher_name=publisher_name,
77-
file_name=file_name,
76+
json_data=json_data,
77+
json_url=json_url,
7878
)
7979
logger.info(f"Processed dataset {dataset_name}")
8080
except Exception as e:

0 commit comments

Comments (0)