Skip to content

Commit d3c52cc

Browse files
feat: write json to file
1 parent 4f8e1ce commit d3c52cc

File tree

3 files changed

+49
-0
lines changed

3 files changed

+49
-0
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,4 @@
11
.venv
22
__pycache__
3+
4+
data/

oc4ids_datastore_pipeline/pipeline.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1+
import json
12
import logging
3+
import os
24
from typing import Any
35

46
import requests
@@ -41,11 +43,23 @@ def validate_json(dataset_name: str, json_data: Any) -> None:
4143
raise Exception("Validation failed", e)
4244

4345

46+
def write_json_to_file(file_name: str, json_data: Any) -> None:
    """Serialise ``json_data`` to ``file_name`` as 4-space-indented JSON.

    Missing parent directories of ``file_name`` are created first. The file
    is opened in write mode, so existing content is replaced.

    Args:
        file_name: Path of the file to write. May be a bare file name with
            no directory component.
        json_data: Any value that ``json.dump`` can serialise.

    Raises:
        Exception: If creating the directory, opening the file or
            serialising the data fails. The original error is passed as the
            second argument and is also set as ``__cause__``.
    """
    logger.info(f"Writing dataset to file {file_name}")
    try:
        # os.path.dirname returns "" for a bare file name and
        # os.makedirs("") raises FileNotFoundError, so only create the
        # directory when the path actually contains one.
        directory = os.path.dirname(file_name)
        if directory:
            os.makedirs(directory, exist_ok=True)
        with open(file_name, "w", encoding="utf-8") as file:
            json.dump(json_data, file, indent=4)
        logger.info(f"Finished writing to {file_name}")
    except Exception as e:
        # Keep the (message, error) args callers already rely on, and chain
        # the cause so the original traceback is preserved.
        raise Exception("Error while writing to JSON file", e) from e
55+
56+
4457
def process_dataset(dataset_name: str, dataset_url: str) -> None:
4558
logger.info(f"Processing dataset {dataset_name}")
4659
try:
4760
json_data = download_json(dataset_url)
4861
validate_json(dataset_name, json_data)
62+
write_json_to_file(f"data/{dataset_name}.json", json_data)
4963
logger.info(f"Processed dataset {dataset_name}")
5064
except Exception as e:
5165
logger.warning(f"Failed to process dataset {dataset_name} with error {e}")

tests/test_pipeline.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,15 @@
1+
import os
2+
import tempfile
3+
from textwrap import dedent
4+
15
import pytest
26
from pytest_mock import MockerFixture
37

48
from oc4ids_datastore_pipeline.pipeline import (
59
download_json,
610
process_dataset,
711
validate_json,
12+
write_json_to_file,
813
)
914

1015

@@ -47,6 +52,34 @@ def test_validate_json_raises_validation_errors_exception(
4752
assert "Dataset has 2 validation errors" in str(exc_info.value)
4853

4954

55+
def test_write_json_to_file_writes_in_correct_format() -> None:
    """Check the written file is 4-space-indented JSON, no trailing newline."""
    expected = dedent(
        """\
        {
            "key": "value"
        }"""
    )

    # Named "tmp_dir" rather than "dir" so the builtin dir() is not shadowed.
    with tempfile.TemporaryDirectory() as tmp_dir:
        file_name = os.path.join(tmp_dir, "test_dataset.json")
        write_json_to_file(file_name=file_name, json_data={"key": "value"})

        # Read back inside the context manager: the directory and its
        # contents are removed as soon as the block exits.
        with open(file_name) as file:
            assert file.read() == expected
68+
69+
70+
def test_write_json_to_file_raises_failure_exception(mocker: MockerFixture) -> None:
    """Check a failure inside json.dump surfaces as the wrapped write error."""
    patch_json_dump = mocker.patch("oc4ids_datastore_pipeline.pipeline.json.dump")
    patch_json_dump.side_effect = Exception("Mocked exception")

    # Named "tmp_dir" rather than "dir" so the builtin dir() is not shadowed.
    with tempfile.TemporaryDirectory() as tmp_dir:
        file_name = os.path.join(tmp_dir, "test_dataset.json")
        # Only the call under test sits inside pytest.raises, so an error
        # while setting up the temporary directory cannot satisfy the check.
        with pytest.raises(Exception) as exc_info:
            write_json_to_file(file_name=file_name, json_data={"key": "value"})

    assert "Error while writing to JSON file" in str(exc_info.value)
    assert "Mocked exception" in str(exc_info.value)
81+
82+
5083
def test_process_dataset_catches_exception(mocker: MockerFixture) -> None:
5184
patch_download_json = mocker.patch(
5285
"oc4ids_datastore_pipeline.pipeline.download_json"

0 commit comments

Comments
 (0)