Skip to content

Commit d65b946

Browse files
refactor: move registry logic to separate module
1 parent f669602 commit d65b946

File tree

4 files changed

+144
-132
lines changed

4 files changed

+144
-132
lines changed

oc4ids_datastore_pipeline/pipeline.py

Lines changed: 1 addition & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -8,47 +8,11 @@
88
from libcoveoc4ids.api import oc4ids_json_output
99

1010
from oc4ids_datastore_pipeline.database import Dataset, save_dataset
11+
from oc4ids_datastore_pipeline.registry import fetch_registered_datasets
1112

1213
logger = logging.getLogger(__name__)
1314

1415

15-
def fetch_registered_datasets() -> dict[str, str]:
16-
logger.info("Fetching registered datasets list from registry")
17-
try:
18-
url = "https://opendataservices.github.io/oc4ids-registry/datatig/type/dataset/records_api.json" # noqa: E501
19-
r = requests.get(url)
20-
r.raise_for_status()
21-
json_data = r.json()
22-
registered_datasets = {
23-
key: value["fields"]["url"]["value"]
24-
for (key, value) in json_data["records"].items()
25-
}
26-
registered_datasets_count = len(registered_datasets)
27-
logger.info(f"Fetched URLs for {registered_datasets_count} datasets")
28-
return registered_datasets
29-
except Exception as e:
30-
raise Exception("Failed to fetch datasets list from registry", e)
31-
32-
33-
def fetch_license_mappings() -> dict[str, str]:
34-
logger.info("Fetching license mappings from registry")
35-
try:
36-
url = "https://opendataservices.github.io/oc4ids-registry/datatig/type/license/records_api.json" # noqa: E501
37-
r = requests.get(url)
38-
r.raise_for_status()
39-
json_data = r.json()
40-
return {
41-
urls["fields"]["url"]["value"]: license["fields"]["title"]["value"]
42-
for license in json_data["records"].values()
43-
for urls in license["fields"]["urls"]["values"]
44-
}
45-
except Exception as e:
46-
logger.warning(
47-
"Failed to fetch license mappings from registry, with error: " + str(e),
48-
)
49-
return {}
50-
51-
5216
def download_json(url: str) -> Any:
5317
logger.info(f"Downloading json from {url}")
5418
try:
oc4ids_datastore_pipeline/registry.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import logging
2+
3+
import requests
4+
5+
logger = logging.getLogger(__name__)
6+
7+
8+
def fetch_registered_datasets() -> dict[str, str]:
    """Fetch the list of registered datasets from the OC4IDS registry.

    Returns:
        A mapping from each record key in the registry response to that
        record's ``fields.url.value`` string.

    Raises:
        Exception: If the request fails or times out, the server answers
            with an error status, or the payload does not have the expected
            structure. The underlying error is kept in ``args`` (so it still
            appears in ``str(exc)``) and is chained as ``__cause__``.
    """
    logger.info("Fetching registered datasets list from registry")
    try:
        url = "https://opendataservices.github.io/oc4ids-registry/datatig/type/dataset/records_api.json"  # noqa: E501
        # requests waits forever unless a timeout is given; bound the wait so
        # an unresponsive registry cannot hang the whole pipeline run.
        r = requests.get(url, timeout=60)
        r.raise_for_status()
        json_data = r.json()
        registered_datasets = {
            key: value["fields"]["url"]["value"]
            for (key, value) in json_data["records"].items()
        }
        registered_datasets_count = len(registered_datasets)
        logger.info(f"Fetched URLs for {registered_datasets_count} datasets")
        return registered_datasets
    except Exception as e:
        # Keep `e` in args for backward-compatible message text, and chain it
        # so the original traceback is preserved as the explicit cause.
        raise Exception("Failed to fetch datasets list from registry", e) from e
24+
25+
26+
def fetch_license_mappings() -> dict[str, str]:
    """Fetch the license URL -> license title mappings from the registry.

    Every license record may list several URLs; each of them is mapped to
    the title of the record it belongs to. If the same URL appears more than
    once, the last occurrence wins (normal dict-comprehension semantics).

    Returns:
        A mapping from license URL to license title, or an empty dict when
        the registry could not be fetched or parsed. Failures are
        deliberately best-effort: they are logged as a warning, not raised.
    """
    logger.info("Fetching license mappings from registry")
    try:
        url = "https://opendataservices.github.io/oc4ids-registry/datatig/type/license/records_api.json"  # noqa: E501
        # requests waits forever unless a timeout is given; bound the wait so
        # an unresponsive registry degrades to "no mappings" instead of a hang.
        r = requests.get(url, timeout=60)
        r.raise_for_status()
        json_data = r.json()
        # `record` rather than `license`, which would shadow the builtin.
        return {
            url_entry["fields"]["url"]["value"]: record["fields"]["title"]["value"]
            for record in json_data["records"].values()
            for url_entry in record["fields"]["urls"]["values"]
        }
    except Exception as e:
        # Lazy %-formatting renders the same message text as before.
        logger.warning(
            "Failed to fetch license mappings from registry, with error: %s",
            e,
        )
        return {}

tests/test_pipeline.py

Lines changed: 0 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -1,113 +1,18 @@
11
import os
22
import tempfile
33
from textwrap import dedent
4-
from unittest.mock import MagicMock
54

65
import pytest
76
from pytest_mock import MockerFixture
87

98
from oc4ids_datastore_pipeline.pipeline import (
109
download_json,
11-
fetch_license_mappings,
12-
fetch_registered_datasets,
1310
process_dataset,
1411
validate_json,
1512
write_json_to_file,
1613
)
1714

1815

19-
def test_fetch_registered_datasets(mocker: MockerFixture) -> None:
20-
mock_response = MagicMock()
21-
mock_response.json.return_value = {
22-
"records": {
23-
"test_dataset": {"fields": {"url": {"value": "https://test_dataset.json"}}}
24-
}
25-
}
26-
patch_get = mocker.patch("oc4ids_datastore_pipeline.pipeline.requests.get")
27-
patch_get.return_value = mock_response
28-
29-
result = fetch_registered_datasets()
30-
31-
assert result == {"test_dataset": "https://test_dataset.json"}
32-
33-
34-
def test_fetch_registered_datasets_raises_failure_exception(
35-
mocker: MockerFixture,
36-
) -> None:
37-
patch_get = mocker.patch("oc4ids_datastore_pipeline.pipeline.requests.get")
38-
patch_get.side_effect = Exception("Mocked exception")
39-
40-
with pytest.raises(Exception) as exc_info:
41-
fetch_registered_datasets()
42-
43-
assert "Failed to fetch datasets list from registry" in str(exc_info.value)
44-
assert "Mocked exception" in str(exc_info.value)
45-
46-
47-
def test_fetch_license_mappings(mocker: MockerFixture) -> None:
48-
mock_response = MagicMock()
49-
mock_response.json.return_value = {
50-
"records": {
51-
"license_1": {
52-
"fields": {
53-
"title": {"value": "License 1"},
54-
"urls": {
55-
"values": [
56-
{
57-
"fields": {
58-
"url": {"value": "https://license_1.com/license"}
59-
}
60-
},
61-
{
62-
"fields": {
63-
"url": {
64-
"value": "https://license_1.com/different_url"
65-
}
66-
}
67-
},
68-
]
69-
},
70-
}
71-
},
72-
"license_2": {
73-
"fields": {
74-
"title": {"value": "License 2"},
75-
"urls": {
76-
"values": [
77-
{
78-
"fields": {
79-
"url": {"value": "https://license_2.com/license"}
80-
}
81-
},
82-
]
83-
},
84-
}
85-
},
86-
}
87-
}
88-
patch_get = mocker.patch("oc4ids_datastore_pipeline.pipeline.requests.get")
89-
patch_get.return_value = mock_response
90-
91-
result = fetch_license_mappings()
92-
93-
assert result == {
94-
"https://license_1.com/license": "License 1",
95-
"https://license_1.com/different_url": "License 1",
96-
"https://license_2.com/license": "License 2",
97-
}
98-
99-
100-
def test_fetch_license_mappings_catches_exception(
101-
mocker: MockerFixture,
102-
) -> None:
103-
patch_get = mocker.patch("oc4ids_datastore_pipeline.pipeline.requests.get")
104-
patch_get.side_effect = Exception("Mocked exception")
105-
106-
result = fetch_license_mappings()
107-
108-
assert result == {}
109-
110-
11116
def test_download_json_raises_failure_exception(mocker: MockerFixture) -> None:
11217
patch_get = mocker.patch("oc4ids_datastore_pipeline.pipeline.requests.get")
11318
patch_get.side_effect = Exception("Mocked exception")

tests/test_registry.py

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
from unittest.mock import MagicMock
2+
3+
import pytest
4+
from pytest_mock import MockerFixture
5+
6+
from oc4ids_datastore_pipeline.registry import (
7+
fetch_license_mappings,
8+
fetch_registered_datasets,
9+
)
10+
11+
12+
def test_fetch_registered_datasets(mocker: MockerFixture) -> None:
    """A successful registry response is flattened to ``{key: url}``."""
    mock_response = MagicMock()
    mock_response.json.return_value = {
        "records": {
            "test_dataset": {"fields": {"url": {"value": "https://test_dataset.json"}}}
        }
    }
    # Patch through the module under test (registry), not through pipeline:
    # the old target only resolved because pipeline also imports `requests`.
    patch_get = mocker.patch("oc4ids_datastore_pipeline.registry.requests.get")
    patch_get.return_value = mock_response

    result = fetch_registered_datasets()

    assert result == {"test_dataset": "https://test_dataset.json"}
25+
26+
27+
def test_fetch_registered_datasets_raises_failure_exception(
    mocker: MockerFixture,
) -> None:
    """A failing request is re-raised with context and the original error."""
    # Patch through the module under test (registry), not through pipeline:
    # the old target only resolved because pipeline also imports `requests`.
    patch_get = mocker.patch("oc4ids_datastore_pipeline.registry.requests.get")
    patch_get.side_effect = Exception("Mocked exception")

    with pytest.raises(Exception) as exc_info:
        fetch_registered_datasets()

    assert "Failed to fetch datasets list from registry" in str(exc_info.value)
    assert "Mocked exception" in str(exc_info.value)
38+
39+
40+
def test_fetch_license_mappings(mocker: MockerFixture) -> None:
    """Every URL of every license record maps to that record's title."""
    mock_response = MagicMock()
    mock_response.json.return_value = {
        "records": {
            "license_1": {
                "fields": {
                    "title": {"value": "License 1"},
                    "urls": {
                        "values": [
                            {
                                "fields": {
                                    "url": {"value": "https://license_1.com/license"}
                                }
                            },
                            {
                                "fields": {
                                    "url": {
                                        "value": "https://license_1.com/different_url"
                                    }
                                }
                            },
                        ]
                    },
                }
            },
            "license_2": {
                "fields": {
                    "title": {"value": "License 2"},
                    "urls": {
                        "values": [
                            {
                                "fields": {
                                    "url": {"value": "https://license_2.com/license"}
                                }
                            },
                        ]
                    },
                }
            },
        }
    }
    # Patch through the module under test (registry), not through pipeline:
    # the old target only resolved because pipeline also imports `requests`.
    patch_get = mocker.patch("oc4ids_datastore_pipeline.registry.requests.get")
    patch_get.return_value = mock_response

    result = fetch_license_mappings()

    assert result == {
        "https://license_1.com/license": "License 1",
        "https://license_1.com/different_url": "License 1",
        "https://license_2.com/license": "License 2",
    }
91+
92+
93+
def test_fetch_license_mappings_catches_exception(
    mocker: MockerFixture,
) -> None:
    """A failing request is swallowed and yields an empty mapping."""
    # Patch through the module under test (registry), not through pipeline:
    # the old target only resolved because pipeline also imports `requests`.
    patch_get = mocker.patch("oc4ids_datastore_pipeline.registry.requests.get")
    patch_get.side_effect = Exception("Mocked exception")

    result = fetch_license_mappings()

    assert result == {}

0 commit comments

Comments
 (0)