Skip to content

Commit afea275

Browse files
feat: download datasets
1 parent f06be90 commit afea275

File tree

4 files changed

+90
-4
lines changed

4 files changed

+90
-4
lines changed
Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,45 @@
11
import logging
2+
from typing import Any
3+
4+
import requests
25

36
logger = logging.getLogger(__name__)
47

8+
# Registry of datasets handled by the pipeline: maps a dataset name to the URL
# its JSON is downloaded from. process_datasets() walks this mapping in order.
# NOTE(review): the two mexico_* entries use plain http:// - confirm whether
# those publishers also serve the same data over https.
REGISTERED_DATASETS = {
9+
"uganda_gpp": "https://gpp.ppda.go.ug/adminapi/public/api/open-data/v1/infrastructure/projects/download?format=json", # noqa: E501
10+
"ghana_cost_sekondi_takoradi": "https://costsekondi-takoradigh.org/uploads/projectJson.json", # noqa: E501
11+
"mexico_cost_jalisco": "http://www.costjalisco.org.mx/jsonprojects",
12+
"mexico_nuevo_leon": "http://si.nl.gob.mx/siasi_ws/api/edcapi/DescargarProjectPackage", # noqa: E501
13+
"indonesia_cost_west_lombok": "https://intras.lombokbaratkab.go.id/oc4ids",
14+
"ukraine_cost_ukraine": "https://portal.costukraine.org/data.json",
15+
"malawi_cost_malawi": "https://ippi.mw/api/projects/query",
16+
}
17+
18+
19+
def download_json(url: str, *, timeout: float = 60.0) -> Any:
    """Download ``url`` and return its body decoded as JSON.

    Args:
        url: Address of the JSON document to fetch.
        timeout: Seconds to wait for the server to accept the connection or
            to send data before the request is abandoned. Without a timeout
            ``requests`` waits indefinitely, so a single unresponsive server
            would stall the whole pipeline.

    Returns:
        The decoded JSON payload (whatever ``Response.json()`` produces).

    Raises:
        Exception: With args ``("Download failed", <original error>)`` when
            the request fails, the server answers with an error status, or
            the body is not valid JSON. The original error is also chained
            as ``__cause__``.
    """
    logger.info("Downloading json from %s", url)
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        logger.info("Downloaded %s (%d bytes)", url, len(response.content))
        return response.json()
    except Exception as e:
        # Keep the original error in args so str(exc) still shows it, and
        # chain it explicitly so the traceback marks it as the direct cause.
        raise Exception("Download failed", e) from e
29+
30+
31+
def process_dataset(dataset_name: str, dataset_url: str) -> None:
    """Download a single dataset, logging a warning instead of raising.

    Args:
        dataset_name: Name used to identify the dataset in log messages.
        dataset_url: URL the dataset's JSON is downloaded from.

    Any exception raised by the download is caught and reported as a
    warning, so a failure here never propagates to the caller.
    """
    logger.info(f"Processing dataset {dataset_name}")
    try:
        download_json(dataset_url)
    except Exception as error:
        logger.warning(
            f"Failed to process dataset {dataset_name} with error {error}"
        )
37+
38+
39+
def process_datasets() -> None:
    """Run process_dataset for every entry of REGISTERED_DATASETS, in order."""
    for dataset_name, dataset_url in REGISTERED_DATASETS.items():
        process_dataset(dataset_name, dataset_url)
42+
543

644
def run() -> None:
7-
logger.info("Hello World!")
45+
process_datasets()

pyproject.toml

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@ name = "oc4ids-datastore-pipeline"
77
description = "OC4IDS Datastore Pipeline"
88
version = "0.1.0"
99
readme = "README.md"
10-
dependencies = []
10+
dependencies = [
11+
"requests"
12+
]
1113

1214
[project.optional-dependencies]
1315
dev = [
@@ -17,6 +19,8 @@ dev = [
1719
"Flake8-pyproject",
1820
"mypy",
1921
"pytest",
22+
"pytest-mock",
23+
"types-requests",
2024
]
2125

2226
[project.scripts]
@@ -30,3 +34,7 @@ max-line-length = 88
3034

3135
[tool.mypy]
3236
strict = true
37+
38+
[tool.pytest.ini_options]
39+
log_cli = true
40+
log_cli_level = "INFO"

requirements_dev.txt

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@
66
#
77
black==25.1.0
88
# via oc4ids-datastore-pipeline (pyproject.toml)
9+
certifi==2025.1.31
10+
# via requests
11+
charset-normalizer==3.4.1
12+
# via requests
913
click==8.1.8
1014
# via black
1115
flake8==7.1.1
@@ -14,6 +18,8 @@ flake8==7.1.1
1418
# oc4ids-datastore-pipeline (pyproject.toml)
1519
flake8-pyproject==1.2.3
1620
# via oc4ids-datastore-pipeline (pyproject.toml)
21+
idna==3.10
22+
# via requests
1723
iniconfig==2.0.0
1824
# via pytest
1925
isort==6.0.0
@@ -41,6 +47,18 @@ pycodestyle==2.12.1
4147
pyflakes==3.2.0
4248
# via flake8
4349
pytest==8.3.4
50+
# via
51+
# oc4ids-datastore-pipeline (pyproject.toml)
52+
# pytest-mock
53+
pytest-mock==3.14.0
54+
# via oc4ids-datastore-pipeline (pyproject.toml)
55+
requests==2.32.3
56+
# via oc4ids-datastore-pipeline (pyproject.toml)
57+
types-requests==2.32.0.20241016
4458
# via oc4ids-datastore-pipeline (pyproject.toml)
4559
typing-extensions==4.12.2
4660
# via mypy
61+
urllib3==2.3.0
62+
# via
63+
# requests
64+
# types-requests

tests/test_pipeline.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,24 @@
1-
def test_hello_world() -> None:
2-
pass
1+
import pytest
2+
from pytest_mock import MockerFixture
3+
4+
from oc4ids_datastore_pipeline.pipeline import download_json, process_dataset
5+
6+
7+
def test_download_json_raises_failure_exception(mocker: MockerFixture) -> None:
    """download_json wraps a failing request in a "Download failed" exception."""
    mocker.patch(
        "oc4ids_datastore_pipeline.pipeline.requests.get",
        side_effect=Exception("Mocked exception"),
    )

    with pytest.raises(Exception) as raised:
        download_json(url="https://test_dataset.json")

    # Both the wrapper text and the underlying error must be visible.
    message = str(raised.value)
    assert "Download failed" in message
    assert "Mocked exception" in message
16+
17+
18+
def test_process_dataset_catches_exception(mocker: MockerFixture) -> None:
    """process_dataset swallows a download failure instead of propagating it."""
    patch_download_json = mocker.patch(
        "oc4ids_datastore_pipeline.pipeline.download_json"
    )
    patch_download_json.side_effect = Exception("Download failed")

    # Must return normally even though the download raises.
    process_dataset("test_dataset", "https://test_dataset.json")

    # Guard against a vacuous pass: the failing download must really have
    # been attempted with the dataset's URL.
    patch_download_json.assert_called_once_with("https://test_dataset.json")

0 commit comments

Comments
 (0)