From 19c0303aa2290ea42b35655b7964f5d408651725 Mon Sep 17 00:00:00 2001 From: Tilly Woodfield <22456167+tillywoodfield@users.noreply.github.com> Date: Wed, 26 Feb 2025 14:33:30 +0200 Subject: [PATCH 1/2] feat: add functionality for downloading dataset via post request --- oc4ids_datastore_pipeline/pipeline.py | 13 ++++++++++--- tests/test_pipeline.py | 2 +- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/oc4ids_datastore_pipeline/pipeline.py b/oc4ids_datastore_pipeline/pipeline.py index 3b3f63b..e77f1b3 100644 --- a/oc4ids_datastore_pipeline/pipeline.py +++ b/oc4ids_datastore_pipeline/pipeline.py @@ -36,10 +36,17 @@ def __init__(self, errors_count: int, errors: list[str]): super().__init__(message) -def download_json(url: str) -> Any: +def download_json(dataset_id: str, url: str) -> Any: logger.info(f"Downloading json from {url}") try: - r = requests.get(url) + if dataset_id == "malawi_cost_malawi": + payload = { + "start_date": "2010-01-01", + "end_date": datetime.datetime.today().strftime("%Y-%m-%d"), + } + r = requests.post(url, json=payload) + else: + r = requests.get(url) r.raise_for_status() response_size = len(r.content) logger.info(f"Downloaded {url} ({response_size} bytes)") @@ -130,7 +137,7 @@ def save_dataset_metadata( def process_dataset(dataset_id: str, source_url: str) -> None: logger.info(f"Processing dataset {dataset_id}") - json_data = download_json(source_url) + json_data = download_json(dataset_id, source_url) validate_json(dataset_id, json_data) json_path = write_json_to_file( file_name=f"data/{dataset_id}/{dataset_id}.json", diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index f09425e..b78aee4 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -22,7 +22,7 @@ def test_download_json_raises_failure_exception(mocker: MockerFixture) -> None: patch_get.side_effect = Exception("Mocked exception") with pytest.raises(ProcessDatasetError) as exc_info: - download_json(url="https://test_dataset.json") + download_json(dataset_id="test_dataset", url="https://test_dataset.json") assert "Download failed" in str(exc_info.value) assert "Mocked exception" in str(exc_info.value) From 2ab2014729ea24d6fee1d3439ddca04fe26b245c Mon Sep 17 00:00:00 2001 From: Tilly Woodfield <22456167+tillywoodfield@users.noreply.github.com> Date: Wed, 26 Feb 2025 14:34:02 +0200 Subject: [PATCH 2/2] feat: bump version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 6af756c..fe5231f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "flit_core.buildapi" [project] name = "oc4ids-datastore-pipeline" description = "OC4IDS Datastore Pipeline" -version = "0.3.0" +version = "0.4.0" readme = "README.md" dependencies = [ "alembic",