Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ jobs:
python-version: "3.12"
- name: Install dev requirements
run: pip install -r requirements_dev.txt
- name: Install local package
run: pip install .
- name: Check black
run: black --check oc4ids_datastore_pipeline/ tests/
- name: Check isort
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
.venv
__pycache__

data/
69 changes: 68 additions & 1 deletion oc4ids_datastore_pipeline/pipeline.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,74 @@
import json
import logging
import os
from typing import Any

import requests
from libcoveoc4ids.api import oc4ids_json_output

# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)

# Maps a dataset identifier to the URL its JSON is downloaded from.
# process_datasets() iterates over every entry, and process_dataset() uses the
# identifier to build the output path data/<identifier>.json.
REGISTERED_DATASETS = {
"uganda_gpp": "https://gpp.ppda.go.ug/adminapi/public/api/open-data/v1/infrastructure/projects/download?format=json", # noqa: E501
"ghana_cost_sekondi_takoradi": "https://costsekondi-takoradigh.org/uploads/projectJson.json", # noqa: E501
"mexico_cost_jalisco": "http://www.costjalisco.org.mx/jsonprojects",
"mexico_nuevo_leon": "http://si.nl.gob.mx/siasi_ws/api/edcapi/DescargarProjectPackage", # noqa: E501
"indonesia_cost_west_lombok": "https://intras.lombokbaratkab.go.id/oc4ids",
"ukraine_cost_ukraine": "https://portal.costukraine.org/data.json",
"malawi_cost_malawi": "https://ippi.mw/api/projects/query",
}


def download_json(url: str, timeout: float = 30.0) -> Any:
    """Fetch ``url`` with an HTTP GET and return the decoded JSON body.

    Args:
        url: Address of the JSON document to download.
        timeout: Seconds ``requests`` waits for the server to connect and to
            send data before giving up. ``requests`` applies no timeout by
            default, so without this a single unresponsive server would
            block the caller indefinitely.

    Returns:
        The value produced by decoding the response body as JSON.

    Raises:
        Exception: If the request fails, the response has an error status,
            or the body cannot be decoded as JSON. ``args`` is
            ``("Download failed", original_error)`` and the original error
            is also chained as ``__cause__``.
    """
    logger.info(f"Downloading json from {url}")
    try:
        r = requests.get(url, timeout=timeout)
        # Turn 4xx/5xx responses into exceptions instead of parsing them.
        r.raise_for_status()
        response_size = len(r.content)
        logger.info(f"Downloaded {url} ({response_size} bytes)")
        return r.json()
    except Exception as e:
        # Keep the original two-element args so existing log output is
        # unchanged; "from e" records the cause explicitly in tracebacks.
        raise Exception("Download failed", e) from e


def validate_json(dataset_name: str, json_data: Any) -> None:
    """Run ``oc4ids_json_output`` on ``json_data`` and raise if it reports errors.

    Args:
        dataset_name: Identifier used only in log messages.
        json_data: Decoded JSON document to validate.

    Raises:
        Exception: If ``oc4ids_json_output`` itself fails, or if its result
            has a ``validation_errors_count`` greater than zero. ``args`` is
            ``("Validation failed", original_error)`` and the original error
            is also chained as ``__cause__``.
    """
    logger.info(f"Validating dataset {dataset_name}")
    try:
        validation_result = oc4ids_json_output(json_data=json_data)
        validation_errors_count = validation_result["validation_errors_count"]
        if validation_errors_count > 0:
            # This raise is caught by the handler below, so callers always
            # receive it wrapped in the "Validation failed" exception.
            raise Exception(f"Dataset has {validation_errors_count} validation errors")
        logger.info(f"Dataset {dataset_name} is valid")
    except Exception as e:
        # Keep the original two-element args so existing log output is
        # unchanged; "from e" records the cause explicitly in tracebacks.
        raise Exception("Validation failed", e) from e


def write_json_to_file(file_name: str, json_data: Any) -> None:
    """Serialise ``json_data`` to ``file_name`` as JSON indented by 4 spaces.

    Any missing parent directories of ``file_name`` are created first.

    Args:
        file_name: Path of the file to write; an existing file is overwritten.
        json_data: JSON-serialisable value to store.

    Raises:
        Exception: If the directory cannot be created, the file cannot be
            opened, or the data cannot be serialised. ``args`` is
            ``("Error while writing to JSON file", original_error)`` and the
            original error is also chained as ``__cause__``.
    """
    logger.info(f"Writing dataset to file {file_name}")
    try:
        directory = os.path.dirname(file_name)
        # A bare file name has an empty directory part, and os.makedirs("")
        # raises FileNotFoundError, so only create directories when needed.
        if directory:
            os.makedirs(directory, exist_ok=True)
        # Pin the encoding so the output does not depend on the locale.
        with open(file_name, "w", encoding="utf-8") as file:
            json.dump(json_data, file, indent=4)
        logger.info(f"Finished writing to {file_name}")
    except Exception as e:
        # Keep the original two-element args so existing log output is
        # unchanged; "from e" records the cause explicitly in tracebacks.
        raise Exception("Error while writing to JSON file", e) from e


def process_dataset(dataset_name: str, dataset_url: str) -> None:
    """Download, validate and store a single dataset without raising.

    The JSON at ``dataset_url`` is downloaded, validated and written to
    ``data/<dataset_name>.json``. Any exception raised along the way is
    caught and reported as a warning rather than propagated to the caller.
    """
    logger.info(f"Processing dataset {dataset_name}")
    output_path = f"data/{dataset_name}.json"
    try:
        payload = download_json(dataset_url)
        validate_json(dataset_name, payload)
        write_json_to_file(output_path, payload)
        logger.info(f"Processed dataset {dataset_name}")
    except Exception as e:
        logger.warning(f"Failed to process dataset {dataset_name} with error {e}")


def process_datasets() -> None:
    """Call ``process_dataset`` once for each entry in ``REGISTERED_DATASETS``."""
    for dataset_name in REGISTERED_DATASETS:
        process_dataset(dataset_name, REGISTERED_DATASETS[dataset_name])


def run() -> None:
    """Log a greeting, then process every registered dataset."""
    greeting = "Hello World!"
    logger.info(greeting)
    process_datasets()
15 changes: 14 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@ name = "oc4ids-datastore-pipeline"
description = "OC4IDS Datastore Pipeline"
version = "0.1.0"
readme = "README.md"
dependencies = []
dependencies = [
"libcoveoc4ids",
"requests",
]

[project.optional-dependencies]
dev = [
Expand All @@ -17,6 +20,8 @@ dev = [
"Flake8-pyproject",
"mypy",
"pytest",
"pytest-mock",
"types-requests",
]

[project.scripts]
Expand All @@ -30,3 +35,11 @@ max-line-length = 88

[tool.mypy]
strict = true

[[tool.mypy.overrides]]
module = ["libcoveoc4ids.*"]
follow_untyped_imports = true

[tool.pytest.ini_options]
log_cli = true
log_cli_level = "INFO"
152 changes: 149 additions & 3 deletions requirements_dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,75 @@
#
# pip-compile --extra=dev --output-file=requirements_dev.txt pyproject.toml
#
attrs==25.1.0
# via
# cattrs
# jsonschema
# referencing
# requests-cache
backports-datetime-fromisoformat==2.0.3
# via flattentool
black==25.1.0
# via oc4ids-datastore-pipeline (pyproject.toml)
btrees==6.1
# via zodb
cattrs==24.1.2
# via requests-cache
certifi==2025.1.31
# via requests
cffi==1.17.1
# via persistent
charset-normalizer==3.4.1
# via requests
click==8.1.8
# via black
# via
# black
# libcoveoc4ids
# libcoveocds
defusedxml==0.7.1
# via odfpy
et-xmlfile==2.0.0
# via openpyxl
flake8==7.1.1
# via
# flake8-pyproject
# oc4ids-datastore-pipeline (pyproject.toml)
flake8-pyproject==1.2.3
# via oc4ids-datastore-pipeline (pyproject.toml)
flattentool==0.27.0
# via libcove
idna==3.10
# via requests
ijson==3.3.0
# via flattentool
iniconfig==2.0.0
# via pytest
isort==6.0.0
# via oc4ids-datastore-pipeline (pyproject.toml)
json-merge-patch==0.2
# via ocdsextensionregistry
jsonref==1.1.0
# via
# flattentool
# libcove
# libcoveocds
# ocdsextensionregistry
jsonschema==4.23.0
# via
# libcove
# libcoveocds
jsonschema-specifications==2024.10.1
# via jsonschema
libcove==0.32.1
# via
# libcoveoc4ids
# libcoveocds
libcoveoc4ids==0.9.0
# via oc4ids-datastore-pipeline (pyproject.toml)
libcoveocds==0.16.4
# via libcoveoc4ids
lxml==5.3.0
# via flattentool
mccabe==0.7.0
# via flake8
mypy==1.14.1
Expand All @@ -26,21 +81,112 @@ mypy-extensions==1.0.0
# via
# black
# mypy
ocdsextensionregistry==0.6.9
# via libcoveocds
odfpy==1.4.1
# via flattentool
openpyxl==3.1.5
# via flattentool
packaging==24.2
# via
# black
# pytest
pathspec==0.12.1
# via black
persistent==6.1
# via
# btrees
# zodb
platformdirs==4.3.6
# via black
# via
# black
# requests-cache
pluggy==1.5.0
# via pytest
pycodestyle==2.12.1
# via flake8
pycparser==2.22
# via cffi
pyflakes==3.2.0
# via flake8
pytest==8.3.4
# via
# oc4ids-datastore-pipeline (pyproject.toml)
# pytest-mock
pytest-mock==3.14.0
# via oc4ids-datastore-pipeline (pyproject.toml)
pytz==2025.1
# via flattentool
referencing==0.36.2
# via
# jsonschema
# jsonschema-specifications
# libcove
# libcoveocds
requests==2.32.3
# via
# libcove
# libcoveocds
# oc4ids-datastore-pipeline (pyproject.toml)
# ocdsextensionregistry
# requests-cache
requests-cache==1.2.1
# via ocdsextensionregistry
rfc3339-validator==0.1.4
# via libcove
rfc3987==1.3.8
# via libcove
rpds-py==0.22.3
# via
# jsonschema
# referencing
schema==0.7.7
# via flattentool
six==1.17.0
# via
# rfc3339-validator
# url-normalize
transaction==5.0
# via zodb
types-requests==2.32.0.20241016
# via oc4ids-datastore-pipeline (pyproject.toml)
typing-extensions==4.12.2
# via mypy
# via
# mypy
# referencing
url-normalize==1.4.3
# via requests-cache
urllib3==2.3.0
# via
# requests
# requests-cache
# types-requests
xmltodict==0.14.2
# via flattentool
zc-lockfile==3.0.post1
# via zodb
zc-zlibstorage==1.2.0
# via flattentool
zconfig==4.2
# via zodb
zodb==6.0
# via
# flattentool
# zc-zlibstorage
zodbpickle==4.1.1
# via zodb
zope-deferredimport==5.0
# via persistent
zope-interface==7.2
# via
# btrees
# persistent
# transaction
# zc-zlibstorage
# zodb
# zope-proxy
zope-proxy==6.1
# via zope-deferredimport

# The following packages are considered to be unsafe in a requirements file:
# setuptools
Loading