|
| 1 | +import json |
1 | 2 | import logging
|
| 3 | +import os |
| 4 | +from typing import Any |
| 5 | + |
| 6 | +import requests |
| 7 | +from libcoveoc4ids.api import oc4ids_json_output |
2 | 8 |
|
# Module-level logger, named after this module so log records can be filtered by origin.
logger = logging.getLogger(__name__)
|
4 | 10 |
|
# Maps each dataset name to the URL its JSON is downloaded from.
# The name is also used as the output file stem (``data/<name>.json``).
REGISTERED_DATASETS = {
    "uganda_gpp": "https://gpp.ppda.go.ug/adminapi/public/api/open-data/v1/infrastructure/projects/download?format=json",  # noqa: E501
    "ghana_cost_sekondi_takoradi": "https://costsekondi-takoradigh.org/uploads/projectJson.json",  # noqa: E501
    "mexico_cost_jalisco": "http://www.costjalisco.org.mx/jsonprojects",
    "mexico_nuevo_leon": "http://si.nl.gob.mx/siasi_ws/api/edcapi/DescargarProjectPackage",  # noqa: E501
    "indonesia_cost_west_lombok": "https://intras.lombokbaratkab.go.id/oc4ids",
    "ukraine_cost_ukraine": "https://portal.costukraine.org/data.json",
    "malawi_cost_malawi": "https://ippi.mw/api/projects/query",
}
| 20 | + |
| 21 | + |
def download_json(url: str, timeout: float = 60.0) -> Any:
    """Download ``url`` and return its body parsed as JSON.

    Args:
        url: Address to fetch with an HTTP GET.
        timeout: Seconds to wait for the server to connect/send data before
            giving up. Passed straight to ``requests.get``.

    Returns:
        The decoded JSON document (whatever ``Response.json()`` yields).

    Raises:
        Exception: ``("Download failed", <original error>)`` for any failure,
            including network errors, timeouts, non-2xx statuses and bodies
            that are not valid JSON. The original error is also attached as
            ``__cause__``.
    """
    logger.info(f"Downloading json from {url}")
    try:
        # requests has no default timeout; without one a single unresponsive
        # server would block the whole run indefinitely.
        r = requests.get(url, timeout=timeout)
        r.raise_for_status()
        response_size = len(r.content)
        logger.info(f"Downloaded {url} ({response_size} bytes)")
        return r.json()
    except Exception as e:
        raise Exception("Download failed", e) from e
| 32 | + |
| 33 | + |
def validate_json(dataset_name: str, json_data: Any) -> None:
    """Validate ``json_data`` with ``oc4ids_json_output`` and fail on any error.

    Args:
        dataset_name: Name used only in log messages.
        json_data: Parsed JSON document to validate.

    Raises:
        Exception: ``("Validation failed", <original error>)`` when the
            validator itself raises, or when it reports a
            ``validation_errors_count`` greater than zero. The original error
            is also attached as ``__cause__``.
    """
    logger.info(f"Validating dataset {dataset_name}")
    try:
        validation_result = oc4ids_json_output(json_data=json_data)
        validation_errors_count = validation_result["validation_errors_count"]
        if validation_errors_count > 0:
            # Raised inside the try on purpose: it is wrapped by the handler
            # below so every failure surfaces as "Validation failed".
            raise Exception(f"Dataset has {validation_errors_count} validation errors")
        logger.info(f"Dataset {dataset_name} is valid")
    except Exception as e:
        raise Exception("Validation failed", e) from e
| 44 | + |
| 45 | + |
def write_json_to_file(file_name: str, json_data: Any) -> None:
    """Serialize ``json_data`` to ``file_name`` as JSON indented by 4 spaces.

    Missing parent directories are created first. An existing file is
    overwritten.

    Args:
        file_name: Destination path; may be a bare file name with no
            directory component.
        json_data: JSON-serializable object to write.

    Raises:
        Exception: ``("Error while writing to JSON file", <original error>)``
            for any failure creating directories, opening the file or
            serializing the data. The original error is also attached as
            ``__cause__``.
    """
    logger.info(f"Writing dataset to file {file_name}")
    try:
        parent_dir = os.path.dirname(file_name)
        # dirname is "" for a bare file name, and os.makedirs("") raises
        # FileNotFoundError, so only create a directory when there is one.
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        with open(file_name, "w", encoding="utf-8") as file:
            json.dump(json_data, file, indent=4)
        logger.info(f"Finished writing to {file_name}")
    except Exception as e:
        raise Exception("Error while writing to JSON file", e) from e
| 55 | + |
| 56 | + |
def process_dataset(dataset_name: str, dataset_url: str) -> None:
    """Download, validate and store a single dataset, never raising.

    The dataset is fetched from ``dataset_url``, validated, and written to
    ``data/<dataset_name>.json``. Any failure in those steps is logged as a
    warning and swallowed so that one bad dataset does not stop the caller.

    Args:
        dataset_name: Name used in log messages and as the output file stem.
        dataset_url: Address the dataset JSON is downloaded from.
    """
    logger.info(f"Processing dataset {dataset_name}")
    try:
        json_data = download_json(dataset_url)
        validate_json(dataset_name, json_data)
        write_json_to_file(f"data/{dataset_name}.json", json_data)
        logger.info(f"Processed dataset {dataset_name}")
    except Exception as e:
        # Deliberate best-effort boundary: report and carry on.
        logger.warning(f"Failed to process dataset {dataset_name} with error {e}")
| 66 | + |
| 67 | + |
def process_datasets() -> None:
    """Run ``process_dataset`` for every entry in ``REGISTERED_DATASETS``."""
    for name, url in REGISTERED_DATASETS.items():
        process_dataset(name, url)
| 71 | + |
5 | 72 |
|
def run() -> None:
    """Entry point: process all registered datasets."""
    process_datasets()
0 commit comments