|
8 | 8 |
|
9 | 9 | logger = logging.getLogger(__name__)
|
10 | 10 |
|
11 |
| -REGISTERED_DATASETS = { |
12 |
| - "uganda_gpp": "https://gpp.ppda.go.ug/adminapi/public/api/open-data/v1/infrastructure/projects/download?format=json", # noqa: E501 |
13 |
| - "ghana_cost_sekondi_takoradi": "https://costsekondi-takoradigh.org/uploads/projectJson.json", # noqa: E501 |
14 |
| - "mexico_cost_jalisco": "http://www.costjalisco.org.mx/jsonprojects", |
15 |
| - "mexico_nuevo_leon": "http://si.nl.gob.mx/siasi_ws/api/edcapi/DescargarProjectPackage", # noqa: E501 |
16 |
| - "indonesia_cost_west_lombok": "https://intras.lombokbaratkab.go.id/oc4ids", |
17 |
| - "ukraine_cost_ukraine": "https://portal.costukraine.org/data.json", |
18 |
| - "malawi_cost_malawi": "https://ippi.mw/api/projects/query", |
19 |
| -} |
| 11 | + |
def fetch_registered_datasets() -> dict[str, str]:
    """Fetch the mapping of dataset names to download URLs from the registry.

    Downloads the registry's records JSON and, for every entry under
    ``records``, maps the record key to that record's ``fields.url.value``.

    Returns:
        A dict mapping each record key to its dataset URL.

    Raises:
        Exception: If the request fails or times out, the server returns an
            error status, the body is not valid JSON, or the JSON lacks the
            expected structure. The underlying error is set as the cause
            and is also kept as the second exception argument.
    """
    logger.info("Fetching registered datasets list from registry")
    url = "https://opendataservices.github.io/oc4ids-registry/datatig/type/dataset/records_api.json"  # noqa: E501
    try:
        # requests has no default timeout; bound the call so an unresponsive
        # registry host cannot hang the whole run.
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        json_data = r.json()
        registered_datasets = {
            key: value["fields"]["url"]["value"]
            for (key, value) in json_data["records"].items()
        }
        registered_datasets_count = len(registered_datasets)
        logger.info(f"Fetched URLs for {registered_datasets_count} datasets")
        return registered_datasets
    except Exception as e:
        # Chain explicitly so the traceback shows the root cause as the
        # direct reason for this failure.
        raise Exception(
            "Failed to fetch datasets list from registry", e
        ) from e
20 | 28 |
|
21 | 29 |
|
22 | 30 | def download_json(url: str) -> Any:
|
@@ -66,7 +74,8 @@ def process_dataset(dataset_name: str, dataset_url: str) -> None:
|
66 | 74 |
|
67 | 75 |
|
def process_datasets() -> None:
    """Process every dataset currently listed in the registry.

    Obtains the name-to-URL mapping from ``fetch_registered_datasets`` and
    passes each entry, in mapping order, to ``process_dataset``.
    """
    datasets = fetch_registered_datasets()
    for dataset_name in datasets:
        process_dataset(dataset_name, datasets[dataset_name])
|
71 | 80 |
|
72 | 81 |
|
|
0 commit comments