### Set database environment variable
+ +``` +export DATABASE_URL="postgresql://oc4ids_datastore_read_only@localhost/oc4ids_datastore" +``` + +### Run app + +``` +fastapi dev oc4ids_datastore_api/main.py +``` + +### View the OpenAPI schema + +While the app is running, go to `http://127.0.0.1:8000/docs/` + +### Run linting and type checking + +``` +black oc4ids_datastore_api/ tests/ +isort oc4ids_datastore_api/ tests/ +flake8 oc4ids_datastore_api/ tests/ +mypy oc4ids_datastore_api/ tests/ +``` diff --git a/oc4ids_datastore_api/__init__.py b/oc4ids_datastore_api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/oc4ids_datastore_api/database.py b/oc4ids_datastore_api/database.py new file mode 100644 index 0000000..9e2445c --- /dev/null +++ b/oc4ids_datastore_api/database.py @@ -0,0 +1,21 @@ +import os +from typing import Sequence + +from sqlalchemy import Engine +from sqlmodel import Session, create_engine, select + +from oc4ids_datastore_api.models import DatasetSQLModel + +_engine = None + + +def get_engine() -> Engine: + global _engine + if _engine is None: + _engine = create_engine(os.environ["DATABASE_URL"]) + return _engine + + +def fetch_all_datasets() -> Sequence[DatasetSQLModel]: + with Session(get_engine()) as session: + return session.exec(select(DatasetSQLModel)).all() diff --git a/oc4ids_datastore_api/main.py b/oc4ids_datastore_api/main.py new file mode 100644 index 0000000..3425f8e --- /dev/null +++ b/oc4ids_datastore_api/main.py @@ -0,0 +1,11 @@ +from fastapi import FastAPI + +from oc4ids_datastore_api.schemas import Dataset +from oc4ids_datastore_api.services import get_all_datasets + +app = FastAPI() + + +@app.get("/datasets") +def get_datasets() -> list[Dataset]: + return get_all_datasets() diff --git a/oc4ids_datastore_api/models.py b/oc4ids_datastore_api/models.py new file mode 100644 index 0000000..1386b74 --- /dev/null +++ b/oc4ids_datastore_api/models.py @@ -0,0 +1,17 @@ +import datetime + +from sqlmodel import Field, SQLModel + + +class DatasetSQLModel(SQLModel, 
table=True): + __tablename__ = "dataset" + + dataset_id: str = Field(primary_key=True) + source_url: str + publisher_name: str + license_url: str | None + license_name: str | None + json_url: str | None + csv_url: str | None + xlsx_url: str | None + updated_at: datetime.datetime diff --git a/oc4ids_datastore_api/schemas.py b/oc4ids_datastore_api/schemas.py new file mode 100644 index 0000000..0486d12 --- /dev/null +++ b/oc4ids_datastore_api/schemas.py @@ -0,0 +1,25 @@ +import datetime + +from pydantic import BaseModel + + +class Publisher(BaseModel): + name: str + + +class License(BaseModel): + url: str | None + name: str | None + + +class Download(BaseModel): + format: str + url: str + + +class Dataset(BaseModel): + loaded_at: datetime.datetime + source_url: str + publisher: Publisher + license: License + downloads: list[Download] diff --git a/oc4ids_datastore_api/services.py b/oc4ids_datastore_api/services.py new file mode 100644 index 0000000..76a96e0 --- /dev/null +++ b/oc4ids_datastore_api/services.py @@ -0,0 +1,26 @@ +from oc4ids_datastore_api.database import fetch_all_datasets +from oc4ids_datastore_api.models import DatasetSQLModel +from oc4ids_datastore_api.schemas import Dataset, Download, License, Publisher + + +def _transform_dataset(dataset: DatasetSQLModel) -> Dataset: + download_urls = { + "json": dataset.json_url, + "csv": dataset.csv_url, + "xlsx": dataset.xlsx_url, + } + downloads = [ + Download(format=format, url=url) for format, url in download_urls.items() if url + ] + return Dataset( + loaded_at=dataset.updated_at, + source_url=dataset.source_url, + publisher=Publisher(name=dataset.publisher_name), + license=License(url=dataset.license_url, name=dataset.license_name), + downloads=downloads, + ) + + +def get_all_datasets() -> list[Dataset]: + datasets = fetch_all_datasets() + return [_transform_dataset(dataset) for dataset in datasets] diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..8e174ea --- /dev/null +++ 
b/pyproject.toml @@ -0,0 +1,38 @@ +[build-system] +requires = ["flit_core >=3.2,<4"] +build-backend = "flit_core.buildapi" + +[project] +name = "oc4ids-datastore-api" +description = "OC4IDS Datastore API" +version = "0.1.0" +readme = "README.md" +dependencies = [ + "fastapi[standard]", + "psycopg2", + "sqlmodel", +] + +[project.optional-dependencies] +dev = [ + "black", + "isort", + "flake8", + "Flake8-pyproject", + "mypy", + "pytest", + "pytest-mock", +] + +[tool.isort] +profile = "black" + +[tool.flake8] +max-line-length = 88 + +[tool.mypy] +strict = true + +[tool.pytest.ini_options] +log_cli = true +log_cli_level = "INFO" diff --git a/requirements_dev.txt b/requirements_dev.txt new file mode 100644 index 0000000..770111a --- /dev/null +++ b/requirements_dev.txt @@ -0,0 +1,148 @@ +# +# This file is autogenerated by pip-compile with Python 3.12 +# by the following command: +# +# pip-compile --extra=dev --output-file=requirements_dev.txt pyproject.toml +# +annotated-types==0.7.0 + # via pydantic +anyio==4.8.0 + # via + # httpx + # starlette + # watchfiles +black==25.1.0 + # via oc4ids-datastore-api (pyproject.toml) +certifi==2025.1.31 + # via + # httpcore + # httpx +click==8.1.8 + # via + # black + # rich-toolkit + # typer + # uvicorn +dnspython==2.7.0 + # via email-validator +email-validator==2.2.0 + # via fastapi +fastapi[standard]==0.115.8 + # via oc4ids-datastore-api (pyproject.toml) +fastapi-cli[standard]==0.0.7 + # via fastapi +flake8==7.1.1 + # via + # flake8-pyproject + # oc4ids-datastore-api (pyproject.toml) +flake8-pyproject==1.2.3 + # via oc4ids-datastore-api (pyproject.toml) +h11==0.14.0 + # via + # httpcore + # uvicorn +httpcore==1.0.7 + # via httpx +httptools==0.6.4 + # via uvicorn +httpx==0.28.1 + # via fastapi +idna==3.10 + # via + # anyio + # email-validator + # httpx +iniconfig==2.0.0 + # via pytest +isort==6.0.0 + # via oc4ids-datastore-api (pyproject.toml) +jinja2==3.1.5 + # via fastapi +markdown-it-py==3.0.0 + # via rich +markupsafe==3.0.2 + # 
via jinja2 +mccabe==0.7.0 + # via flake8 +mdurl==0.1.2 + # via markdown-it-py +mypy==1.15.0 + # via oc4ids-datastore-api (pyproject.toml) +mypy-extensions==1.0.0 + # via + # black + # mypy +packaging==24.2 + # via + # black + # pytest +pathspec==0.12.1 + # via black +platformdirs==4.3.6 + # via black +pluggy==1.5.0 + # via pytest +psycopg2==2.9.10 + # via oc4ids-datastore-api (pyproject.toml) +pycodestyle==2.12.1 + # via flake8 +pydantic==2.10.6 + # via + # fastapi + # sqlmodel +pydantic-core==2.27.2 + # via pydantic +pyflakes==3.2.0 + # via flake8 +pygments==2.19.1 + # via rich +pytest==8.3.4 + # via + # oc4ids-datastore-api (pyproject.toml) + # pytest-mock +pytest-mock==3.14.0 + # via oc4ids-datastore-api (pyproject.toml) +python-dotenv==1.0.1 + # via uvicorn +python-multipart==0.0.20 + # via fastapi +pyyaml==6.0.2 + # via uvicorn +rich==13.9.4 + # via + # rich-toolkit + # typer +rich-toolkit==0.13.2 + # via fastapi-cli +shellingham==1.5.4 + # via typer +sniffio==1.3.1 + # via anyio +sqlalchemy==2.0.37 + # via sqlmodel +sqlmodel==0.0.22 + # via oc4ids-datastore-api (pyproject.toml) +starlette==0.45.3 + # via fastapi +typer==0.15.1 + # via fastapi-cli +typing-extensions==4.12.2 + # via + # anyio + # fastapi + # mypy + # pydantic + # pydantic-core + # rich-toolkit + # sqlalchemy + # typer +uvicorn[standard]==0.34.0 + # via + # fastapi + # fastapi-cli +uvloop==0.21.0 + # via uvicorn +watchfiles==1.0.4 + # via uvicorn +websockets==14.2 + # via uvicorn diff --git a/tests/test_services.py b/tests/test_services.py new file mode 100644 index 0000000..4e290e6 --- /dev/null +++ b/tests/test_services.py @@ -0,0 +1,74 @@ +import datetime + +from pytest_mock import MockerFixture + +from oc4ids_datastore_api.models import DatasetSQLModel +from oc4ids_datastore_api.schemas import Dataset, Download, License, Publisher +from oc4ids_datastore_api.services import get_all_datasets + + +def test_get_all_datasets(mocker: MockerFixture) -> None: + patch_fetch_all_datasets = 
mocker.patch( + "oc4ids_datastore_api.services.fetch_all_datasets" + ) + now = datetime.datetime.now() + dataset_sql_model = DatasetSQLModel( + dataset_id="test_dataset", + source_url="https://test-dataset.json", + publisher_name="test_publisher", + license_url="https://license.com", + license_name="License", + json_url="https://downloads/test_dataset.json", + csv_url="https://downloads/test_dataset.csv", + xlsx_url="https://downloads/test_dataset.xlsx", + updated_at=now, + ) + patch_fetch_all_datasets.return_value = [dataset_sql_model] + + datasets = get_all_datasets() + + expected_dataset = Dataset( + loaded_at=now, + source_url="https://test-dataset.json", + publisher=Publisher(name="test_publisher"), + license=License(name="License", url="https://license.com"), + downloads=[ + Download(format="json", url="https://downloads/test_dataset.json"), + Download(format="csv", url="https://downloads/test_dataset.csv"), + Download(format="xlsx", url="https://downloads/test_dataset.xlsx"), + ], + ) + assert datasets == [expected_dataset] + + +def test_get_all_datasets_missing_download_formats(mocker: MockerFixture) -> None: + patch_fetch_all_datasets = mocker.patch( + "oc4ids_datastore_api.services.fetch_all_datasets" + ) + now = datetime.datetime.now() + dataset_sql_model = DatasetSQLModel( + dataset_id="test_dataset", + source_url="https://test-dataset.json", + publisher_name="test_publisher", + license_url="https://license.com", + license_name="License", + json_url="https://downloads/test_dataset.json", + csv_url=None, + xlsx_url=None, + updated_at=now, + ) + patch_fetch_all_datasets.return_value = [dataset_sql_model] + + datasets = get_all_datasets() + + expected_dataset = Dataset( + loaded_at=now, + source_url="https://test-dataset.json", + publisher=Publisher(name="test_publisher"), + license=License(name="License", url="https://license.com"), + downloads=[ + Download(format="json", url="https://downloads/test_dataset.json"), + ], + ) + + assert datasets == 
[expected_dataset]