diff --git a/.github/workflows/build-and-push-image.yml b/.github/workflows/build-and-push-image.yml new file mode 100644 index 0000000..87dd7d8 --- /dev/null +++ b/.github/workflows/build-and-push-image.yml @@ -0,0 +1,32 @@ +name: Build and push image + +on: + release: + types: [created] + +jobs: + build-and-push-image: + runs-on: ubuntu-latest + env: + IMAGE_NAME: "oc4ids-datastore-pipeline" + steps: + - uses: actions/checkout@v4 + - name: Login to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Extract version + run: | + TAG=${GITHUB_REF#refs/*/} + echo "VERSION=${TAG#v}" >> $GITHUB_ENV + - name: Print version + run: echo $VERSION + - name: Build and push image + run: | + IMAGE_ID=ghcr.io/${{ github.repository_owner }}/$IMAGE_NAME + IMAGE_ID=$(echo $IMAGE_ID | tr '[A-Z]' '[a-z]') + echo $IMAGE_ID + docker build . -t ${IMAGE_ID}:${VERSION} -t ${IMAGE_ID}:latest + docker push --all-tags ${IMAGE_ID} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 019e378..00c890f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -23,3 +23,5 @@ jobs: run: mypy oc4ids_datastore_pipeline/ tests/ - name: Run tests run: pytest + - name: Build docker image + run: docker build -t oc4ids-datastore-pipeline . diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..65ff616 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,16 @@ +FROM python:3.12-slim + +RUN apt-get update \ + && apt-get install -y libpq-dev gcc + +WORKDIR /oc4ids_datastore_pipeline + +COPY requirements.txt . + +RUN pip install -r requirements.txt + +COPY . . + +RUN pip install . 
+ +ENTRYPOINT ["sh", "-c", "alembic upgrade head && oc4ids-datastore-pipeline"] diff --git a/README.md b/README.md index 25e3ab3..6e6c35e 100644 --- a/README.md +++ b/README.md @@ -71,3 +71,7 @@ pytest ``` alembic revision --autogenerate -m "" ``` + +## Releasing + +To publish a new version, raise a PR to `main` updating the version in `pyproject.toml`. Once merged, create a git tag and GitHub release for the new version, with naming `vX.Y.Z`. This will trigger a docker image to be built and pushed, tagged with the version and `latest`. diff --git a/migrations/versions/3499656b84e7_allow_nullable_json_url.py b/migrations/versions/3499656b84e7_allow_nullable_json_url.py new file mode 100644 index 0000000..ae42e02 --- /dev/null +++ b/migrations/versions/3499656b84e7_allow_nullable_json_url.py @@ -0,0 +1,31 @@ +"""allow nullable json_url + +Revision ID: 3499656b84e7 +Revises: 084c39bf418e +Create Date: 2025-02-11 16:44:30.550413 + +""" + +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = "3499656b84e7" +down_revision: Union[str, None] = "084c39bf418e" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.alter_column("dataset", "json_url", existing_type=sa.VARCHAR(), nullable=True) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + op.alter_column("dataset", "json_url", existing_type=sa.VARCHAR(), nullable=False) + # ### end Alembic commands ### diff --git a/oc4ids_datastore_pipeline/database.py b/oc4ids_datastore_pipeline/database.py index f699b56..ddafb8f 100644 --- a/oc4ids_datastore_pipeline/database.py +++ b/oc4ids_datastore_pipeline/database.py @@ -30,7 +30,7 @@ class Dataset(Base): publisher_name: Mapped[str] = mapped_column(String) license_url: Mapped[Optional[str]] = mapped_column(String, nullable=True) license_name: Mapped[Optional[str]] = mapped_column(String, nullable=True) - json_url: Mapped[str] = mapped_column(String) + json_url: Mapped[Optional[str]] = mapped_column(String, nullable=True) csv_url: Mapped[Optional[str]] = mapped_column(String, nullable=True) xlsx_url: Mapped[Optional[str]] = mapped_column(String, nullable=True) updated_at: Mapped[datetime.datetime] = mapped_column(DateTime(timezone=True)) diff --git a/oc4ids_datastore_pipeline/pipeline.py b/oc4ids_datastore_pipeline/pipeline.py index f21b03f..efbe5af 100644 --- a/oc4ids_datastore_pipeline/pipeline.py +++ b/oc4ids_datastore_pipeline/pipeline.py @@ -145,6 +145,7 @@ def process_registry() -> None: process_deleted_datasets(registered_datasets) for name, url in registered_datasets.items(): process_dataset(name, url) + logger.info("Finished processing all datasets") def run() -> None: diff --git a/pyproject.toml b/pyproject.toml index d9661cf..1413f3e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,6 +13,7 @@ dependencies = [ "flattentool", "libcoveoc4ids", "psycopg2", + "python-dotenv", "requests", "sqlalchemy", ] @@ -26,7 +27,6 @@ dev = [ "mypy", "pytest", "pytest-mock", - "python-dotenv", "types-boto3", "types-requests", ] diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..ed44670 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,181 @@ +# +# This file is autogenerated by pip-compile with Python 3.12 +# by the following command: +# +# pip-compile 
--output-file=requirements.txt pyproject.toml +# +alembic==1.14.1 + # via oc4ids-datastore-pipeline (pyproject.toml) +attrs==25.1.0 + # via + # cattrs + # jsonschema + # referencing + # requests-cache +backports-datetime-fromisoformat==2.0.3 + # via flattentool +boto3==1.36.17 + # via oc4ids-datastore-pipeline (pyproject.toml) +botocore==1.36.17 + # via + # boto3 + # s3transfer +btrees==6.1 + # via zodb +cattrs==24.1.2 + # via requests-cache +certifi==2025.1.31 + # via requests +cffi==1.17.1 + # via persistent +charset-normalizer==3.4.1 + # via requests +click==8.1.8 + # via + # libcoveoc4ids + # libcoveocds +defusedxml==0.7.1 + # via odfpy +et-xmlfile==2.0.0 + # via openpyxl +flattentool==0.27.0 + # via + # libcove + # oc4ids-datastore-pipeline (pyproject.toml) +idna==3.10 + # via requests +ijson==3.3.0 + # via flattentool +jmespath==1.0.1 + # via + # boto3 + # botocore +json-merge-patch==0.2 + # via ocdsextensionregistry +jsonref==1.1.0 + # via + # flattentool + # libcove + # libcoveocds + # ocdsextensionregistry +jsonschema==4.23.0 + # via + # libcove + # libcoveocds +jsonschema-specifications==2024.10.1 + # via jsonschema +libcove==0.32.1 + # via + # libcoveoc4ids + # libcoveocds +libcoveoc4ids==0.9.0 + # via oc4ids-datastore-pipeline (pyproject.toml) +libcoveocds==0.16.4 + # via libcoveoc4ids +lxml==5.3.1 + # via flattentool +mako==1.3.9 + # via alembic +markupsafe==3.0.2 + # via mako +ocdsextensionregistry==0.6.9 + # via libcoveocds +odfpy==1.4.1 + # via flattentool +openpyxl==3.1.5 + # via flattentool +persistent==6.1 + # via + # btrees + # zodb +platformdirs==4.3.6 + # via requests-cache +psycopg2==2.9.10 + # via oc4ids-datastore-pipeline (pyproject.toml) +pycparser==2.22 + # via cffi +python-dateutil==2.9.0.post0 + # via botocore +python-dotenv==1.0.1 + # via oc4ids-datastore-pipeline (pyproject.toml) +pytz==2025.1 + # via flattentool +referencing==0.36.2 + # via + # jsonschema + # jsonschema-specifications + # libcove + # libcoveocds +requests==2.32.3 + # 
via + # libcove + # libcoveocds + # oc4ids-datastore-pipeline (pyproject.toml) + # ocdsextensionregistry + # requests-cache +requests-cache==1.2.1 + # via ocdsextensionregistry +rfc3339-validator==0.1.4 + # via libcove +rfc3987==1.3.8 + # via libcove +rpds-py==0.22.3 + # via + # jsonschema + # referencing +s3transfer==0.11.2 + # via boto3 +schema==0.7.7 + # via flattentool +six==1.17.0 + # via + # python-dateutil + # rfc3339-validator + # url-normalize +sqlalchemy==2.0.38 + # via + # alembic + # oc4ids-datastore-pipeline (pyproject.toml) +transaction==5.0 + # via zodb +typing-extensions==4.12.2 + # via + # alembic + # referencing + # sqlalchemy +url-normalize==1.4.3 + # via requests-cache +urllib3==2.3.0 + # via + # botocore + # requests + # requests-cache +xmltodict==0.14.2 + # via flattentool +zc-lockfile==3.0.post1 + # via zodb +zc-zlibstorage==1.2.0 + # via flattentool +zconfig==4.2 + # via zodb +zodb==6.0 + # via + # flattentool + # zc-zlibstorage +zodbpickle==4.1.1 + # via zodb +zope-deferredimport==5.0 + # via persistent +zope-interface==7.2 + # via + # btrees + # persistent + # transaction + # zc-zlibstorage + # zodb + # zope-proxy +zope-proxy==6.1 + # via zope-deferredimport + +# The following packages are considered to be unsafe in a requirements file: +# setuptools