Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions .github/workflows/build-and-push-image.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Builds the Docker image and pushes it to the GitHub Container Registry
# (ghcr.io) whenever a tag matching "v*" is pushed.
name: Build and push image

on:
  push:
    tags:
      - "v*"

jobs:
  build-and-push-image:
    runs-on: ubuntu-latest
    # Least-privilege token scopes: read the repository for checkout and
    # write packages so `docker push` to ghcr.io is authorised even when the
    # repository's default GITHUB_TOKEN permissions are read-only.
    permissions:
      contents: read
      packages: write
    env:
      IMAGE_NAME: "oc4ids-datastore-pipeline"
    steps:
      - uses: actions/checkout@v4
      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Extract version
        run: |
          # Strip the leading "refs/<kind>/" from the ref, then the "v" prefix,
          # and export the result to later steps as $VERSION.
          TAG="${GITHUB_REF#refs/*/}"
          echo "VERSION=${TAG#v}" >> "$GITHUB_ENV"
      - name: Print version
        run: echo "$VERSION"
      - name: Build and push image
        run: |
          IMAGE_ID="ghcr.io/${{ github.repository_owner }}/${IMAGE_NAME}"
          # Lowercase the image reference (the owner name may contain capitals).
          IMAGE_ID="$(echo "$IMAGE_ID" | tr '[:upper:]' '[:lower:]')"
          echo "$IMAGE_ID"
          # Tag the build both with the release version and as "latest".
          docker build . -t "${IMAGE_ID}:${VERSION}" -t "${IMAGE_ID}:latest"
          docker push --all-tags "${IMAGE_ID}"
2 changes: 2 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,5 @@ jobs:
run: mypy oc4ids_datastore_pipeline/ tests/
- name: Run tests
run: pytest
- name: Build docker image
run: docker build -t oc4ids-datastore-pipeline .
27 changes: 27 additions & 0 deletions .github/workflows/create-tag.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Creates and pushes a "v<version>" git tag on every push to the `live`
# branch, where <version> is the installed package's metadata version.
name: Create tag

on:
  push:
    branches:
      - live

jobs:
  create-tag:
    runs-on: ubuntu-latest
    # Pushing a tag needs write access to repository contents; declare it
    # explicitly so the job works when the default token is read-only.
    permissions:
      contents: write
    steps:
      # NOTE(review): checkout configures git with the default GITHUB_TOKEN.
      # GitHub does not start new workflow runs for events created with that
      # token, so the tag pushed below may not trigger workflows that listen
      # on `push: tags`. Confirm; a PAT or deploy key passed to checkout
      # would be needed if the tag push must trigger another workflow.
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - name: Install local package
        run: pip install .
      - name: Extract version
        run: |
          # Read the version from the installed package metadata and export
          # it to later steps as $VERSION.
          VERSION="$(python -c "import importlib.metadata; print(importlib.metadata.version('oc4ids-datastore-pipeline'))")"
          echo "VERSION=$VERSION" >> "$GITHUB_ENV"
      - name: Print version
        run: echo "$VERSION"
      - name: Create tag
        run: |
          git tag "v${VERSION}"
          git push origin "v${VERSION}"
16 changes: 16 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Image that applies database migrations and then runs the pipeline.
FROM python:3.12-slim

# System packages needed at install time (presumably to compile psycopg2,
# which is pinned in requirements.txt -- confirm). The apt package lists are
# removed in the same layer so they do not bloat the image.
RUN apt-get update \
    && apt-get install -y libpq-dev gcc \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /oc4ids_datastore_pipeline

# Install pinned dependencies before copying the source so this layer stays
# cached until requirements.txt changes.
COPY requirements.txt .

RUN pip install --no-cache-dir -r requirements.txt

COPY . .

RUN pip install --no-cache-dir .

# Run migrations first; `exec` then replaces the shell with the pipeline
# process so it receives container signals (e.g. SIGTERM) directly.
ENTRYPOINT ["sh", "-c", "alembic upgrade head && exec oc4ids-datastore-pipeline"]
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,8 @@ pytest
```
alembic revision --autogenerate -m "<MESSAGE HERE>"
```

## Releasing

On merge to `live`, a `v<version>` tag is created using the version in `pyproject.toml`;
pushing that tag then triggers a Docker image build and push to the GitHub Container Registry.
31 changes: 31 additions & 0 deletions migrations/versions/3499656b84e7_allow_nullable_json_url.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""allow nullable json_url

Revision ID: 3499656b84e7
Revises: 084c39bf418e
Create Date: 2025-02-11 16:44:30.550413

"""

from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision: str = "3499656b84e7"
down_revision: Union[str, None] = "084c39bf418e"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Make the ``json_url`` column of the ``dataset`` table nullable."""
    # Originally auto-generated by Alembic.
    op.alter_column(
        "dataset",
        "json_url",
        existing_type=sa.VARCHAR(),
        nullable=True,
    )


def downgrade() -> None:
    """Restore the NOT NULL constraint on ``dataset.json_url``."""
    # Originally auto-generated by Alembic.
    # NOTE(review): presumably this fails if any row has a NULL json_url at
    # downgrade time -- confirm whether such rows need handling first.
    op.alter_column(
        "dataset",
        "json_url",
        existing_type=sa.VARCHAR(),
        nullable=False,
    )
2 changes: 1 addition & 1 deletion oc4ids_datastore_pipeline/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class Dataset(Base):
publisher_name: Mapped[str] = mapped_column(String)
license_url: Mapped[Optional[str]] = mapped_column(String, nullable=True)
license_name: Mapped[Optional[str]] = mapped_column(String, nullable=True)
json_url: Mapped[str] = mapped_column(String)
json_url: Mapped[Optional[str]] = mapped_column(String, nullable=True)
csv_url: Mapped[Optional[str]] = mapped_column(String, nullable=True)
xlsx_url: Mapped[Optional[str]] = mapped_column(String, nullable=True)
updated_at: Mapped[datetime.datetime] = mapped_column(DateTime(timezone=True))
Expand Down
1 change: 1 addition & 0 deletions oc4ids_datastore_pipeline/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ def process_registry() -> None:
process_deleted_datasets(registered_datasets)
for name, url in registered_datasets.items():
process_dataset(name, url)
logger.info("Finished processing all datasets")


def run() -> None:
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ dependencies = [
"flattentool",
"libcoveoc4ids",
"psycopg2",
"python-dotenv",
"requests",
"sqlalchemy",
]
Expand All @@ -26,7 +27,6 @@ dev = [
"mypy",
"pytest",
"pytest-mock",
"python-dotenv",
"types-boto3",
"types-requests",
]
Expand Down
181 changes: 181 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
#
# This file is autogenerated by pip-compile with Python 3.12
# by the following command:
#
# pip-compile --output-file=requirements.txt pyproject.toml
#
alembic==1.14.1
# via oc4ids-datastore-pipeline (pyproject.toml)
attrs==25.1.0
# via
# cattrs
# jsonschema
# referencing
# requests-cache
backports-datetime-fromisoformat==2.0.3
# via flattentool
boto3==1.36.17
# via oc4ids-datastore-pipeline (pyproject.toml)
botocore==1.36.17
# via
# boto3
# s3transfer
btrees==6.1
# via zodb
cattrs==24.1.2
# via requests-cache
certifi==2025.1.31
# via requests
cffi==1.17.1
# via persistent
charset-normalizer==3.4.1
# via requests
click==8.1.8
# via
# libcoveoc4ids
# libcoveocds
defusedxml==0.7.1
# via odfpy
et-xmlfile==2.0.0
# via openpyxl
flattentool==0.27.0
# via
# libcove
# oc4ids-datastore-pipeline (pyproject.toml)
idna==3.10
# via requests
ijson==3.3.0
# via flattentool
jmespath==1.0.1
# via
# boto3
# botocore
json-merge-patch==0.2
# via ocdsextensionregistry
jsonref==1.1.0
# via
# flattentool
# libcove
# libcoveocds
# ocdsextensionregistry
jsonschema==4.23.0
# via
# libcove
# libcoveocds
jsonschema-specifications==2024.10.1
# via jsonschema
libcove==0.32.1
# via
# libcoveoc4ids
# libcoveocds
libcoveoc4ids==0.9.0
# via oc4ids-datastore-pipeline (pyproject.toml)
libcoveocds==0.16.4
# via libcoveoc4ids
lxml==5.3.1
# via flattentool
mako==1.3.9
# via alembic
markupsafe==3.0.2
# via mako
ocdsextensionregistry==0.6.9
# via libcoveocds
odfpy==1.4.1
# via flattentool
openpyxl==3.1.5
# via flattentool
persistent==6.1
# via
# btrees
# zodb
platformdirs==4.3.6
# via requests-cache
psycopg2==2.9.10
# via oc4ids-datastore-pipeline (pyproject.toml)
pycparser==2.22
# via cffi
python-dateutil==2.9.0.post0
# via botocore
python-dotenv==1.0.1
# via oc4ids-datastore-pipeline (pyproject.toml)
pytz==2025.1
# via flattentool
referencing==0.36.2
# via
# jsonschema
# jsonschema-specifications
# libcove
# libcoveocds
requests==2.32.3
# via
# libcove
# libcoveocds
# oc4ids-datastore-pipeline (pyproject.toml)
# ocdsextensionregistry
# requests-cache
requests-cache==1.2.1
# via ocdsextensionregistry
rfc3339-validator==0.1.4
# via libcove
rfc3987==1.3.8
# via libcove
rpds-py==0.22.3
# via
# jsonschema
# referencing
s3transfer==0.11.2
# via boto3
schema==0.7.7
# via flattentool
six==1.17.0
# via
# python-dateutil
# rfc3339-validator
# url-normalize
sqlalchemy==2.0.38
# via
# alembic
# oc4ids-datastore-pipeline (pyproject.toml)
transaction==5.0
# via zodb
typing-extensions==4.12.2
# via
# alembic
# referencing
# sqlalchemy
url-normalize==1.4.3
# via requests-cache
urllib3==2.3.0
# via
# botocore
# requests
# requests-cache
xmltodict==0.14.2
# via flattentool
zc-lockfile==3.0.post1
# via zodb
zc-zlibstorage==1.2.0
# via flattentool
zconfig==4.2
# via zodb
zodb==6.0
# via
# flattentool
# zc-zlibstorage
zodbpickle==4.1.1
# via zodb
zope-deferredimport==5.0
# via persistent
zope-interface==7.2
# via
# btrees
# persistent
# transaction
# zc-zlibstorage
# zodb
# zope-proxy
zope-proxy==6.1
# via zope-deferredimport

# The following packages are considered to be unsafe in a requirements file:
# setuptools