Skip to content

Commit 8e3332a

Browse files
committed
feat: Introduce database migrations
Introduce database migrations using `alembic` and create a new version. Add a new Docker service that runs before the gateway and the scheduler and applies any new migrations. Signed-off-by: Phoevos Kalemkeris <[email protected]>
1 parent e384a8f commit 8e3332a

File tree

11 files changed

+409
-9
lines changed

11 files changed

+409
-9
lines changed

.github/workflows/docker.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ jobs:
2424
attestations: write
2525
strategy:
2626
matrix:
27-
service: [ gateway, scheduler, ripper ]
27+
service: [ gateway, scheduler, ripper, migrations ]
2828
include:
2929
- service: gateway
3030
image: cogstacksystems/cogstack-model-gateway
@@ -35,6 +35,9 @@ jobs:
3535
- service: ripper
3636
image: cogstacksystems/cogstack-model-gateway-ripper
3737
dockerfile: ./cogstack_model_gateway/ripper/Dockerfile
38+
- service: migrations
39+
image: cogstacksystems/cogstack-model-gateway-migrations
40+
dockerfile: ./cogstack_model_gateway/migrations/Dockerfile
3841

3942
steps:
4043
- uses: actions/checkout@v4
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
FROM ubuntu:24.04
2+
3+
ARG HTTP_PROXY
4+
ARG HTTPS_PROXY
5+
ARG NO_PROXY
6+
7+
ENV HTTP_PROXY=$HTTP_PROXY
8+
ENV HTTPS_PROXY=$HTTPS_PROXY
9+
ENV NO_PROXY=$NO_PROXY
10+
ENV http_proxy=$HTTP_PROXY
11+
ENV https_proxy=$HTTPS_PROXY
12+
ENV no_proxy=$NO_PROXY
13+
ENV DEBIAN_FRONTEND=noninteractive
14+
ENV POETRY_HOME=/etc/poetry
15+
ENV PATH="${POETRY_HOME}/bin:${PATH}"
16+
17+
RUN apt-get update && apt-get install -y curl python3
18+
RUN curl -sSL https://install.python-poetry.org | python3 -
19+
20+
WORKDIR /app
21+
ENV PYTHONPATH=/app
22+
23+
COPY pyproject.toml poetry.lock ./
24+
RUN poetry install --with migrations --no-root --no-directory
25+
26+
COPY cogstack_model_gateway/common/db.py ./cogstack_model_gateway/common/db.py
27+
COPY cogstack_model_gateway/common/tasks.py ./cogstack_model_gateway/common/tasks.py
28+
COPY cogstack_model_gateway/migrations ./cogstack_model_gateway/migrations
29+
30+
CMD ["poetry", "run", "alembic", "-c", "cogstack_model_gateway/migrations/alembic.ini", "upgrade", "head"]
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# CogStack Model Gateway Migrations
2+
3+
A simple service to manage migrations for the CogStack Model Gateway.
4+
5+
A separate [pyproject.toml](./pyproject.toml) file is used to manage Poetry dependencies, in order
6+
to bypass Poetry limitations regarding local dependencies.
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
# A generic, single database configuration.
2+
3+
[alembic]
4+
# path to migration scripts.
5+
# this is typically a path given in POSIX (e.g. forward slashes)
6+
# format, relative to the token %(here)s which refers to the location of this
7+
# ini file
8+
script_location = %(here)s
9+
10+
# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
11+
# Uncomment the line below if you want the files to be prepended with date and time
12+
# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
13+
# for all available tokens
14+
# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
15+
16+
# sys.path path, will be prepended to sys.path if present.
17+
# defaults to the current working directory. for multiple paths, the path separator
18+
# is defined by "path_separator" below.
19+
prepend_sys_path = .
20+
21+
22+
# timezone to use when rendering the date within the migration file
23+
# as well as the filename.
24+
# If specified, requires the python>=3.9 or backports.zoneinfo library and tzdata library.
25+
# Any required deps can be installed by adding `alembic[tz]` to the pip requirements
26+
# string value is passed to ZoneInfo()
27+
# leave blank for localtime
28+
# timezone =
29+
30+
# max length of characters to apply to the "slug" field
31+
# truncate_slug_length = 40
32+
33+
# set to 'true' to run the environment during
34+
# the 'revision' command, regardless of autogenerate
35+
# revision_environment = false
36+
37+
# set to 'true' to allow .pyc and .pyo files without
38+
# a source .py file to be detected as revisions in the
39+
# versions/ directory
40+
# sourceless = false
41+
42+
# version location specification; This defaults
43+
# to <script_location>/versions. When using multiple version
44+
# directories, initial revisions must be specified with --version-path.
45+
# The path separator used here should be the separator specified by "path_separator"
46+
# below.
47+
# version_locations = %(here)s/bar:%(here)s/bat:%(here)s/alembic/versions
48+
49+
# path_separator; This indicates what character is used to split lists of file
50+
# paths, including version_locations and prepend_sys_path within configparser
51+
# files such as alembic.ini.
52+
# The default rendered in new alembic.ini files is "os", which uses os.pathsep
53+
# to provide os-dependent path splitting.
54+
#
55+
# Note that in order to support legacy alembic.ini files, this default does NOT
56+
# take place if path_separator is not present in alembic.ini. If this
57+
# option is omitted entirely, fallback logic is as follows:
58+
#
59+
# 1. Parsing of the version_locations option falls back to using the legacy
60+
# "version_path_separator" key, which if absent then falls back to the legacy
61+
# behavior of splitting on spaces and/or commas.
62+
# 2. Parsing of the prepend_sys_path option falls back to the legacy
63+
# behavior of splitting on spaces, commas, or colons.
64+
#
65+
# Valid values for path_separator are:
66+
#
67+
# path_separator = :
68+
# path_separator = ;
69+
# path_separator = space
70+
# path_separator = newline
71+
#
72+
# Use os.pathsep. Default configuration used for new projects.
73+
path_separator = os
74+
75+
# set to 'true' to search source files recursively
76+
# in each "version_locations" directory
77+
# new in Alembic version 1.10
78+
# recursive_version_locations = false
79+
80+
# the output encoding used when revision files
81+
# are written from script.py.mako
82+
# output_encoding = utf-8
83+
84+
# database URL. This is consumed by the user-maintained env.py script only.
85+
# other means of configuring database URLs may be customized within the env.py
86+
# file.
87+
sqlalchemy.url = driver://user:pass@localhost/dbname
88+
89+
90+
[post_write_hooks]
91+
# post_write_hooks defines scripts or Python functions that are run
92+
# on newly generated revision scripts. See the documentation for further
93+
# detail and examples
94+
95+
# format using "black" - use the console_scripts runner, against the "black" entrypoint
96+
# hooks = black
97+
# black.type = console_scripts
98+
# black.entrypoint = black
99+
# black.options = -l 79 REVISION_SCRIPT_FILENAME
100+
101+
# lint with attempts to fix using "ruff" - use the exec runner, execute a binary
102+
# hooks = ruff
103+
# ruff.type = exec
104+
# ruff.executable = %(here)s/.venv/bin/ruff
105+
# ruff.options = check --fix REVISION_SCRIPT_FILENAME
106+
107+
# Logging configuration. This is also consumed by the user-maintained
108+
# env.py script only.
109+
[loggers]
110+
keys = root,sqlalchemy,alembic
111+
112+
[handlers]
113+
keys = console
114+
115+
[formatters]
116+
keys = generic
117+
118+
[logger_root]
119+
level = WARNING
120+
handlers = console
121+
qualname =
122+
123+
[logger_sqlalchemy]
124+
level = WARNING
125+
handlers =
126+
qualname = sqlalchemy.engine
127+
128+
[logger_alembic]
129+
level = INFO
130+
handlers =
131+
qualname = alembic
132+
133+
[handler_console]
134+
class = StreamHandler
135+
args = (sys.stderr,)
136+
level = NOTSET
137+
formatter = generic
138+
139+
[formatter_generic]
140+
format = %(levelname)-5.5s [%(name)s] %(message)s
141+
datefmt = %H:%M:%S
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
import os
2+
import sys
3+
from logging.config import fileConfig
4+
from pathlib import Path
5+
6+
from alembic import context
7+
from dotenv import load_dotenv
8+
from sqlalchemy import engine_from_config, pool
9+
10+
# Put the directory three levels above this file on sys.path so that the
# ``cogstack_model_gateway`` import further down resolves when Alembic loads this script.
env_file = Path(__file__)
project_root = env_file.parent.parent.parent
sys.path.append(str(project_root))

# The Alembic Config object: gives access to the values of the .ini file in use.
config = context.config

# Configure Python logging from the ini file, when Alembic was started with one.
ini_path = config.config_file_name
if ini_path is not None:
    fileConfig(ini_path)

# Model metadata used for 'autogenerate' support. ``Task`` is imported without being
# referenced here (hence the F401 waiver) -- presumably so that its table is registered on
# ``SQLModel.metadata``; confirm against cogstack_model_gateway/common/tasks.py.
from cogstack_model_gateway.common.tasks import SQLModel, Task  # noqa: E402, F401

target_metadata = SQLModel.metadata
28+
29+
# other values from the config, defined by the needs of env.py,
30+
# can be acquired:
31+
# my_important_option = config.get_main_option("my_important_option")
32+
# ... etc.
33+
34+
from urllib.parse import quote  # noqa: E402

# Load CMG_DB_* settings from a local .env file (if present) into the environment.
load_dotenv()

user = os.getenv("CMG_DB_USER")
password = os.getenv("CMG_DB_PASSWORD")
host = os.getenv("CMG_DB_HOST")
port = os.getenv("CMG_DB_PORT")
db_name = os.getenv("CMG_DB_NAME")

# Every setting is mandatory; an unset or empty value counts as missing.
missing = [
    var
    for var, val in {
        "CMG_DB_USER": user,
        "CMG_DB_PASSWORD": password,
        "CMG_DB_HOST": host,
        "CMG_DB_PORT": port,
        "CMG_DB_NAME": db_name,
    }.items()
    if not val
]

if missing:
    raise RuntimeError(f"Missing required environment variables: {', '.join(missing)}")

# Percent-encode the credentials so that characters such as "@", ":" or "/" inside a
# username or password cannot be mistaken for URL delimiters. ``quote`` with ``safe=""``
# is used rather than ``quote_plus`` so that a space becomes "%20" instead of "+".
database_url = (
    f"postgresql+psycopg2://{quote(user, safe='')}:{quote(password, safe='')}"
    f"@{host}:{port}/{db_name}"
)

# Alembic's Config applies ConfigParser interpolation to option values, so every literal
# "%" (including those produced by the percent-encoding above) has to be doubled; it is
# turned back into a single "%" when the option is read.
config.set_main_option("sqlalchemy.url", database_url.replace("%", "%%"))
60+
61+
62+
def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode.

    The context is configured with the database URL only, not an Engine
    (although an Engine would be accepted as well). Because no Engine is
    created, no DBAPI needs to be available.

    Calls to context.execute() here emit the given string to the
    script output.

    """
    offline_options = {
        "url": config.get_main_option("sqlalchemy.url"),
        "target_metadata": target_metadata,
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
84+
85+
86+
def run_migrations_online() -> None:
    """Run migrations in 'online' mode.

    An Engine is built from the ``sqlalchemy.``-prefixed options of the main
    ini section, and a connection obtained from it is associated with the
    migration context.

    """
    engine_options = config.get_section(config.config_ini_section, {})
    engine = engine_from_config(engine_options, prefix="sqlalchemy.", poolclass=pool.NullPool)

    with engine.connect() as connection:
        context.configure(connection=connection, target_metadata=target_metadata)

        with context.begin_transaction():
            context.run_migrations()
104+
105+
106+
# Pick the runner that matches the mode Alembic was invoked in, then execute it.
run_migrations = run_migrations_offline if context.is_offline_mode() else run_migrations_online
run_migrations()
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
"""${message}
2+
3+
Revision ID: ${up_revision}
4+
Revises: ${down_revision | comma,n}
5+
Create Date: ${create_date}
6+
7+
"""
8+
9+
from collections.abc import Sequence
10+
11+
import sqlalchemy as sa
12+
import sqlmodel
13+
from alembic import op
14+
${imports if imports else ""}
15+
16+
# revision identifiers, used by Alembic.
17+
revision: str = ${repr(up_revision)}
18+
down_revision: str | Sequence[str] | None = ${repr(down_revision)}
19+
branch_labels: str | Sequence[str] | None = ${repr(branch_labels)}
20+
depends_on: str | Sequence[str] | None = ${repr(depends_on)}
21+
22+
23+
def upgrade() -> None:
24+
"""Upgrade schema."""
25+
${upgrades if upgrades else "pass"}
26+
27+
28+
def downgrade() -> None:
29+
"""Downgrade schema."""
30+
${downgrades if downgrades else "pass"}
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
"""Extend Task with request info and timestamps
2+
3+
Revision ID: 1859866dbf86
4+
Revises:
5+
Create Date: 2025-06-19 17:29:59.695124
6+
7+
"""
8+
9+
from collections.abc import Sequence
10+
11+
import sqlalchemy as sa
12+
import sqlmodel
13+
from alembic import op
14+
15+
# revision identifiers, used by Alembic.
16+
revision: str = "1859866dbf86"
17+
down_revision: str | Sequence[str] | None = None
18+
branch_labels: str | Sequence[str] | None = None
19+
depends_on: str | Sequence[str] | None = None
20+
21+
22+
def upgrade() -> None:
    """Upgrade schema.

    Adds six nullable string columns to the ``task`` table, in this order:
    ``model``, ``type``, ``source``, ``created_at``, ``started_at`` and
    ``finished_at``.

    NOTE(review): this is the first revision (``down_revision`` is None) yet it
    only alters ``task``; it appears to assume the table already exists when
    the migration runs -- confirm for fresh databases.
    """
    new_columns = ("model", "type", "source", "created_at", "started_at", "finished_at")
    for column_name in new_columns:
        op.add_column(
            "task", sa.Column(column_name, sqlmodel.sql.sqltypes.AutoString(), nullable=True)
        )
36+
37+
38+
def downgrade() -> None:
    """Downgrade schema.

    Drops the columns added by ``upgrade`` from the ``task`` table, in the
    reverse of the order in which they were added.
    """
    dropped_columns = ("finished_at", "started_at", "created_at", "source", "type", "model")
    for column_name in dropped_columns:
        op.drop_column("task", column_name)

0 commit comments

Comments
 (0)