Skip to content
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ assets
**/.pytest_cache
**/.coverage
**/coverage.xml

# rust build artifacts
libs/libviewer/target
**/target
**/*.rs.bk
**/.cargo/registry
**/.cargo/git
12 changes: 8 additions & 4 deletions .github/workflows/_unit-tests-python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ on:
working-directory:
required: true
type: string
poetry-args:
required: false
type: string
default: ""
env:
# required to get access to use a cached poetry venv in "/home/runner/.cache/pypoetry/virtualenvs"
POETRY_VIRTUALENVS_IN_PROJECT: false
Expand All @@ -31,16 +35,16 @@ jobs:
uses: actions/setup-python@v5
with:
python-version: ${{ env.python-version }}
cache: "poetry"
cache-dependency-path: |
${{ inputs.working-directory }}/poetry.lock
# cache: "poetry"
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I temporarily turned off the poetry cache because, with it enabled, the optional libviewer dependency wasn't being installed properly.

# cache-dependency-path: |
# ${{ inputs.working-directory }}/poetry.lock
- name: Install packages
run: sudo apt update; sudo apt install -y ffmpeg libavcodec-extra libsndfile1
- name: Install dependencies
# "poetry env use" is required: https://github.com/actions/setup-python/issues/374#issuecomment-1088938718
run: |
poetry env use "${{ env.python-version }}"
poetry install
poetry install ${{ inputs.poetry-args }}
- name: Launch mongo
env:
MONGO_PORT: ${{ env.mongo-port }}
Expand Down
17 changes: 15 additions & 2 deletions .github/workflows/l-libcommon.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,24 +9,37 @@
- main
paths:
- "libs/libcommon/**"
- "libs/libviewer/**"
- ".github/workflows/l-libcommon.yml"
- ".github/workflows/_quality-python.yml"
- ".github/workflows/_unit-tests-python.yml"
- "tools/docker-compose-mongo.yml"
pull_request:
paths:
- "libs/libcommon/**"
- "libs/libviewer/**"
- ".github/workflows/l-libcommon.yml"
- ".github/workflows/_quality-python.yml"
- ".github/workflows/_unit-tests-python.yml"
- "tools/docker-compose-mongo.yml"
jobs:
quality:
quality-libcommon:
uses: ./.github/workflows/_quality-python.yml
with:
working-directory: libs/libcommon
unit-tests:
quality-libviewer:
uses: ./.github/workflows/_quality-python.yml
with:
working-directory: libs/libviewer
unit-tests-libcommon:
uses: ./.github/workflows/_unit-tests-python.yml
with:
working-directory: libs/libcommon
poetry-args: "--with libviewer"
secrets: inherit
unit-tests-libviewer:
uses: ./.github/workflows/_unit-tests-python.yml
with:
working-directory: libs/libviewer
poetry-args: "--with dev"
secrets: inherit

Check warning

Code scanning / CodeQL

Workflow does not contain permissions Medium

Actions job or workflow does not limit the permissions of the GITHUB_TOKEN. Consider setting an explicit permissions block, using the following as a minimal starting point: {}
1 change: 1 addition & 0 deletions .github/workflows/s-worker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,5 @@ jobs:
uses: ./.github/workflows/_unit-tests-python.yml
with:
working-directory: services/worker
poetry-args: "--with libviewer"
secrets: inherit
96 changes: 58 additions & 38 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,120 +1,140 @@
# Multi-stage Dockerfile for all dataset-viewer services and jobs
# Build with: docker build --target <service_name> -t <tag> .

ARG PYTHON_VERSION=3.12.11
FROM python:${PYTHON_VERSION}-slim AS viewer
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Building the Rust-based libviewer as a wheel in a separate stage so that the compiler toolchains are not included in the final Docker images.


# Install the minimal system build deps (curl, build-essential) needed by the Rust toolchain
RUN apt-get update \
&& apt-get install -y --no-install-recommends curl build-essential \
&& rm -rf /var/lib/apt/lists/*

# Install Rust toolchain and maturin
RUN curl https://sh.rustup.rs -sSf | sh -s -- -y \
&& . $HOME/.cargo/env \
&& pip install maturin \
&& rustc --version \
&& cargo --version
# Add cargo bin dir to PATH (so maturin + cargo available globally)
ENV PATH="/root/.cargo/bin:${PATH}"

# Build libviewer
COPY libs/libviewer /src/libs/libviewer
WORKDIR /src/libs/libviewer
RUN maturin build --release --strip --out /tmp/dist

# Base stage with shared setup
FROM python:3.12.11-slim AS common
FROM python:${PYTHON_VERSION}-slim AS common

# System dependencies
RUN apt-get update \
&& apt-get install -y unzip wget procps htop ffmpeg libavcodec-extra libsndfile1 \
&& rm -rf /var/lib/apt/lists/*

# Common environment variables
ARG POETRY_VERSION=2.1.4
ENV PYTHONFAULTHANDLER=1 \
PYTHONUNBUFFERED=1 \
PYTHONHASHSEED=random \
PIP_NO_CACHE_DIR=1 \
PIP_DISABLE_PIP_VERSION_CHECK=on \
PIP_DEFAULT_TIMEOUT=100 \
POETRY_NO_INTERACTION=1 \
POETRY_VERSION=2.1.4 \
POETRY_VIRTUALENVS_CREATE=false \
PATH="$PATH:/root/.local/bin"

# Install pip and poetry
RUN pip install -U pip && pip install "poetry==$POETRY_VERSION"
RUN pip install -U pip && pip install "poetry==${POETRY_VERSION}"

# Install libcommon's dependencies but not libcommon itself
COPY libs/libcommon/poetry.lock \
libs/libcommon/pyproject.toml \
/src/libs/libcommon/
RUN poetry install --no-cache --no-root --no-directory -P /src/libs/libcommon

# Base image for services including libapi's dependencies
FROM common AS service
COPY libs/libapi/poetry.lock \
libs/libapi/pyproject.toml \
/src/libs/libapi/
RUN poetry install --no-cache --no-root --no-directory -P /src/libs/libapi
WORKDIR /src/libs/libcommon
RUN poetry install --no-cache --no-root

# Below are the actual API services which depend on libapi and libcommon.
# Since the majority of the dependencies are already installed in the `api`
# we let poetry to actually install the `libs` and the specific service.
# Since the majority of the dependencies are already installed in the
# `common` stage, we let poetry handle the rest.

# API service
FROM service AS api
FROM common AS api
COPY libs /src/libs
COPY services/api /src/services/api
RUN poetry install --no-cache -P /src/services/api
WORKDIR /src/services/api/
WORKDIR /src/services/api
RUN poetry install --no-cache
ENTRYPOINT ["poetry", "run", "python", "src/api/main.py"]

# Admin service
FROM service AS admin
FROM common AS admin
COPY libs /src/libs
COPY services/admin /src/services/admin
RUN poetry install --no-cache -P /src/services/admin
WORKDIR /src/services/admin/
WORKDIR /src/services/admin
RUN poetry install --no-cache
ENTRYPOINT ["poetry", "run", "python", "src/admin/main.py"]

# Rows service
FROM service AS rows
FROM common AS rows
COPY --from=viewer /tmp/dist /tmp/dist
RUN pip install /tmp/dist/libviewer-*.whl
COPY libs /src/libs
COPY services/rows /src/services/rows
RUN poetry install --no-cache -P /src/services/rows
WORKDIR /src/services/rows/
WORKDIR /src/services/rows
RUN poetry install --no-cache
ENTRYPOINT ["poetry", "run", "python", "src/rows/main.py"]

# Search service
FROM service AS search
FROM common AS search
COPY libs /src/libs
COPY services/search /src/services/search
RUN poetry install --no-cache -P /src/services/search
WORKDIR /src/services/search/
WORKDIR /src/services/search
RUN poetry install --no-cache
ENTRYPOINT ["poetry", "run", "python", "src/search/main.py"]

# SSE API service
FROM service AS sse-api
FROM common AS sse-api
COPY libs /src/libs
COPY services/sse-api /src/services/sse-api
RUN poetry install --no-cache -P /src/services/sse-api
WORKDIR /src/services/sse-api/
WORKDIR /src/services/sse-api
RUN poetry install --no-cache
ENTRYPOINT ["poetry", "run", "python", "src/sse_api/main.py"]

# Webhook service
FROM service AS webhook
FROM common AS webhook
COPY libs /src/libs
COPY services/webhook /src/services/webhook
RUN poetry install --no-cache -P /src/services/webhook
WORKDIR /src/services/webhook/
WORKDIR /src/services/webhook
RUN poetry install --no-cache
ENTRYPOINT ["poetry", "run", "python", "src/webhook/main.py"]

# Worker service
FROM common AS worker
COPY --from=viewer /tmp/dist /tmp/dist
RUN pip install /tmp/dist/libviewer-*.whl
COPY libs /src/libs
COPY services/worker /src/services/worker
WORKDIR /src/services/worker
# presidio-analyzer > spacy > thinc doesn't ship aarch64 wheels, so we need a compiler to build it from source
RUN if [ "$(uname -m)" = "aarch64" ]; then \
apt-get update && apt-get install -y build-essential && \
rm -rf /var/lib/apt/lists/*; \
fi
RUN poetry install --no-cache -P /src/services/worker
RUN poetry install --no-cache
RUN python -m spacy download en_core_web_lg
WORKDIR /src/services/worker/
ENTRYPOINT ["poetry", "run", "python", "src/worker/main.py"]

# Cache maintenance job
FROM common AS cache_maintenance
COPY libs /src/libs
COPY jobs/cache_maintenance /src/jobs/cache_maintenance
RUN poetry install --no-cache -P /src/jobs/cache_maintenance
WORKDIR /src/jobs/cache_maintenance/
WORKDIR /src/jobs/cache_maintenance
RUN poetry install --no-cache
ENTRYPOINT ["poetry", "run", "python", "src/cache_maintenance/main.py"]

# MongoDB migration job
FROM common AS mongodb_migration
COPY libs /src/libs
COPY jobs/mongodb_migration /src/jobs/mongodb_migration
RUN poetry install --no-cache -P /src/jobs/mongodb_migration
WORKDIR /src/jobs/mongodb_migration/
ENTRYPOINT ["poetry", "run", "python", "src/mongodb_migration/main.py"]
WORKDIR /src/jobs/mongodb_migration
RUN poetry install --no-cache
ENTRYPOINT ["poetry", "run", "python", "src/mongodb_migration/main.py"]
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ e2e:

.PHONY: install
install:
$(MAKE) -C libs/libviewer install
$(MAKE) -C libs/libcommon install
$(MAKE) -C libs/libapi install
$(MAKE) -C jobs/cache_maintenance install
Expand Down
37 changes: 36 additions & 1 deletion jobs/mongodb_migration/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading