# Multi-stage Dockerfile for all dataset-viewer services and jobs
# Build with: docker build --target <service_name> -t <tag> .

# Python version used by the base images; declared before the first FROM so
# that it can be referenced in FROM lines.
ARG PYTHON_VERSION=3.12.11

# Builder stage: compiles the libviewer wheel into /tmp/dist
FROM python:${PYTHON_VERSION}-slim AS viewer

# Make a RUN step fail when any command of a pipeline fails
# (needed for the `curl | sh` rustup install below)
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Install curl (used to fetch rustup) and the C build toolchain
RUN apt-get update \
    && apt-get install -y --no-install-recommends build-essential curl \
    && rm -rf /var/lib/apt/lists/*

# Add cargo bin dir to PATH before installing, so rustc, cargo and maturin
# are available in this and every later step without sourcing ~/.cargo/env
ENV PATH="/root/.cargo/bin:${PATH}"

# Install Rust toolchain and maturin
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y \
    && pip install --no-cache-dir maturin \
    && rustc --version \
    && cargo --version

# Build libviewer
COPY libs/libviewer /src/libs/libviewer
WORKDIR /src/libs/libviewer
RUN maturin build --release --strip --out /tmp/dist

# Base stage with shared setup
FROM python:${PYTHON_VERSION}-slim AS common

# System dependencies
RUN apt-get update \
    && apt-get install -y unzip wget procps htop ffmpeg libavcodec-extra libsndfile1 \
    && rm -rf /var/lib/apt/lists/*

# Poetry version is only used while building this stage, hence ARG, not ENV
ARG POETRY_VERSION=2.1.4

# Common environment variables
ENV PYTHONFAULTHANDLER=1 \
    PYTHONUNBUFFERED=1 \
    PYTHONHASHSEED=random \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=on \
    PIP_DEFAULT_TIMEOUT=100 \
    POETRY_NO_INTERACTION=1 \
    POETRY_VIRTUALENVS_CREATE=false \
    PATH="$PATH:/root/.local/bin"

# Install pip and poetry
RUN pip install -U pip && pip install "poetry==${POETRY_VERSION}"

# Install libcommon's dependencies but not libcommon itself
COPY libs/libcommon/poetry.lock \
     libs/libcommon/pyproject.toml \
     /src/libs/libcommon/
WORKDIR /src/libs/libcommon
RUN poetry install --no-cache --no-root

# Below are the actual API services which depend on libapi and libcommon.
# Since the majority of the dependencies are already installed in the
# `common` stage, we let poetry handle the rest.

# API service
# NOTE(review): kept as a build target on the `common` base, in the same
# shape as the other services - confirm it is still meant to be built.
FROM common AS api
COPY libs /src/libs
COPY services/api /src/services/api
WORKDIR /src/services/api
RUN poetry install --no-cache
ENTRYPOINT ["poetry", "run", "python", "src/api/main.py"]

# Admin service
FROM common AS admin
# Bring in the shared libs, then the service sources
COPY libs /src/libs
COPY services/admin /src/services/admin
# Run poetry from the service directory
WORKDIR /src/services/admin
RUN poetry install --no-cache
ENTRYPOINT ["poetry", "run", "python", "src/admin/main.py"]

# Rows service
FROM common AS rows
# Install the libviewer wheel produced by the `viewer` stage
COPY --from=viewer /tmp/dist /tmp/dist
RUN pip install /tmp/dist/libviewer-*.whl
# Bring in the shared libs, then the service sources
COPY libs /src/libs
COPY services/rows /src/services/rows
# Run poetry from the service directory
WORKDIR /src/services/rows
RUN poetry install --no-cache
ENTRYPOINT ["poetry", "run", "python", "src/rows/main.py"]

# Search service
FROM common AS search
# Bring in the shared libs, then the service sources
COPY libs /src/libs
COPY services/search /src/services/search
# Run poetry from the service directory
WORKDIR /src/services/search
RUN poetry install --no-cache
ENTRYPOINT ["poetry", "run", "python", "src/search/main.py"]

# SSE API service
FROM common AS sse-api
# Bring in the shared libs, then the service sources
COPY libs /src/libs
COPY services/sse-api /src/services/sse-api
# Run poetry from the service directory
WORKDIR /src/services/sse-api
RUN poetry install --no-cache
ENTRYPOINT ["poetry", "run", "python", "src/sse_api/main.py"]

# Webhook service
FROM common AS webhook
# Bring in the shared libs, then the service sources
COPY libs /src/libs
COPY services/webhook /src/services/webhook
# Run poetry from the service directory
WORKDIR /src/services/webhook
RUN poetry install --no-cache
ENTRYPOINT ["poetry", "run", "python", "src/webhook/main.py"]

# Worker service
FROM common AS worker
COPY libs /src/libs
COPY services/worker /src/services/worker
WORKDIR /src/services/worker
# presidio-analyzer > spacy > thinc doesn't ship aarch64 wheels so need to compile
# (only the compiler toolchain itself is installed, without recommended extras)
RUN if [ "$(uname -m)" = "aarch64" ]; then \
        apt-get update \
        && apt-get install -y --no-install-recommends build-essential \
        && rm -rf /var/lib/apt/lists/*; \
    fi
RUN poetry install --no-cache
# Download the spaCy model at build time so it is part of the image
RUN python -m spacy download en_core_web_lg
ENTRYPOINT ["poetry", "run", "python", "src/worker/main.py"]

# Cache maintenance job
FROM common AS cache-maintenance
# Bring in the shared libs, then the job sources
COPY libs /src/libs
COPY jobs/cache_maintenance /src/jobs/cache_maintenance
# Run poetry from the job directory
WORKDIR /src/jobs/cache_maintenance
RUN poetry install --no-cache
ENTRYPOINT ["poetry", "run", "python", "src/cache_maintenance/main.py"]

# MongoDB migration job
FROM common AS mongodb-migration
# Bring in the shared libs, then the job sources
COPY libs /src/libs
COPY jobs/mongodb_migration /src/jobs/mongodb_migration
# Run poetry from the job directory
WORKDIR /src/jobs/mongodb_migration
RUN poetry install --no-cache
ENTRYPOINT ["poetry", "run", "python", "src/data_migration/main.py"]