Skip to content

Commit 814e419

Browse files
committed
[DOP-25348] Improve docker images size
1 parent 9a3f895 commit 814e419

File tree

4 files changed

+102
-73
lines changed

4 files changed

+102
-73
lines changed

.dockerignore

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1-
__pycache__/
2-
*.py[cod]
1+
**/__pycache__/
2+
**.pyc
3+
**.pyo
4+
**.pyd
35
*.envx
46
*.cover
57
.coverage

docker/Dockerfile.scheduler

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,48 @@
1-
ARG BASE_IMAGE=python:3.13-slim
2-
FROM $BASE_IMAGE AS base
1+
ARG PYTHON_VERSION=3.13
2+
FROM python:$PYTHON_VERSION-slim AS base
33

4-
RUN apt-get update && apt-get install -y curl \
5-
&& rm -rf /var/lib/apt/lists/*
4+
WORKDIR /app
5+
ENV PYTHONPATH=/app \
6+
PATH="/app/.venv/bin:$PATH" \
7+
POETRY_VIRTUALENVS_IN_PROJECT=1 \
8+
POETRY_VIRTUALENVS_CREATE=1
69

7-
RUN pip install --upgrade pip setuptools wheel packaging && \
8-
curl -sSL https://install.python-poetry.org | python3 - && \
9-
ln -s /root/.local/bin/poetry /usr/local/bin/poetry && \
10-
poetry config virtualenvs.create false
10+
COPY ./docker/entrypoint_scheduler.sh /app/entrypoint.sh
11+
ENTRYPOINT ["/app/entrypoint.sh"]
1112

12-
WORKDIR /app
13-
ENV PYTHONPATH=/app
1413

15-
COPY ./pyproject.toml ./poetry.lock* /app/
14+
FROM base AS builder
1615

16+
RUN pip install poetry
17+
18+
COPY ./pyproject.toml ./poetry.lock ./
1719
RUN --mount=type=cache,target=/root/.cache/pypoetry \
1820
poetry install \
1921
--no-root \
2022
# TODO: make scheduler independent from server
2123
--extras "server" \
2224
--extras "scheduler" \
23-
--without test,docs,dev
25+
--without test,docs,dev \
26+
&& python -m compileall .venv
2427

25-
COPY ./docker/entrypoint_scheduler.sh /app/entrypoint.sh
26-
ENTRYPOINT ["/app/entrypoint.sh"]
2728

2829
FROM base AS prod
2930

31+
# We don't need Poetry in the final image
32+
COPY --from=builder /app/.venv/ /app/.venv/
3033
COPY ./syncmaster/ /app/syncmaster/
34+
RUN python -m compileall -b syncmaster
3135

3236

33-
FROM base AS test
37+
FROM builder AS test
3438

3539
RUN --mount=type=cache,target=/root/.cache/pypoetry \
3640
poetry install \
3741
--no-root \
42+
--extras "server" \
3843
--extras "scheduler" \
3944
--with test \
40-
--without docs,dev
45+
--without docs,dev \
46+
&& python -m compileall .venv
4147

4248
RUN sed -i 's/python -m/coverage run -m/g' /app/entrypoint.sh

docker/Dockerfile.server

Lines changed: 40 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,24 @@
1-
ARG BASE_IMAGE=python:3.13-slim
2-
FROM $BASE_IMAGE AS base
1+
ARG PYTHON_VERSION=3.13
2+
FROM python:$PYTHON_VERSION-slim AS base
33

4-
RUN apt-get update && apt-get install -y curl \
5-
&& rm -rf /var/lib/apt/lists/*
6-
7-
RUN pip install --upgrade pip setuptools wheel packaging && \
8-
curl -sSL https://install.python-poetry.org | python3 - && \
9-
ln -s /root/.local/bin/poetry /usr/local/bin/poetry && \
10-
poetry config virtualenvs.create false
4+
RUN apt-get update \
5+
&& apt-get install -y --no-install-recommends \
6+
# Used for healthcheck
7+
curl \
8+
&& rm -rf /var/lib/apt/lists/* /var/cache/*
119

1210
WORKDIR /app
13-
ENV PYTHONPATH=/app
14-
15-
COPY ./pyproject.toml ./poetry.lock* /app/
16-
17-
RUN --mount=type=cache,target=/root/.cache/pypoetry \
18-
poetry install \
19-
--no-root \
20-
--extras "server" \
21-
--without test,docs,dev
11+
ENV PYTHONPATH=/app \
12+
PATH="/app/.venv/bin:$PATH" \
13+
POETRY_VIRTUALENVS_IN_PROJECT=1 \
14+
POETRY_VIRTUALENVS_CREATE=1
2215

23-
COPY ./docker/entrypoint_server.sh /app/entrypoint.sh
24-
ENTRYPOINT ["/app/entrypoint.sh"]
25-
26-
27-
FROM base AS prod
28-
29-
COPY ./syncmaster/ /app/syncmaster/
3016
# add this when the logo is ready
3117
# COPY ./docs/_static/*.svg ./syncmaster/server/static/
3218

3319
# Swagger UI
34-
ADD https://cdn.jsdelivr.net/npm/swagger-ui-dist@latest/swagger-ui-bundle.js /app/syncmaster/server/static/swagger/swagger-ui-bundle.js
35-
ADD https://cdn.jsdelivr.net/npm/swagger-ui-dist@latest/swagger-ui.css /app/syncmaster/server/static/swagger/swagger-ui.css
20+
ADD https://cdn.jsdelivr.net/npm/swagger-ui-dist@latest/swagger-ui-bundle.js https://cdn.jsdelivr.net/npm/swagger-ui-dist@latest/swagger-ui.css \
21+
/app/syncmaster/server/static/swagger/
3622

3723
# Redoc
3824
ADD https://cdn.jsdelivr.net/npm/redoc@latest/bundles/redoc.standalone.js /app/syncmaster/server/static/redoc/redoc.standalone.js
@@ -42,14 +28,39 @@ ENV SYNCMASTER__SERVER__OPENAPI__SWAGGER__JS_URL=/static/swagger/swagger-ui-bund
4228
SYNCMASTER__SERVER__OPENAPI__REDOC__JS_URL=/static/redoc/redoc.standalone.js \
4329
SYNCMASTER__SERVER__STATIC_FILES__DIRECTORY=/app/syncmaster/server/static
4430

31+
COPY ./docker/entrypoint_server.sh /app/entrypoint.sh
32+
ENTRYPOINT ["/app/entrypoint.sh"]
33+
34+
35+
FROM base AS builder
36+
37+
RUN pip install poetry
38+
39+
COPY ./pyproject.toml ./poetry.lock* ./
40+
RUN --mount=type=cache,target=/root/.cache/pypoetry \
41+
poetry install \
42+
--no-root \
43+
--extras "server" \
44+
--without test,docs,dev \
45+
&& python -m compileall .venv
46+
47+
48+
FROM base AS prod
49+
50+
# We don't need Poetry in the final image
51+
COPY --from=builder /app/.venv/ /app/.venv/
52+
COPY ./syncmaster/ /app/syncmaster/
53+
RUN python -m compileall -b syncmaster
54+
4555

46-
FROM base AS test
56+
FROM builder AS test
4757

4858
RUN --mount=type=cache,target=/root/.cache/pypoetry \
4959
poetry install \
5060
--no-root \
5161
--extras "server" \
5262
--with test \
53-
--without docs,dev
63+
--without docs,dev \
64+
&& python -m compileall .venv
5465

5566
RUN sed -i 's/python -m/coverage run -m/g' /app/entrypoint.sh

docker/Dockerfile.worker

Lines changed: 36 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,53 +1,63 @@
1-
ARG BASE_IMAGE=python:3.13-slim
2-
FROM $BASE_IMAGE AS base
1+
ARG PYTHON_VERSION=3.13
2+
FROM python:$PYTHON_VERSION-slim AS base
33

4-
ENV DEBIAN_FRONTEND=noninteractive
54
RUN apt-get update && apt-get install -y --no-install-recommends \
65
openjdk-17-jdk-headless \
6+
# required for HDFS/Hive with Kerberos enabled
7+
krb5-user \
8+
&& rm -rf /var/lib/apt/lists/* /var/cache/*
9+
10+
WORKDIR /app
11+
ENV PYTHONPATH=/app \
12+
PATH="/app/.venv/bin:$PATH" \
13+
POETRY_VIRTUALENVS_IN_PROJECT=1 \
14+
POETRY_VIRTUALENVS_CREATE=1
15+
16+
COPY ./docker/entrypoint_worker.sh /app/entrypoint.sh
17+
ENTRYPOINT ["/app/entrypoint.sh"]
18+
CMD ["--loglevel=info"]
19+
20+
21+
FROM base AS builder
22+
23+
RUN apt-get update && apt-get install -y --no-install-recommends \
24+
autoconf \
725
gcc \
826
make \
9-
libffi-dev \
27+
# required to build gssapi from sources
1028
libkrb5-dev \
11-
curl \
12-
&& rm -rf /var/lib/apt/lists/*
29+
&& rm -rf /var/lib/apt/lists/* /var/cache/*
1330

14-
RUN pip install --upgrade pip setuptools wheel packaging && \
15-
curl -sSL https://install.python-poetry.org | python3 - && \
16-
ln -s /root/.local/bin/poetry /usr/local/bin/poetry && \
17-
poetry config virtualenvs.create false
18-
19-
WORKDIR /app
20-
ENV PYTHONPATH=/app
21-
22-
COPY ./pyproject.toml ./poetry.lock* /app/
31+
RUN pip install poetry
2332

33+
COPY ./pyproject.toml ./poetry.lock ./
2434
RUN --mount=type=cache,target=/root/.cache/pypoetry \
2535
poetry install \
2636
--no-root \
2737
--extras "worker" \
28-
--without test,docs,dev
29-
30-
COPY ./docker/entrypoint_worker.sh /app/entrypoint.sh
31-
ENTRYPOINT ["/app/entrypoint.sh"]
32-
CMD ["--loglevel=info"]
38+
--without test,docs,dev \
39+
&& python -m compileall .venv
3340

3441

3542
FROM base AS prod
3643

44+
# We don't need Poetry or compilers in the final image
45+
COPY --from=builder /app/.venv/ /app/.venv/
3746
COPY ./syncmaster/ /app/syncmaster/
47+
RUN python -m compileall syncmaster
3848

3949

40-
FROM base AS test
41-
42-
ENV SYNCMASTER__WORKER__CREATE_SPARK_SESSION_FUNCTION=tests.spark.get_worker_spark_session
43-
44-
# CI runs tests in the worker container, so we need server & scheduler dependencies too
50+
FROM builder AS test
4551

4652
RUN --mount=type=cache,target=/root/.cache/pypoetry \
4753
poetry install \
4854
--no-root \
55+
# CI runs tests in the worker container,
56+
# so we need server & scheduler dependencies too
4957
--all-extras \
5058
--with test \
51-
--without docs,dev
59+
--without docs,dev \
60+
&& python -m compileall .venv
5261

5362
RUN sed -i 's/python -m/coverage run -m/g' /app/entrypoint.sh
63+
ENV SYNCMASTER__WORKER__CREATE_SPARK_SESSION_FUNCTION=tests.spark.get_worker_spark_session

0 commit comments

Comments
 (0)