Skip to content

Commit 8754664

Browse files
committed
Rearrange docker image layers for better cache
1 parent 136d107 commit 8754664

File tree

3 files changed

+114
-70
lines changed

3 files changed

+114
-70
lines changed

docker/Dockerfile.scheduler

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,26 @@
11
ARG PYTHON_VERSION=3.13
22
FROM python:$PYTHON_VERSION-slim-bookworm AS base
33

4+
RUN useradd syncmaster --create-home && \
5+
mkdir -p /home/syncmaster && \
6+
chown -R syncmaster:syncmaster /home/syncmaster
7+
48
WORKDIR /app
59
ENV PYTHONPATH=/app \
6-
PATH="/app/.venv/bin:$PATH" \
7-
PYTHONUNBUFFERED=1
8-
9-
COPY ./docker/entrypoint_scheduler.sh /app/entrypoint.sh
10-
RUN chmod +x /app/entrypoint.sh
11-
ENTRYPOINT ["/app/entrypoint.sh"]
10+
PATH="/app/.venv/bin:$PATH"
1211

1312

1413
FROM base AS builder
1514

1615
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/bin/uv
1716

18-
COPY ./pyproject.toml ./uv.lock ./
19-
RUN --mount=type=cache,target=/root/.cache/uv \
17+
RUN --mount=type=bind,source=./pyproject.toml,target=/app/pyproject.toml \
18+
--mount=type=bind,source=./uv.lock,target=/app/uv.lock \
19+
--mount=type=cache,target=/root/.cache/uv \
2020
uv sync \
2121
--frozen \
2222
--no-install-project \
23+
--link-mode copy \
2324
# TODO: make scheduler independent from server
2425
--extra "server" \
2526
--extra "scheduler" \
@@ -30,23 +31,31 @@ FROM base AS prod
3031

3132
COPY --from=builder /app/.venv/ /app/.venv/
3233
COPY ./syncmaster/ /app/syncmaster/
33-
RUN python -m compileall -b syncmaster
34+
RUN python -m compileall -b /app/syncmaster
35+
COPY ./pyproject.toml ./uv.lock /app/syncmaster/
3436

37+
COPY --chmod=755 ./docker/entrypoint_scheduler.sh /app/entrypoint.sh
38+
ENTRYPOINT ["/app/entrypoint.sh"]
3539
# Do not run production as root, to improve security.
3640
# Also user does not own anything inside the image, including venv and source code.
37-
RUN useradd syncmaster
3841
USER syncmaster
3942

4043

4144
FROM builder AS test
4245

43-
RUN --mount=type=cache,target=/root/.cache/uv \
46+
RUN --mount=type=bind,source=./pyproject.toml,target=/app/pyproject.toml \
47+
--mount=type=bind,source=./uv.lock,target=/app/uv.lock \
48+
--mount=type=cache,target=/root/.cache/uv \
4449
uv sync \
4550
--frozen \
4651
--no-install-project \
52+
--link-mode copy \
4753
--extra "server" \
4854
--extra "scheduler" \
4955
--group "test" \
5056
--compile-bytecode
5157

58+
COPY ./pyproject.toml ./uv.lock /app/syncmaster/
59+
COPY --chmod=755 ./docker/entrypoint_scheduler.sh /app/entrypoint.sh
5260
RUN sed -i 's/python -m/coverage run -m/g' /app/entrypoint.sh
61+
ENTRYPOINT ["/app/entrypoint.sh"]

docker/Dockerfile.server

Lines changed: 39 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -7,67 +7,77 @@ RUN apt-get update \
77
curl \
88
&& rm -rf /var/lib/apt/lists/* /var/cache/*
99

10+
RUN useradd syncmaster --create-home && \
11+
mkdir -p /home/syncmaster && \
12+
chown -R syncmaster:syncmaster /home/syncmaster
13+
1014
WORKDIR /app
1115
ENV PYTHONPATH=/app \
12-
PATH="/app/.venv/bin:$PATH" \
13-
PYTHONUNBUFFERED=1
14-
15-
# add this when logo will be ready
16-
COPY ./docs/_static/*.svg ./syncmaster/server/static/
17-
18-
# Swagger UI
19-
ADD https://cdn.jsdelivr.net/npm/swagger-ui-dist@latest/swagger-ui-bundle.js https://cdn.jsdelivr.net/npm/swagger-ui-dist@latest/swagger-ui.css \
20-
/app/syncmaster/server/static/swagger/
21-
22-
# Redoc
23-
ADD https://cdn.jsdelivr.net/npm/redoc@latest/bundles/redoc.standalone.js /app/syncmaster/server/static/redoc/redoc.standalone.js
24-
25-
ENV SYNCMASTER__SERVER__OPENAPI__SWAGGER__JS_URL=/static/swagger/swagger-ui-bundle.js \
26-
SYNCMASTER__SERVER__OPENAPI__SWAGGER__CSS_URL=/static/swagger/swagger-ui.css \
27-
SYNCMASTER__SERVER__OPENAPI__REDOC__JS_URL=/static/redoc/redoc.standalone.js \
28-
SYNCMASTER__SERVER__STATIC_FILES__DIRECTORY=/app/syncmaster/server/static
29-
30-
COPY ./docker/entrypoint_server.sh /app/entrypoint.sh
31-
RUN chmod +x /app/entrypoint.sh \
32-
&& chmod +r -R /app/syncmaster/server/static
33-
ENTRYPOINT ["/app/entrypoint.sh"]
34-
EXPOSE 8000
35-
HEALTHCHECK --interval=30s --timeout=5s --start-period=5s --retries=3 CMD ["curl", "-f", "http://localhost:8000/monitoring/ping"]
16+
PATH="/app/.venv/bin:$PATH"
3617

3718

3819
FROM base AS builder
3920

4021
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/bin/uv
4122

42-
COPY ./pyproject.toml ./uv.lock ./
43-
RUN --mount=type=cache,target=/root/.cache/uv \
23+
RUN --mount=type=bind,source=./pyproject.toml,target=/app/pyproject.toml \
24+
--mount=type=bind,source=./uv.lock,target=/app/uv.lock \
25+
--mount=type=cache,target=/root/.cache/uv \
4426
uv sync \
4527
--frozen \
4628
--no-install-project \
29+
--link-mode copy \
4730
--extra "server" \
4831
--compile-bytecode
4932

5033

5134
FROM base AS prod
5235

5336
COPY --from=builder /app/.venv/ /app/.venv/
37+
38+
COPY ./docs/_static/*.svg /app/syncmaster/server/static/
39+
40+
# Swagger UI
41+
ADD https://cdn.jsdelivr.net/npm/swagger-ui-dist@latest/swagger-ui-bundle.js https://cdn.jsdelivr.net/npm/swagger-ui-dist@latest/swagger-ui.css \
42+
/app/syncmaster/server/static/swagger/
43+
44+
# Redoc
45+
ADD https://cdn.jsdelivr.net/npm/redoc@latest/bundles/redoc.standalone.js /app/syncmaster/server/static/redoc/redoc.standalone.js
46+
47+
ENV SYNCMASTER__SERVER__OPENAPI__SWAGGER__JS_URL=/static/swagger/swagger-ui-bundle.js \
48+
SYNCMASTER__SERVER__OPENAPI__SWAGGER__CSS_URL=/static/swagger/swagger-ui.css \
49+
SYNCMASTER__SERVER__OPENAPI__REDOC__JS_URL=/static/redoc/redoc.standalone.js \
50+
SYNCMASTER__SERVER__STATIC_FILES__DIRECTORY=/app/syncmaster/server/static
51+
5452
COPY ./syncmaster/ /app/syncmaster/
55-
RUN python -m compileall -b syncmaster
53+
RUN python -m compileall -b /app/syncmaster
54+
COPY ./pyproject.toml ./uv.lock /app/syncmaster/
5655

56+
# Server image: ship the server entrypoint script (executable for all users).
COPY --chmod=755 ./docker/entrypoint_server.sh /app/entrypoint.sh
57+
ENTRYPOINT ["/app/entrypoint.sh"]
58+
EXPOSE 8000
59+
HEALTHCHECK --interval=30s --timeout=5s --start-period=5s --retries=3 CMD ["curl", "-f", "http://localhost:8000/monitoring/ping"]
5760
# Do not run production as root, to improve security.
5861
# Also user does not own anything inside the image, including venv and source code.
59-
RUN useradd syncmaster
6062
USER syncmaster
6163

6264

6365
FROM builder AS test
6466

65-
RUN --mount=type=cache,target=/root/.cache/uv \
67+
RUN --mount=type=bind,source=./pyproject.toml,target=/app/pyproject.toml \
68+
--mount=type=bind,source=./uv.lock,target=/app/uv.lock \
69+
--mount=type=cache,target=/root/.cache/uv \
6670
uv sync \
6771
--frozen \
6872
--no-install-project \
73+
--link-mode copy \
6974
--extra "server" \
7075
--group "test" \
7176
--compile-bytecode
7277

78+
COPY ./pyproject.toml ./uv.lock /app/syncmaster/
79+
COPY --chmod=755 ./docker/entrypoint_server.sh /app/entrypoint.sh
7380
RUN sed -i 's/python -m/coverage run -m/g' /app/entrypoint.sh
81+
ENTRYPOINT ["/app/entrypoint.sh"]
82+
EXPOSE 8000
83+
HEALTHCHECK --interval=30s --timeout=5s --start-period=5s --retries=3 CMD ["curl", "-f", "http://localhost:8000/monitoring/ping"]

docker/Dockerfile.worker

Lines changed: 55 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
# syntax=docker/dockerfile:1
2+
13
ARG PYTHON_VERSION=3.13
24
FROM python:$PYTHON_VERSION-slim-bookworm AS base
35

@@ -8,14 +10,14 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
810
krb5-user \
911
&& rm -rf /var/lib/apt/lists/* /var/cache/*
1012

13+
RUN useradd syncmaster --create-home && \
14+
mkdir -p /home/syncmaster/.ivy2/cache && \
15+
mkdir -p /home/syncmaster/.ivy2/jars && \
16+
chown -R syncmaster:syncmaster /home/syncmaster
17+
1118
WORKDIR /app
1219
ENV PYTHONPATH=/app \
13-
PATH="/app/.venv/bin:$PATH" \
14-
PYTHONUNBUFFERED=1
15-
16-
COPY ./docker/entrypoint_worker.sh /app/entrypoint.sh
17-
RUN chmod +x /app/entrypoint.sh
18-
ENTRYPOINT ["/app/entrypoint.sh"]
20+
PATH="/app/.venv/bin:$PATH"
1921

2022

2123
FROM base AS builder
@@ -30,21 +32,27 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
3032

3133
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/bin/uv
3234

33-
COPY ./pyproject.toml ./uv.lock ./
34-
RUN --mount=type=cache,target=/root/.cache/uv \
35+
RUN --mount=type=bind,source=./pyproject.toml,target=/app/pyproject.toml \
36+
--mount=type=bind,source=./uv.lock,target=/app/uv.lock \
37+
--mount=type=cache,target=/root/.cache/uv \
3538
uv sync \
3639
--frozen \
3740
--no-install-project \
41+
--link-mode copy \
3842
--extra "worker" \
3943
--extra "kerberos" \
4044
--compile-bytecode
4145

4246

4347
FROM builder AS ivy2_packages
4448

49+
RUN apt-get update && apt-get install -y --no-install-recommends \
50+
rsync \
51+
&& rm -rf /var/lib/apt/lists/* /var/cache/*
52+
4553
RUN --mount=type=bind,source=./syncmaster/worker/ivy2.py,target=/app/syncmaster/worker/ivy2.py \
4654
--mount=type=bind,source=./docker/download_ivy2_packages.py,target=/app/docker/download_ivy2_packages.py \
47-
--mount=type=cache,target=/root/.ivy2/ \
55+
--mount=type=cache,target=/root/.ivy2 \
4856
# Try to download all dependencies at once.
4957
# If multiple packages depends on the same transitive dependency, Spark uses maximum version of this dependency.
5058
python /app/docker/download_ivy2_packages.py all && \
@@ -58,53 +66,70 @@ RUN --mount=type=bind,source=./syncmaster/worker/ivy2.py,target=/app/syncmaster/
5866
python /app/docker/download_ivy2_packages.py oracle && \
5967
python /app/docker/download_ivy2_packages.py mssql && \
6068
python /app/docker/download_ivy2_packages.py mysql && \
61-
mkdir -p /home/syncmaster/.ivy2/ && \
62-
cp --recursive /root/.ivy2/* /home/syncmaster/.ivy2/
63-
# if someone uses custom worker image, they should download jars on their own
69+
mkdir -p /home/syncmaster/.ivy2/cache/ && \
70+
rsync \
71+
--archive \
72+
--times \
73+
--omit-dir-times \
74+
# ivydata-$version.properties contains download time, avoid copying it to prevent layer cache invalidation
75+
--exclude 'ivydata*.properties' \
76+
# ignored by Spark
77+
--exclude 'ivyreport*' \
78+
# do not copy ~/.ivy2/jars/$group.$artifact.jar, as these are the same files as in ~/.ivy2/cache/$group/$artifact/jars/
79+
/root/.ivy2/cache/ /home/syncmaster/.ivy2/cache/ && \
80+
# reset directory timestamps
81+
# "-d @0" sets the timestamp to the Unix epoch; a bare "@0" would be treated as a file name to create
find /home/syncmaster/.ivy2/cache/ -type d -exec touch -d @0 {} \; && \
82+
# custom Spark session function may download additional jars, so directories must be writable by any user; existing jar files are left unchanged
83+
find /home/syncmaster/.ivy2/ -type d -exec chmod 777 {} \;
84+
85+
RUN mkdir -p /root && ln -s /home/syncmaster/.ivy2 /root/.ivy2
6486

6587

6688
FROM base AS prod
6789

68-
# Do not run production as root, to improve security.
69-
# Also user does not own anything inside the image, including venv and source code.
70-
RUN useradd syncmaster && \
71-
mkdir -p /home/syncmaster /home/syncmaster/.ivy2 && \
72-
chown -R syncmaster:syncmaster /home/syncmaster
73-
90+
# python dependencies are copied first; .ivy2 (about twice as heavy) is copied afterwards with --link as an independent layer
7491
COPY --from=builder /app/.venv/ /app/.venv/
75-
# custom Spark session function may download different jars, so syncmaster have to own them
76-
COPY --from=ivy2_packages --chown=syncmaster:syncmaster /home/syncmaster/.ivy2/ /home/syncmaster/.ivy2/
92+
93+
# using --link to make ~/.ivy2 a separated layer in docker image, not based on previous layers
94+
COPY --link --from=ivy2_packages /home/syncmaster/.ivy2/cache/ /home/syncmaster/.ivy2/cache/
7795
# If someone needs to use worker image with root user, use the same jars
78-
RUN mkdir -p /root && \
79-
ln -s /home/syncmaster/.ivy2 /root/.ivy2
96+
RUN mkdir -p /root && ln -s /home/syncmaster/.ivy2 /root/.ivy2
8097

98+
COPY ./pyproject.toml ./uv.lock /app/syncmaster/
99+
COPY --chmod=755 ./docker/entrypoint_worker.sh /app/entrypoint.sh
81100
COPY ./syncmaster/ /app/syncmaster/
82-
RUN python -m compileall syncmaster
101+
RUN python -m compileall /app/syncmaster
102+
ENTRYPOINT ["/app/entrypoint.sh"]
103+
# Do not run production as root, to improve security.
104+
# Also user does not own anything inside the image, including venv and source code.
83105
USER syncmaster
84106

85107

86108
FROM ivy2_packages AS test
87109

88-
RUN mkdir -p /root && \
89-
ln -s /home/syncmaster/.ivy2 /root/.ivy2
90-
91-
RUN --mount=type=cache,target=/root/.cache/uv \
110+
RUN --mount=type=bind,source=./pyproject.toml,target=/app/pyproject.toml \
111+
--mount=type=bind,source=./uv.lock,target=/app/uv.lock \
112+
--mount=type=cache,target=/root/.cache/uv \
92113
uv sync \
93114
--frozen \
94115
--no-install-project \
116+
--link-mode copy \
95117
# CI runs tests in the worker container,
96118
# so we need server & scheduler dependencies too
97119
--all-extras \
98120
--group "test" \
99121
--compile-bytecode
100122

101-
ENV SYNCMASTER__WORKER__CREATE_SPARK_SESSION_FUNCTION=tests.spark.get_worker_spark_session
102-
103-
# Collect coverage from worker
123+
COPY ./pyproject.toml ./uv.lock /app/syncmaster/
124+
COPY --chmod=755 ./docker/entrypoint_worker.sh /app/entrypoint.sh
104125
RUN sed -i 's/python -m/coverage run -m/g' /app/entrypoint.sh
126+
ENTRYPOINT ["/app/entrypoint.sh"]
105127

106128
# Replace kinit binary with dummy, to skip Kerberos interaction in tests
107129
RUN mkdir -p /app/.local/bin && \
108130
echo "#!/bin/bash" > /app/.local/bin/kinit \
109131
&& chmod +x /app/.local/bin/kinit
110132
ENV PATH="/app/.local/bin:$PATH"
133+
134+
# use custom Spark session factory
135+
ENV SYNCMASTER__WORKER__CREATE_SPARK_SESSION_FUNCTION=tests.spark.get_worker_spark_session

0 commit comments

Comments
 (0)