@@ -30,11 +30,10 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
3030 libkrb5-dev \
3131 && rm -rf /var/lib/apt/lists/* /var/cache/*
3232
33- COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/bin/uv
33+ COPY --link --from=ghcr.io/astral-sh/uv:latest /uv /usr/bin/uv
3434
35- RUN --mount=type=bind,source=./pyproject.toml,target=/app/pyproject.toml \
36- --mount=type=bind,source=./uv.lock,target=/app/uv.lock \
37- --mount=type=cache,target=/root/.cache/uv \
35+ COPY ./pyproject.toml ./uv.lock /app/
36+ RUN --mount=type=cache,target=/root/.cache/uv \
3837 uv sync \
3938 --frozen \
4039 --no-install-project \
@@ -44,6 +43,8 @@ RUN --mount=type=bind,source=./pyproject.toml,target=/app/pyproject.toml \
4443 --compile-bytecode
4544
4645
46+ # This layer contains .jar and .xml files of spark.jars.packages
47+ # The same list of packages should produce the same file content & metadata (modification times, ownership)
4748FROM builder AS ivy2_packages
4849
4950RUN apt-get update && apt-get install -y --no-install-recommends \
@@ -66,9 +67,9 @@ RUN --mount=type=bind,source=./syncmaster/worker/ivy2.py,target=/app/syncmaster/
6667 python /app/docker/download_ivy2_packages.py oracle && \
6768 python /app/docker/download_ivy2_packages.py mssql && \
6869 python /app/docker/download_ivy2_packages.py mysql && \
69- mkdir -p /home/syncmaster/.ivy2/cache/ && \
7070 rsync \
7171 --archive \
72+ # ivy2 keeps file mtime as it was uploaded to Maven
7273 --times \
7374 --omit-dir-times \
7475 # ivydata-$version.properties contains download time, avoid copying it to prevent layer cache invalidation
@@ -77,25 +78,24 @@ RUN --mount=type=bind,source=./syncmaster/worker/ivy2.py,target=/app/syncmaster/
7778 --exclude 'ivyreport*' \
7879 # do not copy ~/.ivy2/jars/$group.$artifact.jar, as these are the same files as in ~/.ivy2/cache/$group/$artifact/jars/
7980 /root/.ivy2/cache/ /home/syncmaster/.ivy2/cache/ && \
80- # reset directory timestamps
81- find /home/syncmaster/.ivy2/cache/ -type d -exec touch @0 {} \; && \
8281 # a custom Spark session function may download additional jars, so directories must be writable by the user (the jars themselves need not be)
83- find /home/syncmaster/.ivy2/ -type d -exec chmod 777 {} \;
82+ find /home/syncmaster/.ivy2/ -type d -exec chmod 777 {} \; && \
83+ # reset directory timestamps
84+ find /home/syncmaster/.ivy2/ -type d -exec touch -d @0 {} \;
8485
8586RUN mkdir -p /root && ln -s /home/syncmaster/.ivy2 /root/.ivy2
8687
8788
8889FROM base AS prod
8990
9091# place python dependencies after .ivy2 because the latter are twice as heavy
91- COPY --from=builder /app/.venv/ /app/.venv/
92+ COPY --link --from=builder /app/.venv/ /app/.venv/
9293
9394# using --link to make ~/.ivy2 a separate layer in the docker image, not based on previous layers
9495COPY --link --from=ivy2_packages /home/syncmaster/.ivy2/cache/ /home/syncmaster/.ivy2/cache/
9596# If someone needs to use worker image with root user, use the same jars
9697RUN mkdir -p /root && ln -s /home/syncmaster/.ivy2 /root/.ivy2
9798
98- COPY ./pyproject.toml ./uv.lock /app/syncmaster/
9999COPY --chmod=755 ./docker/entrypoint_worker.sh /app/entrypoint.sh
100100COPY ./syncmaster/ /app/syncmaster/
101101RUN python -m compileall /app/syncmaster
@@ -107,9 +107,7 @@ USER syncmaster
107107
108108FROM ivy2_packages AS test
109109
110- RUN --mount=type=bind,source=./pyproject.toml,target=/app/pyproject.toml \
111- --mount=type=bind,source=./uv.lock,target=/app/uv.lock \
112- --mount=type=cache,target=/root/.cache/uv \
110+ RUN --mount=type=cache,target=/root/.cache/uv \
113111 uv sync \
114112 --frozen \
115113 --no-install-project \
@@ -120,7 +118,6 @@ RUN --mount=type=bind,source=./pyproject.toml,target=/app/pyproject.toml \
120118 --group "test" \
121119 --compile-bytecode
122120
123- COPY ./pyproject.toml ./uv.lock /app/syncmaster/
124121COPY --chmod=755 ./docker/entrypoint_worker.sh /app/entrypoint.sh
125122RUN sed -i 's/python -m/coverage run -m/g' /app/entrypoint.sh
126123ENTRYPOINT ["/app/entrypoint.sh"]
0 commit comments