Skip to content

Commit 92fd9df

Browse files
committed
[DOP-30579] Improve ivy2 package caching
1 parent 462fb89 commit 92fd9df

File tree

2 files changed

+16
-12
lines changed

2 files changed

+16
-12
lines changed

docker/Dockerfile.worker

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -41,23 +41,25 @@ RUN --mount=type=cache,target=/root/.cache/pypoetry \
4141
&& python -m compileall -j 4 .venv
4242

4343

44-
FROM builder AS maven_packages
44+
FROM builder AS ivy2_packages
4545

4646
RUN --mount=type=bind,source=./syncmaster/worker/ivy2.py,target=/app/syncmaster/worker/ivy2.py \
47-
--mount=type=bind,source=./docker/download_maven_packages.py,target=/app/docker/download_maven_packages.py \
48-
mkdir /root/.ivy2 && \
47+
--mount=type=bind,source=./docker/download_ivy2_packages.py,target=/app/docker/download_ivy2_packages.py \
48+
--mount=type=cache,target=/root/.ivy2 \
4949
# Try to download all dependencies at once.
5050
# If multiple packages depend on the same transitive dependency, Spark uses the maximum version of this dependency.
51-
python /app/docker/download_maven_packages.py all && \
51+
python /app/docker/download_ivy2_packages.py all && \
5252
# Then try to download specific connectors to fetch exact dependency version specified within connector.
5353
# Yes, this is slow, but otherwise using the worker without internet access will fail, unless a custom ivysettings.xml is used
54-
python /app/docker/download_maven_packages.py s3 && \
55-
python /app/docker/download_maven_packages.py hdfs && \
56-
python /app/docker/download_maven_packages.py clickhouse && \
57-
python /app/docker/download_maven_packages.py postgres && \
58-
python /app/docker/download_maven_packages.py oracle && \
59-
python /app/docker/download_maven_packages.py mssql && \
60-
python /app/docker/download_maven_packages.py mysql
54+
python /app/docker/download_ivy2_packages.py s3 && \
55+
python /app/docker/download_ivy2_packages.py hdfs && \
56+
python /app/docker/download_ivy2_packages.py clickhouse && \
57+
python /app/docker/download_ivy2_packages.py postgres && \
58+
python /app/docker/download_ivy2_packages.py oracle && \
59+
python /app/docker/download_ivy2_packages.py mssql && \
60+
python /app/docker/download_ivy2_packages.py mysql && \
61+
mkdir -p /home/syncmaster/.ivy2 && \
62+
# /root/.ivy2 is a cache mount and is not saved in the image layer, so copy its contents out. The destination already exists, so use "/." to copy the contents instead of nesting a second .ivy2 directory inside it.
cp --recursive /root/.ivy2/. /home/syncmaster/.ivy2/
6163
# if someone uses custom worker image, they should download jars on their own
6264

6365

@@ -72,7 +74,9 @@ RUN useradd syncmaster && \
7274
# We don't need poetry and compilers in final image
7375
COPY --from=builder /app/.venv/ /app/.venv/
7476
# A custom Spark session function may download different jars, so syncmaster has to own them
75-
COPY --from=maven_packages --chown=syncmaster:syncmaster /root/.ivy2/ /home/syncmaster/.ivy2/
77+
COPY --from=ivy2_packages --chown=syncmaster:syncmaster /home/syncmaster/.ivy2/ /home/syncmaster/.ivy2/
78+
# If someone needs to use worker image with root user, use the same jars
79+
RUN ln -s /home/syncmaster/.ivy2 /root/.ivy2
7680

7781
COPY ./syncmaster/ /app/syncmaster/
7882
RUN python -m compileall syncmaster

0 commit comments

Comments
 (0)