Skip to content

Commit 5064b5f

Browse files
committed
feat: install demo dependencies
1 parent d0d95fd commit 5064b5f

File tree

2 files changed

+1845
-5
lines changed

2 files changed

+1845
-5
lines changed

spark-connect-client/Dockerfile

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ LABEL name="Stackable Spark Connect Examples" \
2121

2222
ENV HOME=/stackable
2323

24-
COPY spark-connect-client/stackable/spark-connect-examples /stackable/spark-connect-examples
24+
COPY --chown=${STACKABLE_USER_UID}:0 spark-connect-client/stackable/spark-connect-examples /stackable/spark-connect-examples
25+
COPY --chown=${STACKABLE_USER_UID}:0 spark-connect-client/stackable/.jupyter /stackable/.jupyter
2526
COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-builder /stackable/spark/connect /stackable/spark/connect
2627

2728
RUN <<EOF
@@ -37,10 +38,6 @@ rm -rf /var/cache/yum
3738
ln -s /usr/bin/python${PYTHON} /usr/bin/python
3839
ln -s /usr/bin/pip-${PYTHON} /usr/bin/pip
3940

40-
# Install python libraries for the spark connect client
41-
# shellcheck disable=SC2102
42-
pip install --no-cache-dir pyspark[connect]==${PRODUCT}
43-
4441
# All files and folders are owned by the root group to support running as arbitrary users.
4542
# This is best practice as all container users will belong to the root group (0).
4643
chown -R ${STACKABLE_USER_UID}:0 /stackable
@@ -56,4 +53,14 @@ EOF
5653

5754
USER ${STACKABLE_USER_UID}
5855

56+
# Install python packages.
57+
# Packages are intentionally installed in "user mode" (pip's --user flag) to reduce the container attack surface.
58+
# - pyspark[connect] = spark connect client libs
59+
# - jupyterlab = notebook client used in demos
60+
# shellcheck disable=SC2102
61+
RUN pip install --no-cache-dir --user \
62+
pyspark[connect]==${PRODUCT} \
63+
jupyterlab==4.4.1 \
64+
scikit-learn==1.3.1
65+
5966
WORKDIR /stackable/spark-connect-examples/python

0 commit comments

Comments
 (0)