@@ -21,7 +21,8 @@ LABEL name="Stackable Spark Connect Examples" \
2121
2222ENV HOME=/stackable
2323
24- COPY spark-connect-client/stackable/spark-connect-examples /stackable/spark-connect-examples
24+ COPY --chown=${STACKABLE_USER_UID}:0 spark-connect-client/stackable/spark-connect-examples /stackable/spark-connect-examples
25+ COPY --chown=${STACKABLE_USER_UID}:0 spark-connect-client/stackable/.jupyter /stackable/.jupyter
2526COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-builder /stackable/spark/connect /stackable/spark/connect
2627
2728RUN <<EOF
@@ -37,10 +38,6 @@ rm -rf /var/cache/yum
3738ln -s /usr/bin/python${PYTHON} /usr/bin/python
3839ln -s /usr/bin/pip-${PYTHON} /usr/bin/pip
3940
40- # Install python libraries for the spark connect client
41- # shellcheck disable=SC2102
42- pip install --no-cache-dir pyspark[connect]==${PRODUCT}
43-
4441# All files and folders owned by root group to support running as arbitrary users.
4542# This is best practice as all container users will belong to the root group (0).
4643chown -R ${STACKABLE_USER_UID}:0 /stackable
5653
5754USER ${STACKABLE_USER_UID}
5855
56+ # Install python packages.
57+ # Packages are intentionally installed in "user mode" to reduce the container attack surface.
58+ # - pyspark[connect] = spark connect client libs
59+ # - jupyterlab = notebook client used in demos
60+ # shellcheck disable=SC2102
61+ RUN pip install --no-cache-dir --user \
62+ pyspark[connect]==${PRODUCT} \
63+ jupyterlab==4.4.1 \
64+ scikit-learn==1.3.1
65+
5966WORKDIR /stackable/spark-connect-examples/python
0 commit comments