33# spark-builder: provides client libs for spark-connect
44FROM stackable/image/spark-k8s AS spark-builder
55
6- FROM stackable/image/java-base
7-
86ARG PRODUCT
97ARG PYTHON
108ARG RELEASE
@@ -18,42 +16,36 @@ LABEL name="Stackable Spark Connect Examples" \
1816 summary="Spark Connect Examples" \
1917 description="Spark Connect client libraries for Python and the JVM, including some examples."
2018
19+ # Need root to install setuptools
20+ USER root
2121
22- ENV HOME=/stackable
23-
24- COPY spark-connect-client/stackable/spark-connect-examples /stackable/spark-connect-examples
25- COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-builder /stackable/spark/connect /stackable/spark/connect
22+ COPY --chown=${STACKABLE_USER_UID}:0 spark-connect-client/stackable/spark-connect-examples /stackable/spark-connect-examples
23+ COPY --chown=${STACKABLE_USER_UID}:0 spark-connect-client/stackable/.jupyter /stackable/.jupyter
2624
2725RUN <<EOF
2826microdnf update
2927# python{version}-setuptools: needed to build the pyspark[connect] package
3028microdnf install --nodocs \
31- "python${PYTHON}" \
32- "python${PYTHON}-pip" \
3329 "python${PYTHON}-setuptools"
3430microdnf clean all
3531rm -rf /var/cache/yum
3632
37- ln -s /usr/bin/python${PYTHON} /usr/bin/python
38- ln -s /usr/bin/pip-${PYTHON} /usr/bin/pip
39-
40- # Install python libraries for the spark connect client
41- # shellcheck disable=SC2102
42- pip install --no-cache-dir pyspark[connect]==${PRODUCT}
43-
4433# All files and folders owned by root group to support running as arbitrary users.
4534# This is best practice as all container users will belong to the root group (0).
4635chown -R ${STACKABLE_USER_UID}:0 /stackable
4736chmod -R g=u /stackable
4837EOF
4938
50- # ----------------------------------------
51- # Attention: We are changing the group of all files in /stackable directly above
52- # If you do any file based actions (copying / creating etc.) below this comment you
53- # absolutely need to make sure that the correct permissions are applied!
54- # chown ${STACKABLE_USER_UID}:0
55- # ----------------------------------------
56-
5739USER ${STACKABLE_USER_UID}
5840
41+ # Install python packages.
42+ # Packages are intentionally installed in "user mode" to reduce the container attack surface.
43+ # - pyspark[connect] = spark connect client libs
44+ # - jupyterlab = notebook client used in demos
45+ RUN pip install --no-cache-dir --user \
46+ "pyspark[connect]==${PRODUCT}" \
47+ "jupyterlab==4.4.1" \
48+ "scikit-learn==1.3.1" \
49+ "matplotlib==3.10.1"
50+
5951WORKDIR /stackable/spark-connect-examples/python
0 commit comments