@@ -96,35 +96,39 @@ RUN wget https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/3.3.4/hadoo
     wget https://repo1.maven.org/maven2/com/mysql/mysql-connector-j/8.3.0/mysql-connector-j-8.3.0.jar
 
 # ##########################################
-# Stage 4: Final runtime image for K8s (Spark jobs)
+# Stage 4: Final runtime image for K8s + Jupyter
 # ##########################################
 FROM spark-base AS final
 
 # Non-root user with home dir
 RUN groupadd -r -g 185 spark && \
     useradd -m -r -u 185 -g 185 -d /home/spark spark
 
-# Env for PySpark
+# Env for Jupyter + PySpark
 ENV HOME=/home/spark \
+    JUPYTER_PORT=8888 \
+    JUPYTER_DIR=/opt/spark/work-dir/notebooks \
     PYSPARK_PYTHON=/usr/local/bin/python3.11 \
     PYSPARK_DRIVER_PYTHON=/usr/local/bin/python3.11 \
     PYTHONPATH="${SPARK_HOME}/python"
 
-# 👉 COPY THE PROJECT CODE INTO THE IMAGE
-# (assuming you build from the repo root with:
-#  docker build -f Docker/Dockerfile .)
-COPY --chown=spark:spark src /opt/spark/src
+# PySpark + JupyterLab + libs
+RUN pip install --no-cache-dir \
+    pyspark==3.5.7 \
+    pandas \
+    numpy \
+    jupyterlab==4.2.5
 
-# Ensure permissions
-RUN mkdir -p /opt/spark/work-dir && \
-    chown -R spark:spark /opt/spark /home/spark
+# Jupyter + notebook dirs
+RUN mkdir -p "${JUPYTER_DIR}" \
+    && mkdir -p "${HOME}/.local/share/jupyter/runtime" \
+    && mkdir -p "${HOME}/.jupyter" \
+    && chown -R spark:spark /home/spark /opt/spark
 
 USER 185
-WORKDIR /opt/spark/work-dir
+WORKDIR ${JUPYTER_DIR}
 
-# 👉 PRODUCTION ENTRYPOINT: use the official Spark entrypoint
-# (which we already copied from apache/spark above)
-ENTRYPOINT ["/opt/entrypoint.sh"]
+EXPOSE 8888
 
-# No fixed CMD: the Spark Operator handles passing the command,
-# mainApplicationFile, etc., via the SparkApplication
+# Default: start JupyterLab (a K8s manifest can override this if you only want spark-submit)
+ENTRYPOINT ["bash", "-lc", "jupyter lab --ip=0.0.0.0 --port=${JUPYTER_PORT} --no-browser --ServerApp.root_dir=${JUPYTER_DIR} --ServerApp.token='' --ServerApp.password=''"]
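For a quick local check of the new image, something like the following should work. This is a sketch, not part of the commit: the image tag is made up, the Dockerfile path is taken from the removed comment above, and `/opt/spark/bin/spark-submit` assumes `SPARK_HOME=/opt/spark` in the `spark-base` stage:

```sh
# Build from the repo root (tag is illustrative)
docker build -f Docker/Dockerfile -t spark-jupyter:dev .

# JupyterLab on http://localhost:8888 -- no token/password, per the ENTRYPOINT
docker run --rm -p 8888:8888 spark-jupyter:dev

# Bypass Jupyter, as a K8s manifest would, by overriding the entrypoint
# (assumes SPARK_HOME=/opt/spark in the spark-base stage)
docker run --rm --entrypoint /opt/spark/bin/spark-submit spark-jupyter:dev --version
```

Note that the empty `--ServerApp.token` and `--ServerApp.password` values disable Jupyter authentication entirely, so port 8888 should only ever be reachable inside the cluster or behind an authenticated proxy.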