
Commit 1adc5ef

fix: undo changes made to dockerfile
1 parent 316597e commit 1adc5ef

File tree: 1 file changed (+19, -15)

Docker/Dockerfile

Lines changed: 19 additions & 15 deletions
@@ -96,35 +96,39 @@ RUN wget https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/3.3.4/hadoo
     wget https://repo1.maven.org/maven2/com/mysql/mysql-connector-j/8.3.0/mysql-connector-j-8.3.0.jar
 
 ###########################################
-# Stage 4: Final runtime image for K8s (Spark jobs)
+# Stage 4: Final runtime image for K8s + Jupyter
 ###########################################
 FROM spark-base AS final
 
 # Non-root user with home dir
 RUN groupadd -r -g 185 spark && \
     useradd -m -r -u 185 -g 185 -d /home/spark spark
 
-# Env for PySpark
+# Env for Jupyter + PySpark
 ENV HOME=/home/spark \
+    JUPYTER_PORT=8888 \
+    JUPYTER_DIR=/opt/spark/work-dir/notebooks \
     PYSPARK_PYTHON=/usr/local/bin/python3.11 \
     PYSPARK_DRIVER_PYTHON=/usr/local/bin/python3.11 \
     PYTHONPATH="${SPARK_HOME}/python"
 
-# 👉 COPY THE PROJECT CODE INTO THE IMAGE
-# (assuming you build from the repo root with:
-#  docker build -f Docker/Dockerfile .)
-COPY --chown=spark:spark src /opt/spark/src
+# PySpark + JupyterLab + libs
+RUN pip install --no-cache-dir \
+    pyspark==3.5.7 \
+    pandas \
+    numpy \
+    jupyterlab==4.2.5
 
-# Ensure permissions
-RUN mkdir -p /opt/spark/work-dir && \
-    chown -R spark:spark /opt/spark /home/spark
+# Jupyter + notebook dirs
+RUN mkdir -p "${JUPYTER_DIR}" \
+    && mkdir -p "${HOME}/.local/share/jupyter/runtime" \
+    && mkdir -p "${HOME}/.jupyter" \
+    && chown -R spark:spark /home/spark /opt/spark
 
 USER 185
-WORKDIR /opt/spark/work-dir
+WORKDIR ${JUPYTER_DIR}
 
-# 👉 PRODUCTION ENTRYPOINT: use the official Spark entrypoint
-# (which we already copied from apache/spark above)
-ENTRYPOINT ["/opt/entrypoint.sh"]
+EXPOSE 8888
 
-# No fixed CMD: the Spark Operator takes care of passing the command,
-# mainApplicationFile, etc., via the SparkApplication
+# Default: start JupyterLab (the K8s manifest can override this to run spark-submit only)
+ENTRYPOINT ["bash","-lc","jupyter lab --ip=0.0.0.0 --port=${JUPYTER_PORT} --no-browser --ServerApp.root_dir=${JUPYTER_DIR} --ServerApp.token='' --ServerApp.password=''"]
