Skip to content

Commit dfd9ea4

Browse files
update docker image
1 parent 1adc5ef commit dfd9ea4

File tree

1 file changed

+1
-108
lines changed

1 file changed

+1
-108
lines changed

Docker/Dockerfile

Lines changed: 1 addition & 108 deletions
Original file line number | Diff line number | Diff line change
@@ -1,108 +1,4 @@
1-
# syntax=docker/dockerfile:1
2-
3-
###########################################
4-
# Stage 1: Build Python 3.11.6 from source
5-
###########################################
6-
FROM ubuntu:22.04 AS python-build
7-
8-
ENV DEBIAN_FRONTEND=noninteractive
9-
ENV PYTHON_VERSION=3.11.6
10-
ENV PREFIX=/usr/local
11-
12-
RUN apt-get update && apt-get install -y \
13-
build-essential \
14-
wget \
15-
zlib1g-dev \
16-
libncurses5-dev \
17-
libgdbm-dev \
18-
libnss3-dev \
19-
libssl-dev \
20-
libreadline-dev \
21-
libffi-dev \
22-
libsqlite3-dev \
23-
libbz2-dev \
24-
&& rm -rf /var/lib/apt/lists/*
25-
26-
WORKDIR /usr/src
27-
28-
RUN wget https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tgz \
29-
&& tar -xzf Python-${PYTHON_VERSION}.tgz
30-
31-
WORKDIR /usr/src/Python-${PYTHON_VERSION}
32-
33-
RUN ./configure --enable-optimizations --prefix=${PREFIX} \
34-
&& make -j"$(nproc)" \
35-
&& make altinstall
36-
37-
RUN ln -sf ${PREFIX}/bin/python3.11 /usr/local/bin/python \
38-
&& ln -sf ${PREFIX}/bin/pip3.11 /usr/local/bin/pip
39-
40-
###########################################
41-
# Stage 2: Get entrypoint from official Spark
42-
###########################################
43-
FROM apache/spark:3.5.7 AS spark-official
44-
45-
###########################################
46-
# Stage 3: Spark + Delta + Cloud connectors
47-
###########################################
48-
FROM ubuntu:22.04 AS spark-base
49-
50-
ARG SPARK_VERSION=3.5.7
51-
ARG HADOOP_VERSION=3
52-
ARG DELTA_VERSION=3.2.1
53-
54-
ENV DEBIAN_FRONTEND=noninteractive
55-
ENV SPARK_HOME=/opt/spark
56-
ENV PATH="${SPARK_HOME}/bin:${PATH}"
57-
58-
# Java + utils
59-
RUN apt-get update && apt-get install -y \
60-
openjdk-11-jdk \
61-
curl \
62-
wget \
63-
bash \
64-
tini \
65-
ca-certificates \
66-
procps \
67-
&& rm -rf /var/lib/apt/lists/*
68-
69-
# Copy Python from build stage
70-
COPY --from=python-build /usr/local /usr/local
71-
72-
# Copy entrypoint scripts from official Spark image
73-
COPY --from=spark-official /opt/entrypoint.sh /opt/entrypoint.sh
74-
COPY --from=spark-official /opt/decom.sh /opt/decom.sh
75-
RUN chmod +x /opt/entrypoint.sh /opt/decom.sh
76-
77-
# Download Apache Spark prebuilt for Hadoop 3
78-
WORKDIR /opt
79-
RUN wget https://dlcdn.apache.org/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz \
80-
&& tar -xzf spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz \
81-
&& mv spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION} spark \
82-
&& rm spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz
83-
84-
# Add connectors (Delta, AWS, Azure, MySQL)
85-
WORKDIR ${SPARK_HOME}/jars
86-
RUN wget https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/3.3.4/hadoop-aws-3.3.4.jar && \
87-
wget https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/1.12.375/aws-java-sdk-bundle-1.12.375.jar && \
88-
wget https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-azure/3.3.4/hadoop-azure-3.3.4.jar && \
89-
wget https://repo1.maven.org/maven2/com/microsoft/azure/azure-storage/8.6.6/azure-storage-8.6.6.jar && \
90-
wget https://repo1.maven.org/maven2/com/azure/azure-storage-blob/12.24.0/azure-storage-blob-12.24.0.jar && \
91-
wget https://repo1.maven.org/maven2/com/azure/azure-identity/1.7.0/azure-identity-1.7.0.jar && \
92-
wget https://repo1.maven.org/maven2/com/azure/azure-core/1.42.0/azure-core-1.42.0.jar && \
93-
wget https://repo1.maven.org/maven2/io/delta/delta-spark_2.12/${DELTA_VERSION}/delta-spark_2.12-${DELTA_VERSION}.jar && \
94-
wget https://repo1.maven.org/maven2/io/delta/delta-storage/${DELTA_VERSION}/delta-storage-${DELTA_VERSION}.jar && \
95-
wget https://repo1.maven.org/maven2/io/delta/delta-kernel-api/${DELTA_VERSION}/delta-kernel-api-${DELTA_VERSION}.jar && \
96-
wget https://repo1.maven.org/maven2/com/mysql/mysql-connector-j/8.3.0/mysql-connector-j-8.3.0.jar
97-
98-
###########################################
99-
# Stage 4: Final runtime image for K8s + Jupyter
100-
###########################################
101-
FROM spark-base AS final
102-
103-
# Non-root user with home dir
104-
RUN groupadd -r -g 185 spark && \
105-
useradd -m -r -u 185 -g 185 -d /home/spark spark
1+
FROM nauedu/nau-analytics-base-spark:latest
106 2

107 3
# Env for Jupyter + PySpark
108 4
ENV HOME=/home/spark \
@@ -114,9 +10,6 @@ ENV HOME=/home/spark \
114 10

115 11
# PySpark + JupyterLab + libs
116 12
RUN pip install --no-cache-dir \
117-
pyspark==3.5.7 \
118-
pandas \
119-
numpy \
120 13
jupyterlab==4.2.5
121 14

122 15
# Dirs Jupyter + notebooks

0 commit comments

Comments (0)