Skip to content

Commit b9aacda

Browse files
committed
+ Updated Hadoop to 3.2.2
+ Updated Spark to 3.1.1
1 parent 3d14fcf commit b9aacda

File tree

3 files changed

+28
-21
lines changed

3 files changed

+28
-21
lines changed

Dockerfile

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -18,25 +18,31 @@ RUN apt update \
1818
&& ssh-keygen -q -t rsa -N '' -f ~/.ssh/id_rsa \
1919
&& cat ~/.ssh/id_rsa.pub > ~/.ssh/authorized_keys \
2020
&& echo 'PermitRootLogin yes' >> /etc/ssh/sshd_config \
21-
&& service ssh restart \
22-
# Downloads and extracts Hadoop
23-
&& wget http://apache.dattatec.com/hadoop/common/hadoop-3.1.3/hadoop-3.1.3.tar.gz \
21+
&& service ssh restart
22+
23+
# Downloads Hadoop
24+
RUN wget http://apache.dattatec.com/hadoop/common/hadoop-3.2.2/hadoop-3.2.2.tar.gz
25+
2426
# Extracts and configures Hadoop, then removes the downloaded .tar.gz file
25-
&& tar -xzvf hadoop-3.1.3.tar.gz \
26-
&& mv hadoop-3.1.3 $HADOOP_HOME \
27+
RUN tar -xzvf hadoop-3.2.2.tar.gz \
28+
&& mv hadoop-3.2.2 $HADOOP_HOME \
2729
&& echo 'export JAVA_HOME=$(readlink -f /usr/bin/java | sed "s:bin/java::")' >> $HADOOP_HOME/etc/hadoop/hadoop-env.sh \
2830
&& echo 'export PATH=$PATH:$HADOOP_HOME/bin' >> ~/.bashrc \
2931
&& echo 'export PATH=$PATH:$HADOOP_HOME/sbin' >> ~/.bashrc \
30-
&& rm hadoop-3.1.3.tar.gz
31-
# Downloads Apache Spark
32-
RUN wget apache.dattatec.com/spark/spark-3.0.0/spark-3.0.0-bin-hadoop2.7.tgz \
33-
# Decompress, adds to PATH and then removes .tgz Apache Spark file
34-
&& tar -xvzf spark-3.0.0-bin-hadoop2.7.tgz \
35-
&& mv spark-3.0.0-bin-hadoop2.7 sbin/ \
36-
&& echo 'export PATH=$PATH:/sbin/spark-3.0.0-bin-hadoop2.7/sbin/' >> ~/.bashrc \
37-
&& echo 'export PATH=$PATH:/sbin/spark-3.0.0-bin-hadoop2.7/bin/' >> ~/.bashrc \
38-
&& rm spark-3.0.0-bin-hadoop2.7.tgz
39-
RUN mv ${HADOOP_STREAMING_HOME}/hadoop-streaming-3.1.3.jar ${HADOOP_STREAMING_HOME}/hadoop-streaming.jar \
32+
&& rm hadoop-3.2.2.tar.gz
33+
34+
# Downloads Apache Spark
35+
RUN wget http://apache.dattatec.com/spark/spark-3.1.1/spark-3.1.1-bin-without-hadoop.tgz
36+
37+
# Decompresses Apache Spark, adds it to PATH, and then removes the downloaded .tgz file
38+
# NOTE: Spark's bin folder is prepended to PATH so that its binaries take precedence over any same-named binaries in /usr/local/bin
39+
RUN tar -xvzf spark-3.1.1-bin-without-hadoop.tgz \
40+
&& mv spark-3.1.1-bin-without-hadoop sbin/ \
41+
&& echo 'export PATH=$PATH:/sbin/spark-3.1.1-bin-without-hadoop/sbin/' >> ~/.bashrc \
42+
&& echo 'export PATH=/sbin/spark-3.1.1-bin-without-hadoop/bin/:$PATH' >> ~/.bashrc \
43+
&& rm spark-3.1.1-bin-without-hadoop.tgz
44+
45+
RUN mv ${HADOOP_STREAMING_HOME}/hadoop-streaming-3.2.2.jar ${HADOOP_STREAMING_HOME}/hadoop-streaming.jar \
4046
&& source ~/.bashrc
4147

4248
# Installs some extra libraries
@@ -66,7 +72,7 @@ COPY ./config/mapred-site.xml .
6672
COPY ./config/yarn-site.xml .
6773

6874
# Spark settings
69-
WORKDIR /sbin/spark-3.0.0-bin-hadoop2.7/conf/
75+
WORKDIR /sbin/spark-3.1.1-bin-without-hadoop/conf/
7076
COPY ./config/spark-env.sh .
7177
COPY ./config/log4j.properties .
7278

config/spark-cmd.sh

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,19 +9,19 @@ sleep 5
99

1010
if [[ $1 = "start" ]]; then
1111
if [[ $2 = "master-node" ]]; then
12-
/sbin/spark-3.0.0-bin-hadoop2.7/sbin/start-master.sh
12+
/sbin/spark-3.1.1-bin-without-hadoop/sbin/start-master.sh
1313
sleep infinity
1414
exit
1515
fi
16-
/sbin/spark-3.0.0-bin-hadoop2.7/sbin/start-slave.sh master-node:7077
16+
/sbin/spark-3.1.1-bin-without-hadoop/sbin/start-worker.sh master-node:7077
1717
sleep infinity
1818
exit
1919
fi
2020

2121
if [[ $1 = "stop" ]]; then
2222
if [[ $2 = "master-node" ]]; then
23-
/sbin/spark-3.0.0-bin-hadoop2.7/sbin/stop-master.sh
23+
/sbin/spark-3.1.1-bin-without-hadoop/sbin/stop-master.sh
2424
exit
2525
fi
26-
/sbin/spark-3.0.0-bin-hadoop2.7/sbin/stop-slave.sh
26+
/sbin/spark-3.1.1-bin-without-hadoop/sbin/stop-worker.sh
2727
fi

config/spark-env.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1-
export HADOOP_CONF_DIR="${HADOOP_HOME}/etc/hadoop"
1+
export HADOOP_CONF_DIR="${HADOOP_HOME}/etc/hadoop"
2+
export SPARK_DIST_CLASSPATH=$($HADOOP_HOME/bin/hadoop classpath)

0 commit comments

Comments
 (0)