@@ -18,25 +18,31 @@ RUN apt update \
     && ssh-keygen -q -t rsa -N '' -f ~/.ssh/id_rsa \
     && cat ~/.ssh/id_rsa.pub > ~/.ssh/authorized_keys \
     && echo 'PermitRootLogin yes' >> /etc/ssh/sshd_config \
-    && service ssh restart \
-# Downloads and extracts Hadoop
-    && wget http://apache.dattatec.com/hadoop/common/hadoop-3.1.3/hadoop-3.1.3.tar.gz \
+    && service ssh restart
+
+# Downloads and extracts Hadoop
+RUN wget http://apache.dattatec.com/hadoop/common/hadoop-3.2.2/hadoop-3.2.2.tar.gz
+
 # Configures Hadoop and removes downloaded .tar.gz file
-    && tar -xzvf hadoop-3.1.3.tar.gz \
-    && mv hadoop-3.1.3 $HADOOP_HOME \
+RUN tar -xzvf hadoop-3.2.2.tar.gz \
+    && mv hadoop-3.2.2 $HADOOP_HOME \
     && echo 'export JAVA_HOME=$(readlink -f /usr/bin/java | sed "s:bin/java::")' >> $HADOOP_HOME/etc/hadoop/hadoop-env.sh \
     && echo 'export PATH=$PATH:$HADOOP_HOME/bin' >> ~/.bashrc \
     && echo 'export PATH=$PATH:$HADOOP_HOME/sbin' >> ~/.bashrc \
-    && rm hadoop-3.1.3.tar.gz
-# Downloads Apache Spark
-RUN wget apache.dattatec.com/spark/spark-3.0.0/spark-3.0.0-bin-hadoop2.7.tgz \
-# Decompress, adds to PATH and then removes .tgz Apache Spark file
-    && tar -xvzf spark-3.0.0-bin-hadoop2.7.tgz \
-    && mv spark-3.0.0-bin-hadoop2.7 sbin/ \
-    && echo 'export PATH=$PATH:/sbin/spark-3.0.0-bin-hadoop2.7/sbin/' >> ~/.bashrc \
-    && echo 'export PATH=$PATH:/sbin/spark-3.0.0-bin-hadoop2.7/bin/' >> ~/.bashrc \
-    && rm spark-3.0.0-bin-hadoop2.7.tgz
-RUN mv ${HADOOP_STREAMING_HOME}/hadoop-streaming-3.1.3.jar ${HADOOP_STREAMING_HOME}/hadoop-streaming.jar \
+    && rm hadoop-3.2.2.tar.gz
+
+# Downloads Apache Spark
+RUN wget http://apache.dattatec.com/spark/spark-3.1.1/spark-3.1.1-bin-without-hadoop.tgz
+
+# Decompress, adds to PATH and then removes .tgz Apache Spark file
+# NOTE: Spark bin folder goes first to prevent issues with /usr/local/bin duplicated binaries
+RUN tar -xvzf spark-3.1.1-bin-without-hadoop.tgz \
+    && mv spark-3.1.1-bin-without-hadoop sbin/ \
+    && echo 'export PATH=$PATH:/sbin/spark-3.1.1-bin-without-hadoop/sbin/' >> ~/.bashrc \
+    && echo 'export PATH=/sbin/spark-3.1.1-bin-without-hadoop/bin/:$PATH' >> ~/.bashrc \
+    && rm spark-3.1.1-bin-without-hadoop.tgz
+
+RUN mv ${HADOOP_STREAMING_HOME}/hadoop-streaming-3.2.2.jar ${HADOOP_STREAMING_HOME}/hadoop-streaming.jar \
     && source ~/.bashrc

 # Installs some extra libraries
@@ -66,7 +72,7 @@ COPY ./config/mapred-site.xml .
 COPY ./config/yarn-site.xml .

 # Spark settings
-WORKDIR /sbin/spark-3.0.0-bin-hadoop2.7/conf/
+WORKDIR /sbin/spark-3.1.1-bin-without-hadoop/conf/
 COPY ./config/spark-env.sh .
 COPY ./config/log4j.properties .
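The contents of the copied ./config/spark-env.sh are not shown in this diff. For reference only: the spark-3.1.1-bin-without-hadoop distribution used above is a "Hadoop-free" build, which is normally pointed at the image's Hadoop jars via SPARK_DIST_CLASSPATH. A minimal sketch of such a spark-env.sh, assuming HADOOP_HOME is the same variable configured earlier in this Dockerfile (the actual file in this repo may differ):

#!/usr/bin/env bash
# Sketch only -- illustrative, not the repo's actual ./config/spark-env.sh.
# Hadoop-free Spark builds need Hadoop's jars on their classpath;
# 'hadoop classpath' prints the classpath of the installed Hadoop.
export SPARK_DIST_CLASSPATH=$("${HADOOP_HOME}/bin/hadoop" classpath)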