1515# Script inspired by: https://github.com/jupyter/docker-stacks/blob/master/pyspark-notebook/Dockerfile#L18
1616# https://github.com/apache/incubator-toree/blob/master/Dockerfile
1717
18+
19+ # TODO: Add additional Spark configuration:
20+ # https://spark.apache.org/docs/latest/configuration.html
21+ # TODO: Start the Spark master?
22+ # https://medium.com/@marcovillarreal_40011/creating-a-spark-standalone-cluster-with-docker-and-docker-compose-ba9d743a157f
23+ # ENV SPARK_MASTER_PORT 7077
24+ # ENV SPARK_MASTER_WEBUI_PORT 8080
25+ # ENV SPARK_WORKER_WEBUI_PORT 8081
26+ # ENV SPARK_MASTER_LOG /spark/logs
27+ # ENV SPARK_WORKER_LOG /spark/logs
28+ # export SPARK_MASTER_HOST=`hostname`
29+ # SPARK_WORKER_CORES=1
30+ # SPARK_WORKER_MEMORY=1G
31+ # SPARK_DRIVER_MEMORY=128m
32+ # SPARK_EXECUTOR_MEMORY=256m
33+
34+ # TODO: Configure the Spark UI to be proxied under a base path:
35+ # https://stackoverflow.com/questions/45971127/wrong-css-location-of-spark-application-ui
36+ # https://github.com/jupyterhub/jupyter-server-proxy/issues/57
37+ # https://github.com/yuvipanda/jupyter-sparkui-proxy/blob/master/jupyter_sparkui_proxy/__init__.py
38+
39+
1840# Install scala 2.12
1941if [[ ! $( scala -version 2>&1 ) =~ " version 2.12" ]]; then
2042 # Update to Scala 2.12 is required for spark
21- SCALA_VERSION=2.12.12
22- echo " Updating to Scala $SCALA_VERSION . Please wait..."
23- apt-get remove scala-library scala
24- apt-get autoremove
25- wget -q https://downloads.lightbend.com/scala/$SCALA_VERSION /scala-$SCALA_VERSION .deb -O ./scala.deb
26- dpkg -i scala.deb
27- rm scala.deb
28- apt-get update
29- apt-get install scala
43+ echo " Scala 2.12 is not installed. You should consider running the scala-utils.sh tool installer before continuing."
44+ sleep 10
3045else
3146 echo " Scala 2.12 already installed."
3247fi
@@ -38,7 +53,8 @@ if [ ! -d "$SPARK_HOME" ]; then
3853 cd $RESOURCES_PATH
3954 SPARK_VERSION="3.0.1"
4055 HADOOP_VERSION="3.2"
41- wget https://mirror.checkdomain.de/apache/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop$HADOOP_VERSION.tgz -O ./spark.tar.gz
56+ echo "Downloading. Please wait..."
57+ wget -q https://mirror.checkdomain.de/apache/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop$HADOOP_VERSION.tgz -O ./spark.tar.gz
4258 tar xzf spark.tar.gz
4359 mv spark-$SPARK_VERSION-bin-hadoop$HADOOP_VERSION/ $SPARK_HOME
4460 rm spark.tar.gz
@@ -55,12 +71,12 @@ if [ ! -d "$SPARK_HOME" ]; then
5571 pip install --no-cache-dir sparkmagic
5672 jupyter serverextension enable --py sparkmagic
5773
58- # Install sparkmonitor: https://github.com/krishnan-r/sparkmonitor
59- pip install --no-cache-dir sparkmonitor
60- jupyter nbextension install sparkmonitor --py --sys-prefix --symlink
61- jupyter nbextension enable sparkmonitor --py --sys-prefix
62- jupyter serverextension enable --py --sys-prefix sparkmonitor
63- ipython profile create && echo " c.InteractiveShellApp.extensions.append('sparkmonitor.kernelextension')" >> $( ipython profile locate default) /ipython_kernel_config.py
74+ # TODO: Installing sparkmonitor does not work right now: https://github.com/krishnan-r/sparkmonitor
75+ # pip install --no-cache-dir sparkmonitor
76+ # jupyter nbextension install sparkmonitor --py --sys-prefix --symlink
77+ # jupyter nbextension enable sparkmonitor --py --sys-prefix
78+ # jupyter serverextension enable --py --sys-prefix sparkmonitor
79+ # ipython profile create && echo "c.InteractiveShellApp.extensions.append('sparkmonitor.kernelextension')" >> $(ipython profile locate default)/ipython_kernel_config.py
6480
6581 # Deprecated: jupyter-spark: https://github.com/mozilla/jupyter-spark
6682 # jupyter serverextension enable --py jupyter_spark && \
@@ -85,8 +101,10 @@ pip install --no-cache-dir pyspark findspark pyarrow spylon-kernel
85101if [[ ! $( jupyter kernelspec list) =~ " toree" ]]; then
86102 echo " Installing Toree Kernel for Jupyter. Please wait..."
87103 TOREE_VERSION=0.5.0
88- pip install --no-cache-dir https://dist.apache.org/repos/dist/dev/incubator/toree/$TOREE_VERSION -incubating-rc1/toree-pip/toree-$TOREE_VERSION .tar.gz
89- jupyter toree install --sys-prefix --spark_home=$SPARK_HOME
104+ echo " Toree Kernel does not seem to work with the installed spark and scala version."
105+ # TODO: Fix installation
106+ # pip install --no-cache-dir https://dist.apache.org/repos/dist/dev/incubator/toree/$TOREE_VERSION-incubating-rc1/toree-pip/toree-$TOREE_VERSION.tar.gz
107+ # jupyter toree install --sys-prefix --spark_home=$SPARK_HOME
90108else
91109 echo " Toree Kernel for Jupyter is already installed."
92110fi
0 commit comments