Commit 5d37494

Author: Phil Varner
merge pyspark-notebook:spark-2.3.4-hadoop-2.7 Dockerfile into this one
Signed-off-by: Phil Varner <[email protected]>
1 parent 15581e6 commit 5d37494

2 files changed (+39, −13 lines)


rf-notebook/build.sbt

Lines changed: 1 addition & 0 deletions
@@ -44,6 +44,7 @@ Docker / dockerGenerateConfig := (Docker / sourceDirectory).value / "Dockerfile"
 
 // Save a bit of typing...
 publishLocal := (Docker / publishLocal).value
+publish := (Docker / publish).value
 
 // -----== Conveniences ==-----
 
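With this addition, a bare `publish` on the rf-notebook project delegates to the Docker-scoped publish task, mirroring the existing `publishLocal` alias. A minimal usage sketch, assuming the sbt project id matches the rf-notebook directory name:

# build the image into the local Docker daemon (assumed project id: rf-notebook)
sbt rf-notebook/publishLocal
# build the image and push it to the configured Docker registry
sbt rf-notebook/publish
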
rf-notebook Dockerfile

Lines changed: 38 additions & 13 deletions
@@ -1,30 +1,55 @@
-FROM s22s/pyspark-notebook:spark-2.3.4-hadoop-2.7
+FROM jupyter/scipy-notebook:latest
 
 MAINTAINER Astraea, Inc.
 
+EXPOSE 4040 4041 4042 4043 4044
+
 ENV RF_LIB_LOC=/usr/local/rasterframes \
     LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/conda/lib"
 
 USER root
 
 RUN mkdir $RF_LIB_LOC
 
-EXPOSE 4040 4041 4042 4043 4044
-
-# Sphinx (for Notebook->html)
-RUN conda install --quiet --yes \
-    anaconda sphinx nbsphinx shapely numpy folium geopandas geojsonio rasterio descartes
-
-# Cleanup pip residuals
-RUN rm -rf /home/$NB_USER/.local && \
-    fix-permissions /home/$NB_USER && \
-    fix-permissions $CONDA_DIR
+RUN apt-get -y update && \
+    apt-get install --no-install-recommends -y openjdk-8-jre-headless ca-certificates-java && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Spark dependencies
+ENV APACHE_SPARK_VERSION 2.3.4
+ENV HADOOP_VERSION 2.7
+
+RUN cd /tmp && \
+    wget -q http://apache.mirrors.pair.com/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz && \
+    echo "9FBEFCE2739990FFEDE6968A9C2F3FE399430556163BFDABDF5737A8F9E52CD535489F5CA7D641039A87700F50BFD91A706CA47979EE51A3A18787A92E2D6D53 *spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" | sha512sum -c - && \
+    tar xzf spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz -C /usr/local --owner root --group root --no-same-owner && \
+    rm spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz
+RUN cd /usr/local && ln -s spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION} spark
+
+# Spark config
+ENV SPARK_HOME /usr/local/spark
+ENV PYTHONPATH $SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.7-src.zip
+ENV SPARK_OPTS --driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M --driver-java-options=-Dlog4j.logLevel=info
+
+# Sphinx (for Notebook->html) and pyarrow (from pyspark build)
+RUN conda install --quiet --yes pyarrow \
+    anaconda sphinx nbsphinx shapely numpy folium geopandas geojsonio rasterio descartes \
+    && conda clean --all \
+    && rm -rf /home/$NB_USER/.local \
+    && find /opt/conda/ -type f,l -name '*.a' -delete \
+    && find /opt/conda/ -type f,l -name '*.pyc' -delete \
+    && find /opt/conda/ -type f,l -name '*.js.map' -delete \
+    && find /opt/conda/lib/python*/site-packages/bokeh/server/static -type f,l -name '*.js' -not -name '*.min.js' -delete \
+    && rm -rf /opt/conda/pkgs \
+    && fix-permissions $CONDA_DIR \
+    && fix-permissions /home/$NB_USER
 
 COPY *.whl $RF_LIB_LOC
 COPY jupyter_notebook_config.py $HOME/.jupyter
 COPY examples $HOME/examples
 
-RUN ls -1 $RF_LIB_LOC/*.whl | xargs pip install
+RUN ls -1 $RF_LIB_LOC/*.whl | xargs pip install --no-cache-dir
 RUN chmod -R +w $HOME/examples && chown -R $NB_UID:$NB_GID $HOME
 
-USER $NB_UID
+USER $NB_UID
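
Because the new base image does not bundle Spark, the merged Dockerfile now installs a headless JRE, downloads and sha512-verifies the Spark 2.3.4 / Hadoop 2.7 distribution, and wires it into the notebook environment via SPARK_HOME and PYTHONPATH. A rough build-and-smoke-test sketch, using a hypothetical local tag (the actual image name is set by the sbt Docker configuration, not shown in this commit):

# build from the directory containing this Dockerfile (the tag rf-notebook:dev is hypothetical)
docker build -t rf-notebook:dev .

# verify pyspark resolves through the SPARK_HOME/PYTHONPATH baked into the image
docker run --rm rf-notebook:dev python -c "import pyspark; print(pyspark.__version__)"

# start the notebook server, publishing Jupyter's port and the exposed Spark UI ports
docker run --rm -p 8888:8888 -p 4040-4044:4040-4044 rf-notebook:dev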
