This repository was archived by the owner on Dec 4, 2019. It is now read-only.

Commit 44a7e84

Download Spark from mirrors; test vs newer Spark, libs (#90)
1 parent 7c59f5e commit 44a7e84

File tree

.travis.yml
bin/download_travis_dependencies.sh
project/build.properties
python/requirements.txt
python/spark_sklearn/keyed_models.py

5 files changed: +64 -28 lines

.travis.yml

Lines changed: 26 additions & 12 deletions
@@ -1,15 +1,29 @@
 language: python
-python:
-  - "2.7"
-  - "3.4"
-  - "3.5"
+
 cache:
   directories:
   - $HOME/.cache/spark-versions
-env:
-  matrix:
-    - SPARK_VERSION="2.1.1" SPARK_BUILD="spark-$SPARK_VERSION-bin-hadoop2.7" SPARK_BUILD_URL="http://d3kbcqa49mib13.cloudfront.net/$SPARK_BUILD.tgz"
-    - SPARK_VERSION="2.2.0" SPARK_BUILD="spark-$SPARK_VERSION-bin-hadoop2.7" SPARK_BUILD_URL="http://d3kbcqa49mib13.cloudfront.net/$SPARK_BUILD.tgz"
+
+matrix:
+  include:
+    - python: "2.7"
+      env:
+        - SPARK_VERSION=2.3.2
+        - NUMPY_VERSION=1.11.1
+        - PANDAS_VERSION=0.19.2
+        - SCIKIT_VERSION=0.18.1
+    - python: "3.5"
+      env:
+        - SPARK_VERSION=2.3.2
+        - NUMPY_VERSION=1.11.1
+        - PANDAS_VERSION=0.19.2
+        - SCIKIT_VERSION=0.18.1
+    - python: "3.6"
+      env:
+        - SPARK_VERSION=2.4.0
+        - NUMPY_VERSION=1.14.3
+        - PANDAS_VERSION=0.23.0
+        - SCIKIT_VERSION=0.19.1
 
 before_install:
   - ./bin/download_travis_dependencies.sh
@@ -20,9 +34,9 @@ install:
   # We do this conditionally because it saves us some downloading if the
   # version is the same.
   - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then
-      wget https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh;
+      curl -s https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh > miniconda.sh;
     else
-      wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh;
+      curl -s https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh > miniconda.sh;
     fi
   - bash miniconda.sh -b -p $HOME/miniconda
   - export PATH="$HOME/miniconda/bin:$PATH"
@@ -33,8 +47,8 @@ install:
   - conda info -a
 
   # Replace dep1 dep2 ... with your dependencies
-  - conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION scikit-learn==0.18.1 nose=1.3.7 pandas=0.18
+  - conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION nose==1.3.7 numpy==$NUMPY_VERSION pandas==$PANDAS_VERSION scikit-learn==$SCIKIT_VERSION
   - source activate test-environment
 
 script:
-  - SPARK_HOME=$HOME/.cache/spark-versions/$SPARK_BUILD ./python/run-tests.sh
+  - SPARK_HOME="$HOME/.cache/spark-versions/spark-$SPARK_VERSION-bin-hadoop2.7" ./python/run-tests.sh
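For illustration only (nothing below is part of the commit): a short Python sketch expanding the new Python 3.6 matrix entry into the commands the install and script phases run. The explicit include list is easier to audit than the old cross-product env matrix, since each entry pins a complete, known-compatible library stack.

    # Hypothetical expansion of one Travis matrix entry (values copied from the
    # .travis.yml diff above). Not part of the commit; for illustration only.
    job = {
        "python": "3.6",
        "SPARK_VERSION": "2.4.0",
        "NUMPY_VERSION": "1.14.3",
        "PANDAS_VERSION": "0.23.0",
        "SCIKIT_VERSION": "0.19.1",
    }

    # Mirrors the `conda create` line in the install phase.
    conda_create = (
        "conda create -q -n test-environment "
        "python={python} nose==1.3.7 numpy=={NUMPY_VERSION} "
        "pandas=={PANDAS_VERSION} scikit-learn=={SCIKIT_VERSION}".format(**job)
    )

    # Mirrors the SPARK_HOME construction in the script phase.
    spark_home = ("$HOME/.cache/spark-versions/"
                  "spark-{SPARK_VERSION}-bin-hadoop2.7".format(**job))

    print(conda_create)
    print("SPARK_HOME={} ./python/run-tests.sh".format(spark_home))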
bin/download_travis_dependencies.sh

Lines changed: 34 additions & 13 deletions
@@ -1,17 +1,38 @@
+#!/usr/bin/env bash
+
 echo "Downloading Spark if necessary"
 echo "Spark version = $SPARK_VERSION"
-echo "Spark build = $SPARK_BUILD"
-echo "Spark build URL = $SPARK_BUILD_URL"
-mkdir -p $HOME/.cache/spark-versions
-tarfilename="$HOME/.cache/spark-versions/$SPARK_BUILD.tgz"
-dirname="$HOME/.cache/spark-versions/$SPARK_BUILD"
-if ! [ -d $dirname ]; then
-  echo "Missing $dirname, downloading archive"
-  echo `which curl`
-  curl "$SPARK_BUILD_URL" > $tarfilename
-  tar xvf $tarfilename --directory $HOME/.cache/spark-versions > /dev/null
-  echo "Content of directory:"
-  ls -la $HOME/.cache/spark-versions/
+
+sparkVersionsDir="$HOME/.cache/spark-versions"
+mkdir -p "$sparkVersionsDir"
+sparkBuild="spark-$SPARK_VERSION-bin-hadoop2.7"
+sparkBuildDir="$sparkVersionsDir/$sparkBuild"
+
+if [[ -d "$sparkBuildDir" ]]; then
+  echo "Skipping download - found Spark dir $sparkBuildDir"
 else
-  echo "Skipping download - found spark dir $dirname"
+  echo "Missing $sparkBuildDir, downloading archive"
+
+  # Get a local ASF mirror, as HTTPS
+  function apache_mirror() {
+    local mirror_url=$(curl -s https://www.apache.org/dyn/closer.lua?preferred=true)
+    echo "${mirror_url/http:/https:}"
+  }
+  # Try mirrors then fall back to dist.apache.org
+  for mirror in $(apache_mirror) $(apache_mirror) $(apache_mirror) https://dist.apache.org/repos/dist/release/; do
+    # If not already found,
+    if ! [[ -d "$sparkBuildDir" ]]; then
+      sparkURL="$mirror/spark/spark-$SPARK_VERSION/$sparkBuild.tgz"
+      echo "Downloading $sparkURL ..."
+      # Test whether it's reachable
+      if curl -s -I -f -o /dev/null "$sparkURL"; then
+        curl -s "$sparkURL" | tar xz --directory "$sparkVersionsDir"
+      else
+        echo "Could not reach $sparkURL"
+      fi
+    fi
+  done
+
+  echo "Content of $sparkBuildDir:"
+  ls -la "$sparkBuildDir"
 fi
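The mirror selection is the heart of the change: resolve a preferred ASF mirror via the closer.lua service, force HTTPS, and fall back to dist.apache.org when no mirror carries the requested build. A rough Python equivalent may make the control flow easier to follow. It is a sketch only; the function names and error handling are invented for the example and are not part of the commit.

    # Illustrative Python translation of the script's mirror logic. The URLs
    # are taken from the diff above; everything else is an assumption.
    import urllib.error
    import urllib.request

    def apache_mirror():
        """Ask the ASF closer.lua service for a preferred mirror, forced to https."""
        with urllib.request.urlopen(
                "https://www.apache.org/dyn/closer.lua?preferred=true") as resp:
            return resp.read().decode().strip().replace("http:", "https:", 1)

    def spark_tgz_url(spark_version):
        build = "spark-%s-bin-hadoop2.7" % spark_version
        candidates = [apache_mirror(),
                      "https://dist.apache.org/repos/dist/release/"]
        for mirror in candidates:
            url = "%s/spark/spark-%s/%s.tgz" % (
                mirror.rstrip("/"), spark_version, build)
            # Reachability probe, analogous to `curl -s -I -f -o /dev/null` above.
            try:
                urllib.request.urlopen(urllib.request.Request(url, method="HEAD"))
                return url
            except urllib.error.URLError:
                continue
        raise RuntimeError("no mirror had the requested Spark build")

Calling apache_mirror three times, as the script does, hedges against a single slow or stale mirror; the dist.apache.org fallback covers mirrors that have already dropped the requested release.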

project/build.properties

Lines changed: 1 addition & 1 deletion
@@ -1,2 +1,2 @@
 // This file should only contain the version of sbt to use.
-sbt.version=0.13.6
+sbt.version=0.13.17

python/requirements.txt

Lines changed: 1 addition & 1 deletion
@@ -1,2 +1,2 @@
 # This file should list any python package dependencies.
-scikit-learn>=0.18.1, <=0.19
+scikit-learn>=0.18.1

python/spark_sklearn/keyed_models.py

Lines changed: 2 additions & 1 deletion
@@ -38,7 +38,8 @@
 <BLANKLINE>
 >>> km = KeyedEstimator(sklearnEstimator=LinearRegression(), yCol="y").fit(df)
 >>> def printFloat(x):
-...     return "{:.2f}".format(round(x, 2))
+...     rounded = round(x, 2)
+...     return "{:.2f}".format(0 if rounded == 0 else rounded)
 ...
 >>> def printModel(model):
 ...     coef = "[" + ", ".join(map(printFloat, model.coef_)) + "]"
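The doctest change guards against Python's signed zero: rounding a tiny negative coefficient produces -0.0, which "{:.2f}" renders as "-0.00", so the doctest output would flap with small numerical differences across library versions. A quick demonstration:

    # Why the guard matters: round() preserves the sign of zero, and
    # str.format prints it, so a coefficient that is "almost zero from below"
    # breaks the expected doctest output.
    x = -0.001
    rounded = round(x, 2)                                   # -0.0
    print("{:.2f}".format(rounded))                         # -0.00
    print("{:.2f}".format(0 if rounded == 0 else rounded))  # 0.00 (since -0.0 == 0)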
