Commit 5b6a08a

Merge pull request spark-jobserver#639 from instructure/2.0
Get latest master working with Spark 2.0
2 parents: f232aaf + 43f7346

21 files changed: +139 -86 lines changed

.dockerignore

Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
+/target
+*/target
+**/target
+.git
+.ensime
+.ensime_cache

.travis.yml

Lines changed: 30 additions & 18 deletions
@@ -1,23 +1,35 @@
-language: scala
 sudo: required
-dist: trusty
+
 env:
   global:
-  - _JAVA_OPTIONS="-Xmx1500m -XX:MaxPermSize=512m -Dakka.test.timefactor=3"
-  - SPARK_HOME=/tmp/spark-1.6.2-bin-hadoop2.6
-scala:
-  - 2.10.6
-  - 2.11.8
-jdk:
-  - oraclejdk8
-  - oraclejdk7
+  - DOCKER_IMAGE="spark-jobserver:ci"
+
+language: go
+go:
+  - 1.7
+
+services:
+  - docker
+
+cache:
+  directories:
+  - docker
+
 before_install:
-  - sudo apt-get -qq update
-  - sudo apt-get -y install python3 python3-pip
-  - ci/install-python-dependencies.sh
-  - ci/install-spark.sh
+  - go get github.com/tonistiigi/buildcache/cmd/buildcache
+
+install:
+  - if [ -e docker/latest_image.tar.gz ]; then
+      docker load -i docker/latest_image.tar.gz;
+    fi
+  - if [ -e docker/latest_cache.tar.gz ]; then
+      docker load -i docker/latest_cache.tar.gz;
+    fi
+  - echo "docker/" >> .dockerignore
+  - docker build -f Dockerfile.test -t $DOCKER_IMAGE .
+  - docker save $DOCKER_IMAGE | gzip > docker/latest_image.tar.gz
+  - sudo `which buildcache` save -o docker/latest_cache.tar.gz $DOCKER_IMAGE
+  - sudo chown $USER docker/latest_cache.tar.gz
+
 script:
-  - sbt clean coverage testPython test coverageReport
-  - find job-server-python/src/python -name *.py -exec pep8 {} +
-after_success:
-  - bash <(curl -s https://codecov.io/bash)
+  - docker run --rm -t -i $DOCKER_IMAGE

Dockerfile.test

Lines changed: 30 additions & 0 deletions
@@ -0,0 +1,30 @@
+FROM instructure/java:8
+
+USER root
+# install and cache sbt, python
+
+RUN echo 'deb http://dl.bintray.com/sbt/debian /' > /etc/apt/sources.list.d/sbt.list && \
+    apt-get -qq update && \
+    apt-get install -y --force-yes python3 python3-pip python-pip sbt=0.13.8 && \
+    sbt
+# running sbt downloads some of its internals, speed up subsequent sbt runs
+
+WORKDIR /usr/src/app/
+
+# install other ci deps
+COPY ci ci
+RUN ci/install-python-dependencies.sh && \
+    ci/install-spark.sh
+
+# add sbt and cache deps
+COPY project project
+COPY build.sbt .
+RUN sbt update
+
+# add the rest of the code
+COPY . .
+
+ENV SPARK_HOME /tmp/spark-2.0.1-bin-hadoop2.7
+ENV JAVA_OPTIONS "-Xmx1500m -XX:MaxPermSize=512m -Dakka.test.timefactor=3"
+
+CMD ["/usr/src/app/run_tests.sh"]

build.sbt

Lines changed: 10 additions & 10 deletions
@@ -116,13 +116,8 @@ lazy val dockerSettings = Seq(

   val sparkBuild = s"spark-$sparkVersion"
   val sparkBuildCmd = scalaBinaryVersion.value match {
-    case "2.10" =>
-      "./make-distribution.sh -Phadoop-2.4 -Phive"
     case "2.11" =>
-      """
-        |./dev/change-scala-version.sh 2.11 && \
-        |./make-distribution.sh -Dscala-2.11 -Phadoop-2.4 -Phive
-      """.stripMargin.trim
+      "./make-distribution.sh -Dscala-2.11 -Phadoop-2.7 -Phive"
     case other => throw new RuntimeException(s"Scala version $other is not supported!")
   }

@@ -170,7 +165,13 @@ lazy val dockerSettings = Seq(
     imageNames in docker := Seq(
       sbtdocker.ImageName(namespace = Some("velvia"),
         repository = "spark-jobserver",
-        tag = Some(s"${version.value}.mesos-${mesosVersion.split('-')(0)}.spark-$sparkVersion.scala-${scalaBinaryVersion.value}"))
+        tag = Some(
+          s"${version.value}" +
+            s".mesos-${mesosVersion.split('-')(0)}" +
+            s".spark-$sparkVersion" +
+            s".scala-${scalaBinaryVersion.value}" +
+            s".jdk-$javaVersion")
+      )
     )
   )

@@ -208,8 +209,7 @@ lazy val runScalaStyle = taskKey[Unit]("testScalaStyle")
 lazy val commonSettings = Defaults.coreDefaultSettings ++ dirSettings ++ implicitlySettings ++ Seq(
   organization := "spark.jobserver",
   crossPaths := true,
-  crossScalaVersions := Seq("2.10.6","2.11.8"),
-  scalaVersion := sys.env.getOrElse("SCALA_VERSION", "2.10.6"),
+  scalaVersion := sys.env.getOrElse("SCALA_VERSION", "2.11.8"),
   dependencyOverrides += "org.scala-lang" % "scala-compiler" % scalaVersion.value,
   publishTo := Some(Resolver.file("Unused repo", file("target/unusedrepo"))),
   // scalastyleFailOnError := true,

@@ -250,4 +250,4 @@ lazy val publishSettings = Seq(

 // This is here so we can easily switch back to Logback when Spark fixes its log4j dependency.
 lazy val jobServerLogbackLogging = "-Dlogback.configurationFile=config/logback-local.xml"
-lazy val jobServerLogging = "-Dlog4j.configuration=file:config/log4j-local.properties"
+lazy val jobServerLogging = "-Dlog4j.configuration=file:config/log4j-local.properties"
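
A note on the retagging above: the Docker image tag is now assembled from several build settings and gains a .jdk- component. A minimal sketch of the resulting format, using hypothetical stand-in values for the sbt settings (version, mesosVersion, and javaVersion below are made up for illustration, not taken from this repository):

object DockerTagSketch extends App {
  // Stand-in values for the sbt settings referenced in build.sbt (hypothetical).
  val version = "0.7.0"
  val mesosVersion = "0.25.0-0.2.70.debian81"   // only the part before '-' is kept
  val sparkVersion = "2.0.1"
  val scalaBinaryVersion = "2.11"
  val javaVersion = "8"

  val tag =
    s"$version" +
      s".mesos-${mesosVersion.split('-')(0)}" +
      s".spark-$sparkVersion" +
      s".scala-$scalaBinaryVersion" +
      s".jdk-$javaVersion"

  println(tag)   // 0.7.0.mesos-0.25.0.spark-2.0.1.scala-2.11.jdk-8
}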

ci/install-python-dependencies.sh

Lines changed: 1 addition & 0 deletions
@@ -1,4 +1,5 @@
 #!/usr/bin/env bash
+set -e
 pip install --upgrade pip
 pip install --user pyhocon
 pip3 install --user pyhocon

ci/install-spark.sh

Lines changed: 2 additions & 1 deletion
@@ -1,3 +1,4 @@
 #!/usr/bin/env bash
-curl -o /tmp/spark.tgz http://apache.mirror.anlx.net/spark/spark-1.6.2/spark-1.6.2-bin-hadoop2.6.tgz
+set -e
+curl -L -o /tmp/spark.tgz http://d3kbcqa49mib13.cloudfront.net/spark-2.0.1-bin-hadoop2.7.tgz
 tar -xvzf /tmp/spark.tgz -C /tmp

job-server-extras/src/main/scala/spark/jobserver/HiveTestJob.scala

Lines changed: 2 additions & 2 deletions
@@ -25,8 +25,8 @@ object HiveLoaderJob extends SparkHiveJob {
   val tableCreate = "CREATE TABLE `default`.`test_addresses`"
   val tableArgs = "(`firstName` String, `lastName` String, `address` String, `city` String)"
   val tableRowFormat = "ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'"
-  val tableColFormat = "COLLECTION ITEMS TERMINATED BY '\002'"
-  val tableMapFormat = "MAP KEYS TERMINATED BY '\003' STORED"
+  val tableColFormat = "COLLECTION ITEMS TERMINATED BY '\u0002'"
+  val tableMapFormat = "MAP KEYS TERMINATED BY '\u0003' STORED"
   val tableAs = "AS TextFile"

   val loadPath = s"'src/main/resources/hive_test_job_addresses.txt'"
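
A note on the escape change above: '\u0002' and '\u0003' are the same STX/ETX delimiter characters the old octal escapes '\002' and '\003' produced, rewritten as Unicode escapes because octal escape literals are deprecated in newer Scala versions (and the build now defaults to Scala 2.11). A minimal sketch, not project code, illustrating the equivalence:

object DelimiterEscapeSketch extends App {
  // The Unicode escapes denote the same control characters as the old octal escapes.
  val colSep = '\u0002'   // STX, formerly written '\002'
  val mapSep = '\u0003'   // ETX, formerly written '\003'
  assert(colSep.toInt == 2 && mapSep.toInt == 3)
  println(f"colSep=0x${colSep.toInt}%02X mapSep=0x${mapSep.toInt}%02X")   // colSep=0x02 mapSep=0x03
}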

job-server-extras/src/main/scala/spark/jobserver/context/HiveContextFactory.scala

Lines changed: 1 addition & 2 deletions
@@ -3,8 +3,7 @@ package spark.jobserver.context
 import com.typesafe.config.Config
 import org.apache.spark.{SparkConf, SparkContext}
 import org.apache.spark.sql.hive.HiveContext
-import spark.jobserver.{api, ContextLike, SparkHiveJob}
-import spark.jobserver.util.SparkJobUtils
+import spark.jobserver.{ContextLike, SparkHiveJob, api}

 class HiveContextFactory extends ScalaContextFactory {
   type C = HiveContext with ContextLike

job-server-extras/src/main/scala/spark/jobserver/python/JobEndpoint.scala

Lines changed: 9 additions & 9 deletions
@@ -2,7 +2,6 @@ package spark.jobserver.python

 import com.typesafe.config.{ConfigRenderOptions, Config}
 import org.apache.spark.SparkConf
-import spark.jobserver.api.JobEnvironment
 import scala.collection.JavaConverters._

 /**
@@ -15,14 +14,15 @@ import scala.collection.JavaConverters._
  * The Spark Job Server python subprocess assumes the endpoint to be an implementation of this Trait,
  * and attempts to access fields and methods accordingly.
  */
-case class JobEndpoint[C <: PythonContextLike](context: C,
-                                               sparkConf: SparkConf,
-                                               contextConfig: Config,
-                                               jobId: String,
-                                               jobConfig: Config,
-                                               jobClass: String,
-                                               py4JImports: Seq[String]
-                                              ){
+case class JobEndpoint[C <: PythonContextLike](
+    context: C,
+    sparkConf: SparkConf,
+    contextConfig: Config,
+    jobId: String,
+    jobConfig: Config,
+    jobClass: String,
+    py4JImports: Seq[String]
+  ){

   /**
     * @return The contextConfig, which is a Typesafe Config object, serialized to HOCON,

job-server-extras/src/main/scala/spark/jobserver/python/PythonJob.scala

Lines changed: 0 additions & 3 deletions
@@ -8,7 +8,6 @@ import spark.jobserver.api.{SparkJobBase, ValidationProblem, JobEnvironment}

 import scala.sys.process.{ProcessLogger, Process}
 import scala.util.{Failure, Success, Try}
-import scala.collection.JavaConverters._

 case class PythonJob[X <: PythonContextLike](eggPath: String,
                                              modulePath:String,
@@ -24,8 +23,6 @@ case class PythonJob[X <: PythonContextLike](eggPath: String,
     JobEndpoint(context, sparkConf, contextConfig, jobId, jobConfig, modulePath, py4JImports)
   }

-  def gateway(endpoint: JobEndpoint[C]): GatewayServer = new GatewayServer(endpoint, 0)
-
   /**
    *
    * To support a useful validate method here for Python jobs we would have call two python processes,