
Commit 59b8307

Merge pull request spark-jobserver#183 from spark-jobserver/velvia/fix-streaming-docs
A docs fix, and enable env vars in Typesafe configs
2 parents: 335b217 + aa15b17

7 files changed: +28 additions, −20 deletions

README.md

Lines changed: 4 additions & 1 deletion
@@ -19,6 +19,7 @@ See [Troubleshooting Tips](doc/troubleshooting.md) as well as [Yarn tips](doc/ya
 - Fuse Elements
 - Frontline Solvers
 - Aruba Networks
+- [Zed Worldwide](www.zed.com)

 ## Features

@@ -48,7 +49,7 @@ For release notes, look in the `notes/` directory. They should also be up on [l

 ## Quick start / development mode

-NOTE: This quick start guide uses SBT to run the job server and the included test jar, but the normal development process is to create a separate project for Job Server jobs and to deploy the job server to a Spark cluster.
+NOTE: This quick start guide uses SBT to run the job server and the included test jar, but the normal development process is to create a separate project for Job Server jobs and to deploy the job server to a Spark cluster. Please see the deployment section below for more details.

 You need to have [SBT](http://www.scala-sbt.org/release/docs/Getting-Started/Setup.html) installed.

@@ -281,6 +282,8 @@ the REST API.
     DELETE /jobs/<jobId>       - Kills the specified job
     GET /jobs/<jobId>/config   - Gets the job configuration

+For details on the Typesafe config format used for input (JSON also works), see the [Typesafe Config docs](https://github.com/typesafehub/config).
+
 ### Context configuration

 A number of context-specific settings can be controlled when creating a context (POST /contexts) or running an
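
The input itself is plain HOCON (or JSON). As a hedged illustration, a posted job configuration might look like the sketch below; the `input.string` key is borrowed from the job server's word-count test job and should be treated as an example, not a required schema:

```hocon
# hypothetical job input in Typesafe Config (HOCON) syntax; JSON parses too
input {
  string = "a b c a b see"
}
```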

doc/contexts.md

Lines changed: 7 additions & 5 deletions
@@ -40,9 +40,11 @@ This can be done easily by extending the `SparkContextFactory` trait, like `SQLC

 If you wish to use the `SQLContext` or `HiveContext`, be sure to pull down the job-server-extras package.

-# StreamingContext
-job-server-extras provides a context to run Spark Streaming jobs, there are a couple of configurations you can change in job-server's .conf file
-streaming.batch_interval: the streaming batch in millis
-streaming.stopGracefully: if true, stops gracefully by waiting for the processing of all received data to be completed
-streaming.stopSparkContext: if true, stops the SparkContext with the StreamingContext. The underlying SparkContext will be stopped regardless of whether the StreamingContext has been started.
+## StreamingContext
+
+`job-server-extras` provides a context to run Spark Streaming jobs. There are a couple of configurations you can change in job-server's .conf file:
+
+* `streaming.batch_interval`: the streaming batch in millis
+* `streaming.stopGracefully`: if true, stops gracefully by waiting for the processing of all received data to be completed
+* `streaming.stopSparkContext`: if true, stops the SparkContext with the StreamingContext. The underlying SparkContext will be stopped regardless of whether the StreamingContext has been started.

job-server-extras/src/spark.jobserver/StreamingTestJob.scala

Lines changed: 0 additions & 3 deletions
@@ -7,9 +7,6 @@ import org.apache.spark.streaming.StreamingContext

 import scala.collection.mutable

-/** :: TestObject ::
-  * A Streaming job for testing, will
-  */
 @VisibleForTesting
 object StreamingTestJob extends SparkStramingJob {
   def validate(ssc: StreamingContext, config: Config): SparkJobValidation = SparkJobValid

job-server/src/main/resources/application.conf

Lines changed: 12 additions & 7 deletions
@@ -54,13 +54,18 @@ spark {
   # Determines the type of jobs that can run in a SparkContext
   context-factory = spark.jobserver.context.DefaultSparkContextFactory

-  # Default batch interval for Spark Streaming contexts in milliseconds
-  streaming.batch_interval = 1000
-  # if true, stops gracefully by waiting for the processing of all received data to be completed
-  streaming.stopGracefully = true
-  # if true, stops the SparkContext with the StreamingContext. The underlying SparkContext will be
-  # stopped regardless of whether the StreamingContext has been started.
-  streaming.stopSparkContext = true
+
+  streaming {
+    # Default batch interval for Spark Streaming contexts in milliseconds
+    batch_interval = 1000
+
+    # if true, stops gracefully by waiting for the processing of all received data to be completed
+    stopGracefully = true
+
+    # if true, stops the SparkContext with the StreamingContext. The underlying SparkContext will be
+    # stopped regardless of whether the StreamingContext has been started.
+    stopSparkContext = true
+  }

   # uris of jars to be loaded into the classpath for this context. Uris is a string list, or a string separated by commas ','
   # dependent-jar-uris = ["file:///some/path/present/in/each/mesos/slave/somepackage.jar"]
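
For reference, a minimal sketch of reading these nested settings with the Typesafe Config API (the `val` names are invented for illustration; the paths follow the `spark.streaming` block above):

```scala
import com.typesafe.config.ConfigFactory

// load application.conf from the classpath (load() also resolves substitutions)
val config = ConfigFactory.load()

val batchIntervalMs = config.getLong("spark.streaming.batch_interval")    // 1000 by default
val stopGracefully  = config.getBoolean("spark.streaming.stopGracefully")
val stopSparkCtx    = config.getBoolean("spark.streaming.stopSparkContext")
```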

job-server/src/spark.jobserver/JobServer.scala

Lines changed: 1 addition & 1 deletion
@@ -36,7 +36,7 @@ object JobServer {
       println("Could not find configuration file " + configFile)
       sys.exit(1)
     }
-    ConfigFactory.parseFile(configFile).withFallback(defaultConfig)
+    ConfigFactory.parseFile(configFile).withFallback(defaultConfig).resolve()
   } else {
     defaultConfig
   }
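
The added `.resolve()` is what makes substitutions like `${?MY_SPARK_HOST}` in a config file take effect; by default, resolution falls back to the process environment for paths not defined in the config itself. A minimal sketch (the key and fallback value are hypothetical):

```scala
import com.typesafe.config.ConfigFactory

// ${?...} is an optional substitution: if MY_SPARK_HOST is set in the
// environment it wins; otherwise the fallback value shows through.
val parsed   = ConfigFactory.parseString("spark.master = ${?MY_SPARK_HOST}")
val fallback = ConfigFactory.parseString("spark.master = \"local[4]\"")

val merged = parsed.withFallback(fallback).resolve()
println(merged.getString("spark.master"))  // env value if set, else local[4]
```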

job-server/src/spark.jobserver/WebApi.scala

Lines changed: 2 additions & 2 deletions
@@ -117,7 +117,7 @@ class WebApi(system: ActorSystem,
         complete(StatusCodes.BadRequest, errMap("context name must start with letters"))
       } else {
         parameterMap { (params) =>
-          val config = ConfigFactory.parseMap(params.asJava)
+          val config = ConfigFactory.parseMap(params.asJava).resolve()
           val future = (supervisor ? AddContext(contextName, config))(contextTimeout.seconds)
           respondWithMediaType(MediaTypes.`application/json`) { ctx =>
             future.map {
@@ -290,7 +290,7 @@ class WebApi(system: ActorSystem,
       try {
         val async = !syncOpt.getOrElse(false)
         val postedJobConfig = ConfigFactory.parseString(configString)
-        val jobConfig = postedJobConfig.withFallback(config)
+        val jobConfig = postedJobConfig.withFallback(config).resolve()
         val contextConfig = Try(jobConfig.getConfig("spark.context-settings")).
           getOrElse(ConfigFactory.empty)
         val jobManager = getJobManagerForContext(contextOpt, contextConfig, classPath)
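
Resolving after `withFallback` also lets substitutions in a posted job config refer to paths that only exist in the server-side config it falls back to. A small sketch with invented keys (`defaults.words`, `input.string`):

```scala
import com.typesafe.config.ConfigFactory

// stand-in for the job server's own config
val serverConfig = ConfigFactory.parseString("defaults.words = \"a b c\"")

// a posted job config referencing a path defined only in the fallback
val posted = ConfigFactory.parseString("input.string = ${defaults.words}")

// resolve() must run on the merged config; resolving `posted` alone
// would throw an unresolved-substitution error
val jobConfig = posted.withFallback(serverConfig).resolve()
println(jobConfig.getString("input.string"))  // prints: a b c
```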

notes/0.5.2.markdown

Lines changed: 2 additions & 1 deletion
@@ -1,5 +1,6 @@
 #Scala #akka @ApacheSpark

 * Spark streaming context support !! (@zeitos)
-* Change `server_start.sh` to use `spark-submit`
+* Change `server_start.sh` to use `spark-submit`. This should fix some edge case bugs.
 * Configurable driver memory (@acidghost)
+* Be able to accept environment vars in job server config files, eg `master = ${?MY_SPARK_HOST}` (see the [Typesafe Config docs](https://github.com/typesafehub/config#uses-of-substitutions))
