Commit 2064947

Merge pull request spark-jobserver#415 from hntd187/master
Added ability to add to Spark's Hadoop configuration
2 parents: ec32d8b + 65ef545

6 files changed: +78 -9 lines
README.md

Lines changed: 8 additions & 0 deletions

@@ -600,6 +600,14 @@ To pass settings directly to the sparkConf that do not use the "spark." prefix "
     }
   }
 
+To add to the underlying Hadoop configuration in a Spark context, add the hadoop section to the context settings:
+
+    spark.context-settings {
+      hadoop {
+        mapreduce.framework.name = "Foo"
+      }
+    }
+
 For the exact context configuration parameters, see JobManagerActor docs as well as application.conf.
 
 Also see the [yarn doc](doc/yarn.md) for more tips.
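
For illustration only, the new hadoop block sits alongside the other per-context settings; the keys and values below are placeholders, not defaults shipped with the job server:

    spark.context-settings {
      num-cpu-cores = 2
      memory-per-node = 512m
      hadoop {
        mapreduce.framework.name = "yarn"
        fs.defaultFS = "hdfs://namenode:8020"
      }
    }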

job-server/src/main/resources/application.conf

Lines changed: 5 additions & 0 deletions

@@ -127,6 +127,11 @@ spark {
     passthrough {
       spark.driver.allowMultipleContexts = true  # Ignore the Multiple context exception related with SPARK-2243
     }
+
+    #This adds configuration to the underlying Hadoop configuration in the Spark Context
+    #hadoop {
+    #  mapreduce.framework.name = "FooFramework"
+    #}
   }
 }

job-server/src/spark.jobserver/JobManagerActor.scala

Lines changed: 27 additions & 5 deletions

@@ -1,17 +1,21 @@
 package spark.jobserver
 
 import java.util.concurrent.Executors._
-import akka.actor.{ActorRef, Props, PoisonPill}
+
+import akka.actor.{ActorRef, PoisonPill, Props}
 import com.typesafe.config.Config
 import java.net.{URI, URL}
 import java.util.concurrent.atomic.AtomicInteger
+
 import ooyala.common.akka.InstrumentedActor
-import org.apache.spark.{ SparkEnv, SparkContext }
+import org.apache.hadoop.conf.Configuration
+import org.apache.spark.{SparkConf, SparkContext, SparkEnv}
 import org.joda.time.DateTime
-import scala.concurrent.{ Future, ExecutionContext }
+
+import scala.concurrent.{ExecutionContext, Future}
 import scala.util.{Failure, Success, Try}
 import spark.jobserver.ContextSupervisor.StopContext
-import spark.jobserver.io.{JobDAOActor, JobDAO, JobInfo, JarInfo}
+import spark.jobserver.io.{JarInfo, JobDAO, JobDAOActor, JobInfo}
 import spark.jobserver.util.{ContextURLClassLoader, SparkJobUtils}
 
 object JobManagerActor {

@@ -20,9 +24,11 @@ object JobManagerActor {
   case class StartJob(appName: String, classPath: String, config: Config,
                       subscribedEvents: Set[Class[_]])
   case class KillJob(jobId: String)
+  case object GetContextConfig
   case object SparkContextStatus
 
   // Results/Data
+  case class ContextConfig(contextName: String, contextConfig: SparkConf, hadoopConfig: Configuration)
   case class Initialized(contextName: String, resultActor: ActorRef)
   case class InitError(t: Throwable)
   case class JobLoadingError(err: Throwable)

@@ -152,7 +158,23 @@ class JobManagerActor(contextConfig: Config) extends InstrumentedActor {
           sender ! SparkContextAlive
         } catch {
           case e: Exception => {
-            logger.error("SparkContext is not exist!")
+            logger.error("SparkContext does not exist!")
+            sender ! SparkContextDead
+          }
+        }
+      }
+    }
+    case GetContextConfig => {
+      if (jobContext.sparkContext == null) {
+        sender ! SparkContextDead
+      } else {
+        try {
+          val conf: SparkConf = jobContext.sparkContext.getConf
+          val hadoopConf: Configuration = jobContext.sparkContext.hadoopConfiguration
+          sender ! ContextConfig(jobContext.sparkContext.appName, conf, hadoopConf)
+        } catch {
+          case e: Exception => {
+            logger.error("SparkContext does not exist!")
             sender ! SparkContextDead
           }
         }
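
A minimal sketch of how a client might use the new GetContextConfig message, assuming contextActor is the ActorRef of a running context obtained from the supervisor; this helper is illustrative and not part of the commit:

    import akka.actor.ActorRef
    import akka.pattern.ask
    import akka.util.Timeout
    import scala.concurrent.ExecutionContext.Implicits.global
    import scala.concurrent.duration._
    import spark.jobserver.JobManagerActor.{ContextConfig, GetContextConfig}

    def printHadoopSetting(contextActor: ActorRef): Unit = {
      implicit val timeout: Timeout = Timeout(5.seconds)
      // Ask the context's JobManagerActor for its Spark and Hadoop configuration
      (contextActor ? GetContextConfig).mapTo[ContextConfig].foreach { cc =>
        // cc.hadoopConfig is an org.apache.hadoop.conf.Configuration
        println(s"${cc.contextName}: " + cc.hadoopConfig.get("mapreduce.framework.name"))
      }
    }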

job-server/src/spark.jobserver/context/SparkContextFactory.scala

Lines changed: 3 additions & 1 deletion

@@ -50,9 +50,11 @@ class DefaultSparkContextFactory extends SparkContextFactory {
   type C = SparkContext with ContextLike
 
   def makeContext(sparkConf: SparkConf, config: Config, contextName: String): C = {
-    new SparkContext(sparkConf) with ContextLike {
+    val sc = new SparkContext(sparkConf) with ContextLike {
       def sparkContext: SparkContext = this
       def isValidJob(job: SparkJobBase): Boolean = job.isInstanceOf[SparkJob]
     }
+    for ((k, v) <- SparkJobUtils.getHadoopConfig(config)) sc.hadoopConfiguration.set(k, v)
+    sc
   }
 }
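
As a hedged end-to-end sketch (local master, placeholder framework name, and the spark.jobserver.context package assumed from the file path), the factory can be exercised directly to confirm that the hadoop section is copied into the context's Hadoop configuration:

    import com.typesafe.config.ConfigFactory
    import org.apache.spark.SparkConf
    import spark.jobserver.context.DefaultSparkContextFactory

    object HadoopConfigFactoryDemo extends App {
      val factory = new DefaultSparkContextFactory
      val contextConfig = ConfigFactory.parseString(
        """hadoop { mapreduce.framework.name = "FooFramework" }""")
      val sparkConf = new SparkConf().setMaster("local[1]").setAppName("hadoop-config-demo")
      val sc = factory.makeContext(sparkConf, contextConfig, "hadoop-config-demo")
      // makeContext applies every key under "hadoop" via sc.hadoopConfiguration.set(k, v)
      println(sc.hadoopConfiguration.get("mapreduce.framework.name"))  // FooFramework
      sc.stop()
    }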

job-server/src/spark.jobserver/util/SparkJobUtils.scala

Lines changed: 11 additions & 0 deletions

@@ -73,6 +73,17 @@ object SparkJobUtils {
     conf
   }
 
+  /**
+   *
+   * @param config the specific context configuration
+   * @return a map of the hadoop configuration values or an empty Map
+   */
+  def getHadoopConfig(config: Config): Map[String, String] = {
+    Try(config.getConfig("hadoop").entrySet().asScala.map { e =>
+      e.getKey -> e.getValue.unwrapped().toString
+    }.toMap).getOrElse(Map())
+  }
+
   /**
    * Returns the maximum number of jobs that can run at the same time
    */
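
A REPL-style sketch of what getHadoopConfig returns; the hadoop keys and values here are placeholders:

    import com.typesafe.config.ConfigFactory
    import spark.jobserver.util.SparkJobUtils

    val contextConfig = ConfigFactory.parseString(
      """hadoop {
        |  mapreduce.framework.name = "yarn"
        |  fs.defaultFS = "hdfs://namenode:8020"
        |}""".stripMargin)

    // Map("mapreduce.framework.name" -> "yarn", "fs.defaultFS" -> "hdfs://namenode:8020")
    SparkJobUtils.getHadoopConfig(contextConfig)

    // A config with no hadoop section yields an empty Map rather than throwing
    SparkJobUtils.getHadoopConfig(ConfigFactory.empty())  // Map()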

job-server/test/spark.jobserver/LocalContextSupervisorSpec.scala

Lines changed: 24 additions & 3 deletions

@@ -1,10 +1,12 @@
 package spark.jobserver
 
 import akka.actor._
-import akka.testkit.{TestKit, ImplicitSender}
+import akka.testkit.{ImplicitSender, TestKit}
 import com.typesafe.config.ConfigFactory
-import spark.jobserver.io.{JobDAOActor, JobDAO}
-import org.scalatest.{Matchers, FunSpecLike, BeforeAndAfterAll, BeforeAndAfter}
+import org.apache.hadoop.conf.Configuration
+import org.apache.spark.SparkConf
+import spark.jobserver.io.{JobDAO, JobDAOActor}
+import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll, FunSpecLike, Matchers}
 
 import scala.concurrent.duration._
 

@@ -36,6 +38,9 @@ object LocalContextSupervisorSpec {
       spark.driver.allowMultipleContexts = true
       spark.ui.enabled = false
     }
+    hadoop {
+      mapreduce.framework.name = "ayylmao"
+    }
   }
 }
 akka.log-dead-letters = 0

@@ -71,6 +76,7 @@ class LocalContextSupervisorSpec extends TestKit(LocalContextSupervisorSpec.syst
   }
 
   import ContextSupervisor._
+  import JobManagerActor._
 
   describe("context management") {
     it("should list empty contexts at startup") {

@@ -99,6 +105,21 @@ class LocalContextSupervisorSpec extends TestKit(LocalContextSupervisorSpec.syst
       rActor.path.toString should not include ("global")
     }
 
+    it("should be able to get context configs") {
+      supervisor ! AddContext("c1", contextConfig)
+      expectMsg(ContextInitialized)
+      supervisor ! GetContext("c1")
+      expectMsgPF(5 seconds, "I can't find that context :'-(") {
+        case (contextActor: ActorRef, resultActor: ActorRef) => {
+          contextActor ! GetContextConfig
+          val cc = expectMsgClass(classOf[ContextConfig])
+          cc.contextName shouldBe "c1"
+          cc.contextConfig.get("spark.ui.enabled") shouldBe "false"
+          cc.hadoopConfig.get("mapreduce.framework.name") shouldBe "ayylmao"
+        }
+      }
+    }
+
     it("should be able to stop contexts already running") {
       supervisor ! AddContext("c1", contextConfig)
       expectMsg(ContextInitialized)
