Commit eed5140

DSP-5892 Dse Configuration and Submit for JobServer
We use DSE's spark-submit instead of the stock spark-submit, and all of the environment variables that DSE normally sets are removed from the base config. The stop script is modified so that it kills the entire process group rather than just the DSE shell process.
1 parent e160404 commit eed5140

5 files changed: +108 -3 lines changed

bin/server_start.sh

Lines changed: 2 additions & 1 deletion

@@ -77,7 +77,8 @@ fi
 # This needs to be exported for standalone mode so drivers can connect to the Spark cluster
 export SPARK_HOME

-$SPARK_HOME/bin/spark-submit --class $MAIN --driver-memory $DRIVER_MEMORY \
+# DSE_BIN is set in settings.sh
+$DSE_HOME/bin/dse spark-submit --class $MAIN --driver-memory 5G \
   --conf "spark.executor.extraJavaOptions=$LOGGING_OPTS" \
   --driver-java-options "$GC_OPTS $JAVA_OPTS $LOGGING_OPTS $CONFIG_OVERRIDES" \
   $@ $appdir/spark-job-server.jar $conffile 2>&1 &
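
Because the submit line ends with $@, any extra arguments handed to server_start.sh are forwarded to dse spark-submit ahead of the job server jar. A minimal usage sketch, not part of this commit; the --conf key and value are purely illustrative:

# Hypothetical invocation: the extra flag lands in $@ and is passed
# straight through to `dse spark-submit` before the job server jar.
./server_start.sh --conf spark.cassandra.connection.host=127.0.0.1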

bin/server_stop.sh

Lines changed: 2 additions & 1 deletion

@@ -23,6 +23,7 @@ if [ ! -f "$pidFilePath" ] || ! kill -0 $(cat "$pidFilePath"); then
   echo 'Job server not running'
 else
   echo 'Stopping job server...'
-  kill -15 $(cat "$pidFilePath") && rm -f "$pidFilePath"
+  PID=$(cat "$pidFilePath")
+  kill -15 -- -$(ps -o pgid= $PID | grep -o [0-9]*) && rm -f "$pidFilePath"
   echo '...job server stopped'
 fi
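
The motivation for the change above: dse spark-submit is a wrapper script, so the PID recorded in the pid file belongs to the wrapper shell, and sending SIGTERM to that PID alone can leave the forked JVM running. The replacement looks up the process group ID with ps -o pgid= and signals the whole group; the dash in front of the PGID tells kill to target a process group. A standalone sketch of the same pattern, assuming a POSIX shell and a hypothetical PID:

#!/bin/sh
# Sketch: terminate an entire process group given one member's PID.
PID=12345                                           # hypothetical PID read from a pid file
PGID=$(ps -o pgid= -p "$PID" | grep -o '[0-9]*')    # print the pgid, strip padding whitespace
kill -15 -- "-$PGID"                                # negative target = whole process group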

job-server/config/dse.conf

Lines changed: 58 additions & 0 deletions

@@ -0,0 +1,58 @@
+# Template for a Spark Job Server configuration file
+# When deployed these settings are loaded when job server starts
+#
+# Spark Cluster / Job Server configuration
+spark {
+  # Spark Master will be automatically learned via the DSE
+  # spark.master will be passed to each job's JobContext
+  # master = "local[4]"
+  # master = "mesos://vm28-hulk-pub:5050"
+  # master = "yarn-client"
+
+  # Default # of CPUs for jobs to use for Spark standalone cluster
+  job-number-cpus = 4
+
+  jobserver {
+    port = 8090
+    jar-store-rootdir = /tmp/jobserver/jars
+
+    jobdao = spark.jobserver.io.JobFileDAO
+
+    filedao {
+      rootdir = /tmp/spark-job-server/filedao/data
+    }
+  }
+
+  # predefined Spark contexts
+  # contexts {
+  #   my-low-latency-context {
+  #     num-cpu-cores = 1         # Number of cores to allocate. Required.
+  #     memory-per-node = 512m    # Executor memory per node, -Xmx style eg 512m, 1G, etc.
+  #   }
+  #   # define additional contexts here
+  # }
+
+  # universal context configuration. These settings can be overridden, see README.md
+  context-settings {
+    num-cpu-cores = 2           # Number of cores to allocate. Required.
+    memory-per-node = 512m      # Executor memory per node, -Xmx style eg 512m, #1G, etc.
+
+    # in case spark distribution should be accessed from HDFS (as opposed to being installed on every mesos slave)
+    # spark.executor.uri = "hdfs://namenode:8020/apps/spark/spark.tgz"
+
+    # uris of jars to be loaded into the classpath for this context. Uris is a string list, or a string separated by commas ','
+    # dependent-jar-uris = ["file:///some/path/present/in/each/mesos/slave/somepackage.jar"]
+
+    # If you wish to pass any settings directly to the sparkConf as-is, add them here in passthrough,
+    # such as hadoop connection settings that don't use the "spark." prefix
+    passthrough {
+      #es.nodes = "192.1.1.1"
+    }
+  }
+
+  # This needs to match SPARK_HOME for cluster SparkContexts to be created successfully
+  # home = "/home/spark/spark"
+}
+
+# Note that you can use this file to define settings not only for job server,
+# but for your Spark jobs as well. Spark job configuration merges with this configuration file as defaults.
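
The closing note in the template is worth underlining: values in this file act as defaults, and configuration submitted with an individual job is merged on top of them. A hedged sketch of exercising the server on the port configured above (8090), using the stock spark-jobserver REST endpoints; the jar path, app name, class path and input value are hypothetical placeholders:

# Upload a job jar, then run a job with a per-request config override.
# Paths and names below are placeholders, not part of this commit.
curl --data-binary @target/my-job.jar localhost:8090/jars/my-app
curl -d 'input.string = "a b c"' \
  'localhost:8090/jobs?appName=my-app&classPath=com.example.WordCount'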

job-server/config/dse.sh

Lines changed: 45 additions & 0 deletions

@@ -0,0 +1,45 @@
+# DataStax Distribution Config
+# Environment and deploy file
+# For use with bin/server_deploy, bin/server_package etc.
+
+APP_USER=cassandra
+APP_GROUP=cassandra
+
+#Check Home Directory
+#Relative Tar Location and
+#Package location for dse-env.sh to get environment variables
+if [ -z "$DSE_ENV" ]; then
+    for include in "$HOME/.dse-env.sh" \
+                   "`dirname "$0"`/../../bin/dse-env.sh" \
+                   "/etc/dse/dse-env.sh"; do
+        if [ -r "$include" ]; then
+            DSE_ENV="$include"
+            break
+        fi
+    done
+fi
+
+#ENV is set for the build script server_package, If it isn't set then we need
+# to be able to read DSE_ENV to set Spark Env variables
+if [ -z "$DSE_ENV" ] && [ -z "$ENV" ]; then
+    echo "DSE_ENV could not be determined."
+    exit 1
+elif [ -r "$DSE_ENV" ]; then
+    . "$DSE_ENV"
+elif [ -z "$ENV" ]; then
+    echo "Location pointed by DSE_ENV not readable: $DSE_ENV"
+    exit 1
+fi
+
+SPARK_VERSION=1.3.1
+
+DEPLOY_HOSTS="localhost"
+
+INSTALL_DIR=$DSE_COMPONENTS_ROOT/spark-jobserver
+LOG_DIR=/var/log/job-server
+
+PIDFILE=spark-jobserver.pid
+
+SPARK_CONF_DIR=$SPARK_HOME/conf
+
+SCALA_VERSION=2.10.4 # or 2.11.6
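
dse.sh probes three locations in order ($HOME/.dse-env.sh, a path relative to the script, then /etc/dse/dse-env.sh) and sources the first readable one so that DSE's Spark environment (SPARK_HOME, DSE_COMPONENTS_ROOT, and so on) is available to the deploy scripts. A quick, hypothetical smoke test of that lookup chain, run from the repository root:

# Hypothetical smoke test: source the config in a subshell and report
# which dse-env.sh was found and what SPARK_HOME it provided.
sh -c '. job-server/config/dse.sh; echo "DSE_ENV=$DSE_ENV"; echo "SPARK_HOME=$SPARK_HOME"'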

version.sbt

Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-version in ThisBuild := "0.5.2-SNAPSHOT"
+version in ThisBuild := "0.5.2-DSP-5892-SNAPSHOT"
