Commit eed5140

DSP-5892 Dse Configuration and Submit for JobServer
We use DSE's spark-submit instead of the stock spark-submit, and all of the environment variables that DSE normally sets are removed from the base config. The stop script is modified so that it kills the entire process group rather than just the DSE shell process.
1 parent e160404 commit eed5140

5 files changed: +108 -3 lines changed

bin/server_start.sh

Lines changed: 2 additions & 1 deletion

@@ -77,7 +77,8 @@ fi
 # This needs to be exported for standalone mode so drivers can connect to the Spark cluster
 export SPARK_HOME

-$SPARK_HOME/bin/spark-submit --class $MAIN --driver-memory $DRIVER_MEMORY \
+# DSE_BIN is set in settings.sh
+$DSE_HOME/bin/dse spark-submit --class $MAIN --driver-memory 5G \
   --conf "spark.executor.extraJavaOptions=$LOGGING_OPTS" \
   --driver-java-options "$GC_OPTS $JAVA_OPTS $LOGGING_OPTS $CONFIG_OVERRIDES" \
   $@ $appdir/spark-job-server.jar $conffile 2>&1 &
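
Because the submit line ends with $@, any extra arguments handed to server_start.sh are forwarded to dse spark-submit ahead of the job server jar. A minimal usage sketch, not part of this commit; the --conf key and value are purely illustrative:

# Hypothetical invocation: the extra flag lands in $@ and is passed
# straight through to `dse spark-submit` before the job server jar.
./server_start.sh --conf spark.cassandra.connection.host=127.0.0.1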

bin/server_stop.sh

Lines changed: 2 additions & 1 deletion

@@ -23,6 +23,7 @@ if [ ! -f "$pidFilePath" ] || ! kill -0 $(cat "$pidFilePath"); then
   echo 'Job server not running'
 else
   echo 'Stopping job server...'
-  kill -15 $(cat "$pidFilePath") && rm -f "$pidFilePath"
+  PID=$(cat "$pidFilePath")
+  kill -15 -- -$(ps -o pgid= $PID | grep -o [0-9]*) && rm -f "$pidFilePath"
   echo '...job server stopped'
 fi
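
The motivation for the change above: dse spark-submit is a wrapper script, so the PID recorded in the pid file belongs to the wrapper shell, and sending SIGTERM to that PID alone can leave the forked JVM running. The replacement looks up the process group ID with ps -o pgid= and signals the whole group; the dash in front of the PGID tells kill to target a process group. A standalone sketch of the same pattern, assuming a POSIX shell and a hypothetical PID:

#!/bin/sh
# Sketch: terminate an entire process group given one member's PID.
PID=12345                                           # hypothetical PID read from a pid file
PGID=$(ps -o pgid= -p "$PID" | grep -o '[0-9]*')    # print the pgid, strip padding whitespace
kill -15 -- "-$PGID"                                # negative target = whole process group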

job-server/config/dse.conf

Lines changed: 58 additions & 0 deletions

@@ -0,0 +1,58 @@
+# Template for a Spark Job Server configuration file
+# When deployed these settings are loaded when job server starts
+#
+# Spark Cluster / Job Server configuration
+spark {
+  # Spark Master will be automatically learned via the DSE
+  # spark.master will be passed to each job's JobContext
+  # master = "local[4]"
+  # master = "mesos://vm28-hulk-pub:5050"
+  # master = "yarn-client"
+
+  # Default # of CPUs for jobs to use for Spark standalone cluster
+  job-number-cpus = 4
+
+  jobserver {
+    port = 8090
+    jar-store-rootdir = /tmp/jobserver/jars
+
+    jobdao = spark.jobserver.io.JobFileDAO
+
+    filedao {
+      rootdir = /tmp/spark-job-server/filedao/data
+    }
+  }
+
+  # predefined Spark contexts
+  # contexts {
+  #   my-low-latency-context {
+  #     num-cpu-cores = 1         # Number of cores to allocate. Required.
+  #     memory-per-node = 512m    # Executor memory per node, -Xmx style eg 512m, 1G, etc.
+  #   }
+  #   # define additional contexts here
+  # }
+
+  # universal context configuration. These settings can be overridden, see README.md
+  context-settings {
+    num-cpu-cores = 2           # Number of cores to allocate. Required.
+    memory-per-node = 512m      # Executor memory per node, -Xmx style eg 512m, #1G, etc.
+
+    # in case spark distribution should be accessed from HDFS (as opposed to being installed on every mesos slave)
+    # spark.executor.uri = "hdfs://namenode:8020/apps/spark/spark.tgz"
+
+    # uris of jars to be loaded into the classpath for this context. Uris is a string list, or a string separated by commas ','
+    # dependent-jar-uris = ["file:///some/path/present/in/each/mesos/slave/somepackage.jar"]
+
+    # If you wish to pass any settings directly to the sparkConf as-is, add them here in passthrough,
+    # such as hadoop connection settings that don't use the "spark." prefix
+    passthrough {
+      #es.nodes = "192.1.1.1"
+    }
+  }
+
+  # This needs to match SPARK_HOME for cluster SparkContexts to be created successfully
+  # home = "/home/spark/spark"
+}
+
+# Note that you can use this file to define settings not only for job server,
+# but for your Spark jobs as well. Spark job configuration merges with this configuration file as defaults.
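
The closing note in the template is worth underlining: values in this file act as defaults, and configuration submitted with an individual job is merged on top of them. A hedged sketch of exercising the server on the port configured above (8090), using the stock spark-jobserver REST endpoints; the jar path, app name, class path and input value are hypothetical placeholders:

# Upload a job jar, then run a job with a per-request config override.
# Paths and names below are placeholders, not part of this commit.
curl --data-binary @target/my-job.jar localhost:8090/jars/my-app
curl -d 'input.string = "a b c"' \
  'localhost:8090/jobs?appName=my-app&classPath=com.example.WordCount'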

job-server/config/dse.sh

Lines changed: 45 additions & 0 deletions

@@ -0,0 +1,45 @@
+# DataStax Distribution Config
+# Environment and deploy file
+# For use with bin/server_deploy, bin/server_package etc.
+
+APP_USER=cassandra
+APP_GROUP=cassandra
+
+#Check Home Directory
+#Relative Tar Location and
+#Package location for dse-env.sh to get environment variables
+if [ -z "$DSE_ENV" ]; then
+    for include in "$HOME/.dse-env.sh" \
+                   "`dirname "$0"`/../../bin/dse-env.sh" \
+                   "/etc/dse/dse-env.sh"; do
+        if [ -r "$include" ]; then
+            DSE_ENV="$include"
+            break
+        fi
+    done
+fi
+
+#ENV is set for the build script server_package, If it isn't set then we need
+# to be able to read DSE_ENV to set Spark Env variables
+if [ -z "$DSE_ENV" ] && [ -z "$ENV" ]; then
+    echo "DSE_ENV could not be determined."
+    exit 1
+elif [ -r "$DSE_ENV" ]; then
+    . "$DSE_ENV"
+elif [ -z "$ENV" ]; then
+    echo "Location pointed by DSE_ENV not readable: $DSE_ENV"
+    exit 1
+fi
+
+SPARK_VERSION=1.3.1
+
+DEPLOY_HOSTS="localhost"
+
+INSTALL_DIR=$DSE_COMPONENTS_ROOT/spark-jobserver
+LOG_DIR=/var/log/job-server
+
+PIDFILE=spark-jobserver.pid
+
+SPARK_CONF_DIR=$SPARK_HOME/conf
+
+SCALA_VERSION=2.10.4 # or 2.11.6
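
dse.sh probes three locations in order ($HOME/.dse-env.sh, a path relative to the script, then /etc/dse/dse-env.sh) and sources the first readable one so that DSE's Spark environment (SPARK_HOME, DSE_COMPONENTS_ROOT, and so on) is available to the deploy scripts. A quick, hypothetical smoke test of that lookup chain, run from the repository root:

# Hypothetical smoke test: source the config in a subshell and report
# which dse-env.sh was found and what SPARK_HOME it provided.
sh -c '. job-server/config/dse.sh; echo "DSE_ENV=$DSE_ENV"; echo "SPARK_HOME=$SPARK_HOME"'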

version.sbt

Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-version in ThisBuild := "0.5.2-SNAPSHOT"
+version in ThisBuild := "0.5.2-DSP-5892-SNAPSHOT"
