
Commit 130ddbf

Merge tag 'v0.6.2' into DSP-5934-0.6.2

Brings DSE branch up to 0.6.2. Marking as 0.6.2.100.

Conflicts:
    bin/server_package.sh
    bin/server_start.sh
    version.sbt

2 parents 1c0a4eb + dadd89a · commit 130ddbf

108 files changed (+5660, -1000 lines)


.gitignore

Lines changed: 10 additions & 1 deletion
@@ -13,4 +13,13 @@ config/*.conf
 config/*.sh
 job-server/config/*.conf
 job-server/config/*.sh
-metastore_db/
+metastore_db/
+
+#ignore generated config
+bin/ec2_example.sh
+
+# ignore spark-ec2 script
+ec2Cluster/
+
+# don't ignore the ec2 config and sh files
+!job-server/config/ec2.sh

.travis.yml

Lines changed: 5 additions & 2 deletions
@@ -3,5 +3,8 @@ env:
   global:
     _JAVA_OPTIONS="-Xmx1500m -XX:MaxPermSize=512m -Dakka.test.timefactor=3"
 scala:
-  - 2.10.4
-  - 2.11.6
+  - 2.10.6
+  - 2.11.8
+jdk:
+  - oraclejdk8
+  - oraclejdk7
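
The matrix now tests Scala 2.10.6 and 2.11.8 on both JDK 8 and JDK 7. To reproduce one cell of the matrix locally, the Scala version can be pinned on the sbt command line; a sketch, assuming sbt and a matching JDK are installed:

    _JAVA_OPTIONS="-Xmx1500m -XX:MaxPermSize=512m -Dakka.test.timefactor=3" sbt ++2.11.8 test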

README.md

Lines changed: 243 additions & 22 deletions
Large diffs are not rendered by default.

bin/ec2_deploy.sh

Lines changed: 43 additions & 0 deletions
@@ -0,0 +1,43 @@
+#!/bin/bash
+bin=`dirname "${BASH_SOURCE-$0}"`
+bin=`cd "$bin"; pwd`
+
+. "$bin"/../config/user-ec2-settings.sh
+
+#get spark deployment scripts if they haven't been downloaded and extracted yet
+SPARK_DIR=ec2Cluster
+if [ ! -d "$bin"/../$SPARK_DIR ]; then
+  mkdir "$bin"/../$SPARK_DIR
+  mkdir -p "$bin"/../$SPARK_DIR/deploy.generic/root/spark-ec2
+  wget -P "$bin"/../$SPARK_DIR/deploy.generic/root/spark-ec2 https://raw.githubusercontent.com/apache/spark/master/ec2/deploy.generic/root/spark-ec2/ec2-variables.sh
+  wget -P "$bin"/../$SPARK_DIR https://raw.githubusercontent.com/apache/spark/master/ec2/spark_ec2.py
+  wget -P "$bin"/../$SPARK_DIR https://raw.githubusercontent.com/apache/spark/master/ec2/spark-ec2
+  chmod u+x "$bin"/../$SPARK_DIR/*
+fi
+
+#run spark-ec2 to start the ec2 cluster
+EC2DEPLOY="$bin"/../$SPARK_DIR/spark-ec2
+"$EC2DEPLOY" --copy-aws-credentials --key-pair=$KEY_PAIR --hadoop-major-version=yarn --identity-file=$SSH_KEY --region=us-east-1 --zone=us-east-1a --spark-version=$SPARK_VERSION --instance-type=$INSTANCE_TYPE --slaves $NUM_SLAVES launch $CLUSTER_NAME
+#There is only 1 deploy host. However, the variable is plural as that is how Spark Job Server named it.
+#To minimize changes, I left the variable name alone.
+export DEPLOY_HOSTS=$("$EC2DEPLOY" get-master $CLUSTER_NAME | tail -n1)
+
+#This line is a hack to edit the ec2.conf file so that the master option is correct. Since we are allowing Amazon to
+#dynamically allocate a url for the master node, we must update the configuration file in between cluster startup
+#and Job Server deployment
+cp "$bin"/../config/ec2.conf.template "$bin"/../config/ec2.conf
+sed -i -E "s/master = .*/master = \"spark:\/\/$DEPLOY_HOSTS:7077\"/g" "$bin"/../config/ec2.conf
+
+#also get ec2_example.sh right
+cp "$bin"/ec2_example.sh.template "$bin"/ec2_example.sh
+sed -i -E "s/DEPLOY_HOSTS=.*/DEPLOY_HOSTS=\"$DEPLOY_HOSTS:8090\"/g" "$bin"/ec2_example.sh
+
+#open all ports on the master so Spark Job Server works and you can see the results of your jobs
+aws ec2 authorize-security-group-ingress --group-name $CLUSTER_NAME-master --protocol tcp --port 0-65535 --cidr 0.0.0.0/0
+
+cd "$bin"/..
+bin/server_deploy.sh ec2
+ssh -o StrictHostKeyChecking=no -i "$SSH_KEY" root@$DEPLOY_HOSTS "(echo 'export AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID' >> spark/conf/spark-env.sh)"
+ssh -o StrictHostKeyChecking=no -i "$SSH_KEY" root@$DEPLOY_HOSTS "(echo 'export AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY' >> spark/conf/spark-env.sh)"
+ssh -o StrictHostKeyChecking=no -i "$SSH_KEY" root@$DEPLOY_HOSTS "(cd job-server; nohup ./server_start.sh < /dev/null &> /dev/null &)"
+echo "The Job Server is listening at $DEPLOY_HOSTS:8090"

bin/ec2_destroy.sh

Lines changed: 7 additions & 0 deletions
@@ -0,0 +1,7 @@
+#!/bin/bash
+bin=`dirname "${BASH_SOURCE-$0}"`
+bin=`cd "$bin"; pwd`
+
+. "$bin"/../config/user-ec2-settings.sh
+
+"$bin"/../ec2Cluster/spark-ec2 destroy $CLUSTER_NAME

bin/ec2_example.sh.template

Lines changed: 16 additions & 0 deletions
@@ -0,0 +1,16 @@
+DEPLOY_HOSTS=ENTER_DEPLOY_HOST_HERE
+bin=`dirname "${BASH_SOURCE-$0}"`
+bin=`cd "$bin"; pwd`
+
+. "$bin"/../config/ec2.sh
+
+ssh_key_to_use=""
+if [ -n "$SSH_KEY" ] ; then
+  ssh_key_to_use="-i $SSH_KEY"
+fi
+
+VERSION=$(sed -E 's/version in ThisBuild := "(.*)"/\1/' version.sbt)
+wget -O- --post-file "$bin"/../job-server-extras/target/scala-2.10/job-server-extras_2.10-$VERSION.jar "$DEPLOY_HOSTS/jars/km"
+scp -rp -o StrictHostKeyChecking=no $ssh_key_to_use "$bin"/../job-server-extras/src/main/KMeansExample/* ${APP_USER}@"${DEPLOY_HOSTS%:*}:/var/www/html/"
+
+echo "The example is running at ${DEPLOY_HOSTS%:*}:5080"

bin/manager_start.sh

Lines changed: 44 additions & 0 deletions
@@ -0,0 +1,44 @@
+#!/bin/bash
+# Script to start the job manager
+# args: <work dir for context> <cluster address> [proxy_user]
+set -e
+
+get_abs_script_path() {
+  pushd . >/dev/null
+  cd $(dirname $0)
+  appdir=$(pwd)
+  popd >/dev/null
+}
+get_abs_script_path
+
+. $appdir/setenv.sh
+
+# Override logging options to provide per-context logging
+LOGGING_OPTS="-Dlog4j.configuration=file:$appdir/log4j-server.properties
+  -DLOG_DIR=$1"
+
+GC_OPTS="-XX:+UseConcMarkSweepGC
+  -verbose:gc -XX:+PrintGCTimeStamps -Xloggc:$appdir/gc.out
+  -XX:MaxPermSize=512m
+  -XX:+CMSClassUnloadingEnabled "
+
+JAVA_OPTS="-XX:MaxDirectMemorySize=$MAX_DIRECT_MEMORY
+  -XX:+HeapDumpOnOutOfMemoryError -Djava.net.preferIPv4Stack=true"
+
+MAIN="spark.jobserver.JobManager"
+
+if [ ! -z $3 ]; then
+  cmd='$SPARK_HOME/bin/spark-submit --class $MAIN --driver-memory $JOBSERVER_MEMORY
+    --conf "spark.executor.extraJavaOptions=$LOGGING_OPTS"
+    --proxy-user $3
+    --driver-java-options "$GC_OPTS $JAVA_OPTS $LOGGING_OPTS $CONFIG_OVERRIDES"
+    $appdir/spark-job-server.jar $1 $2 $conffile'
+else
+  cmd='$SPARK_HOME/bin/spark-submit --class $MAIN --driver-memory $JOBSERVER_MEMORY
+    --conf "spark.executor.extraJavaOptions=$LOGGING_OPTS"
+    --driver-java-options "$GC_OPTS $JAVA_OPTS $LOGGING_OPTS $CONFIG_OVERRIDES"
+    $appdir/spark-job-server.jar $1 $2 $conffile'
+fi
+
+eval $cmd > /dev/null 2>&1 &
+# exec java -cp $CLASSPATH $GC_OPTS $JAVA_OPTS $LOGGING_OPTS $CONFIG_OVERRIDES $MAIN $1 $2 $conffile 2>&1 &
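
manager_start.sh is not run by hand in normal operation; the job server launches it to start one JVM per context. Per the header comment it takes a context work directory, a cluster address, and an optional proxy user; an illustrative invocation with made-up arguments:

    # hypothetical arguments, for illustration only
    ./manager_start.sh /tmp/jobserver/ctx-1 akka.tcp://JobServer@10.0.0.5:2552 alice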

bin/server_deploy.sh

Lines changed: 9 additions & 5 deletions
@@ -18,7 +18,7 @@ if [ ! -f "$configFile" ]; then
   echo "Could not find $configFile"
   exit 1
 fi
-. $configFile
+. "$configFile"
 
 majorRegex='([0-9]+\.[0-9]+)\.[0-9]+'
 if [[ $SCALA_VERSION =~ $majorRegex ]]
@@ -42,8 +42,10 @@ FILES="job-server-extras/target/scala-$majorVersion/spark-job-server.jar
   bin/server_start.sh
   bin/server_stop.sh
   bin/kill-process-tree.sh
+  bin/manager_start.sh
+  bin/setenv.sh
   $CONFIG_DIR/$ENV.conf
-  config/shiro.ini
+  config/shiro.ini
   config/log4j-server.properties"
 
 ssh_key_to_use=""
@@ -53,7 +55,9 @@ fi
 
 for host in $DEPLOY_HOSTS; do
   # We assume that the deploy user is APP_USER and has permissions
-  ssh $ssh_key_to_use ${APP_USER}@$host mkdir -p $INSTALL_DIR
-  scp $ssh_key_to_use $FILES ${APP_USER}@$host:$INSTALL_DIR/
-  scp $ssh_key_to_use $configFile ${APP_USER}@$host:$INSTALL_DIR/settings.sh
+  ssh -o StrictHostKeyChecking=no $ssh_key_to_use ${APP_USER}@$host mkdir -p $INSTALL_DIR
+  scp -o StrictHostKeyChecking=no $ssh_key_to_use $FILES ${APP_USER}@$host:$INSTALL_DIR/
+  scp -o StrictHostKeyChecking=no $ssh_key_to_use "$CONFIG_DIR/$ENV.conf" ${APP_USER}@$host:$INSTALL_DIR/
+  scp -o StrictHostKeyChecking=no $ssh_key_to_use "$configFile" ${APP_USER}@$host:$INSTALL_DIR/settings.sh
 done
+
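
server_deploy.sh reads its per-environment settings from $configFile, i.e. config/<env>.sh (config/ec2.sh when invoked by ec2_deploy.sh as server_deploy.sh ec2). That file is not rendered here; a plausible sketch based on the variables the script references, with illustrative values:

    # config/ec2.sh -- hypothetical sketch, illustrative values
    APP_USER=root                  # deploy user on the target hosts
    DEPLOY_HOSTS="ec2-xx-xx-xx-xx.compute-1.amazonaws.com"
    INSTALL_DIR=/root/job-server   # destination for the scp'd FILES list
    SCALA_VERSION=2.10.6           # parsed for the target/scala-2.10 build path
    SSH_KEY=$HOME/.ssh/jobserver-keypair.pem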

bin/server_package.sh

Lines changed: 4 additions & 1 deletion
@@ -45,8 +45,11 @@ FILES="job-server-extras/target/scala-$majorVersion/spark-job-server.jar
   bin/server_start.sh
   bin/server_stop.sh
   bin/kill-process-tree.sh
+  bin/manager_start.sh
+  bin/setenv.sh
   $CONFIG_DIR/$ENV.conf
-  config/logback-server.xml"
+  config/logback-server.xml
+  config/shiro.ini"
 
 rm -rf $WORK_DIR
 mkdir -p $WORK_DIR

bin/server_start.sh

Lines changed: 8 additions & 75 deletions
@@ -18,6 +18,8 @@ get_abs_script_path() {
 
 get_abs_script_path
 
+. $appdir/setenv.sh
+
 GC_OPTS="-XX:+UseConcMarkSweepGC
   -verbose:gc -XX:+PrintGCTimeStamps -Xloggc:$appdir/gc.out
   -XX:MaxPermSize=512m
@@ -26,7 +28,7 @@ GC_OPTS="-XX:+UseConcMarkSweepGC
 # To truly enable JMX in AWS and other containerized environments, also need to set
 # -Djava.rmi.server.hostname equal to the hostname in that environment. This is specific
 # depending on AWS vs GCE etc.
-JAVA_OPTS="-XX:MaxDirectMemorySize=512M \
+JAVA_OPTS="-XX:MaxDirectMemorySize=$MAX_DIRECT_MEMORY \
   -XX:+HeapDumpOnOutOfMemoryError -Djava.net.preferIPv4Stack=true \
   -Dcom.sun.management.jmxremote.port=9999 \
   -Dcom.sun.management.jmxremote.rmi.port=9999 \
@@ -35,89 +37,20 @@ JAVA_OPTS="-XX:MaxDirectMemorySize=512M \
 
 MAIN="spark.jobserver.JobServer"
 
-if [ -f "$JOBSERVER_CONFIG" ]; then
-  conffile="$JOBSERVER_CONFIG"
-else
-  conffile=$(ls -1 $appdir/*.conf | head -1)
-  if [ -z "$conffile" ]; then
-    echo "No configuration file found"
-    exit 1
-  fi
-fi
-
-if [ -f "$appdir/settings.sh" ]; then
-  . "$appdir/settings.sh"
-else
-  echo "Missing $appdir/settings.sh, exiting"
-  exit 1
-fi
-
-if [ -z "$SPARK_HOME" ]; then
-  echo "Please set SPARK_HOME or put it in $appdir/settings.sh first"
-  exit 1
-fi
-
-pidFilePath=$appdir/$PIDFILE
-
-if [ -f "$pidFilePath" ] && kill -0 "$(cat "$pidFilePath")"; then
+PIDFILE=$appdir/spark-jobserver.pid
+if [ -f "$PIDFILE" ] && kill -0 $(cat "$PIDFILE"); then
   echo 'Job server is already running'
   exit 1
 fi
 
-if [ -z "$LOG_DIR" ]; then
-  LOG_DIR=/tmp/job-server
-  echo "LOG_DIR empty; logging will go to $LOG_DIR"
-fi
-mkdir -p $LOG_DIR
-
-LOGGING_OPTS="-DLOG_DIR=$LOG_DIR"
-
-export SPARK_SUBMIT_LOGBACK_CONF_FILE="$appdir/logback-server.xml"
-
-# For Mesos
-CONFIG_OVERRIDES=""
-if [ -n "$SPARK_EXECUTOR_URI" ]; then
-  CONFIG_OVERRIDES="-Dspark.executor.uri=$SPARK_EXECUTOR_URI "
-fi
-# For Mesos/Marathon, use the passed-in port
-if [ "$PORT" != "" ]; then
-  CONFIG_OVERRIDES+="-Dspark.jobserver.port=$PORT "
-fi
-
-if [ -z "$JOBSERVER_MEMORY" ]; then
-  JOBSERVER_MEMORY=1G
-fi
-
-# This needs to be exported for standalone mode so drivers can connect to the Spark cluster
-export SPARK_HOME
-export YARN_CONF_DIR
-export HADOOP_CONF_DIR
-
-# Identify location of dse command
-DSE="/usr/bin/dse"
-if [ -z "$DSE_HOME" ]; then
-  if [ -e "$DSE" ]; then
-    export DSE_HOME=/usr/share/dse
-  fi
-fi
-if [ ! -e "$DSE" ]; then
-  if [ -e "$DSE_HOME"/bin/dse ]; then
-    DSE="$DSE_HOME"/bin/dse
-  else
-    echo "Cannot determine DSE_HOME, please set it manually to your DSE install directory"
-    exit 1
-  fi
-fi
-
-# Submit the job server
-cmd='$DSE spark-submit --class $MAIN --driver-memory $JOBSERVER_MEMORY
+cmd='$SPARK_HOME/bin/spark-submit --class $MAIN --driver-memory $JOBSERVER_MEMORY
   --conf "spark.executor.extraJavaOptions=$LOGGING_OPTS"
   --driver-java-options "$GC_OPTS $JAVA_OPTS $LOGGING_OPTS $CONFIG_OVERRIDES"
   $@ $appdir/spark-job-server.jar $conffile'
 
 if [ -z "$JOBSERVER_FG" ]; then
-  eval $cmd 2>&1 &
-  echo $! > $pidFilePath
+  eval $cmd > /dev/null 2>&1 < /dev/null &
+  echo $! > $PIDFILE
 else
   eval $cmd
 fi
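
Most of the logic removed here (config-file discovery, settings.sh sourcing, LOG_DIR and JOBSERVER_MEMORY defaults, Mesos CONFIG_OVERRIDES) moves into the new shared bin/setenv.sh, which both server_start.sh and manager_start.sh now source but which is not rendered on this page. A rough sketch of what it plausibly provides, inferred from the variables the two scripts consume:

    # bin/setenv.sh -- hypothetical sketch; the real file ships in this commit
    # pick a config file: $JOBSERVER_CONFIG if set, else the first *.conf in $appdir
    if [ -f "$JOBSERVER_CONFIG" ]; then
      conffile="$JOBSERVER_CONFIG"
    else
      conffile=$(ls -1 $appdir/*.conf | head -1)
    fi

    # deployment settings placed next to the jar as settings.sh by server_deploy.sh
    . "$appdir/settings.sh"

    # defaults consumed by server_start.sh and manager_start.sh
    JOBSERVER_MEMORY=${JOBSERVER_MEMORY:-1G}
    MAX_DIRECT_MEMORY=${MAX_DIRECT_MEMORY:-512M}
    LOG_DIR=${LOG_DIR:-/tmp/job-server}
    mkdir -p "$LOG_DIR"
    LOGGING_OPTS="-DLOG_DIR=$LOG_DIR"
    CONFIG_OVERRIDES=""
    export SPARK_HOME YARN_CONF_DIR HADOOP_CONF_DIR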
