@@ -7,7 +7,9 @@ if [ "$#" -lt 1 ]; then
     exit 1
 fi
 
+LOCAL=NO
 DEBUG=NO
+DEBUG_EXECUTOR=NO
 WORKERS=1
 POSITIONAL=()
 while [[ $# -gt 0 ]]
@@ -24,6 +26,14 @@ case $key in
     DEBUG=YES
     shift # past argument
     ;;
+    -de|--debug-exec)
+    DEBUG_EXECUTOR=YES
+    shift # past argument
+    ;;
+    -l|--local)
+    LOCAL=YES
+    shift # past argument
+    ;;
     *) # unknown option
     POSITIONAL+=("$1") # save it in an array for later
     shift # past argument
@@ -34,37 +44,85 @@ set -- "${POSITIONAL[@]}" # restore positional parameters
 
 echo "DEBUG" = "${DEBUG}"
 echo "WORKERS" = "${WORKERS}"
+echo "LOCAL" = "${LOCAL}"
+TEST=NO
+# set_speed $WORKERS
+DOCKER=sparkmaster
+DOCKER=sparklauncher
+# --conf "spark.submit.pyFiles=/build/dike.zip" \
 if [ ${DEBUG} == "YES" ]; then
   echo "Debugging"
-  docker exec -it sparkmaster spark-submit --master local \
+  docker exec -it ${DOCKER} spark-submit --master local \
     --class main.scala.TpchQuery \
     --conf "spark.jars.ivy=/build/ivy" \
     --conf "spark.driver.maxResultSize=20g" \
     --conf "spark.driver.memory=2g" \
     --conf "spark.executor.memory=2g" \
-    --conf "spark.driver.extraJavaOptions=-classpath /conf/:/build/spark-${SPARK_VERSION}/jars/*:/examples/scala/target/scala-2.12/ -agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=172.18.0.3:5005" \
-    --packages com.github.scopt:scopt_2.12:4.0.0-RC2,ch.cern.sparkmeasure:spark-measure_2.12:0.17 \
+    --conf "spark.driver.extraJavaOptions=-classpath /conf/:/build/spark-${SPARK_VERSION}/jars/*: -agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=10.124.48.63:5006" \
+    --packages com.github.luben:zstd-jni:1.5.0-4,org.json:json:20210307,javax.json:javax.json-api:1.1.4,org.glassfish:javax.json:1.1.4,com.github.scopt:scopt_2.12:4.0.0-RC2,ch.cern.sparkmeasure:spark-measure_2.12:0.17 \
     --jars /build/downloads/spark-sql-macros_2.12.10_0.1.0-SNAPSHOT.jar,/dikeHDFS/client/ndp-hdfs/target/ndp-hdfs-1.0.jar,/build/extra_jars/*,/pushdown-datasource/target/scala-2.12/pushdown-datasource_2.12-0.1.0.jar,/build/downloads/h2-1.4.200.jar \
     /tpch/tpch-spark/target/scala-2.12/spark-tpc-h-queries_2.12-1.0.jar $@ --workers ${WORKERS}
   # --packages com.github.scopt:scopt_2.12:4.0.0-RC2,com.amazonaws:aws-java-sdk:1.11.853,org.apache.hadoop:hadoop-aws:3.2.0,org.apache.commons:commons-csv:1.8 \
   # --conf "spark.sql.parquet.enableVectorizedReader=false" \
-else
-  docker exec -it sparkmaster spark-submit --master local[$WORKERS] \
+elif [ ${DEBUG_EXECUTOR} == "YES" ]; then
+  echo "Debugging"
+  docker exec -it ${DOCKER} spark-submit --master local \
+    --class main.scala.TpchQuery \
+    --conf "spark.jars.ivy=/build/ivy" \
+    --conf "spark.driver.maxResultSize=20g" \
+    --conf "spark.driver.memory=2g" \
+    --conf "spark.executor.memory=2g" \
+    --conf "spark.driver.extraJavaOptions=-classpath /conf/:/build/spark-${SPARK_VERSION}/jars/*:/examples/scala/target/scala-2.12/ -agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=172.169.1.10:5005" \
+    --packages com.github.luben:zstd-jni:1.5.0-4,org.json:json:20210307,javax.json:javax.json-api:1.1.4,org.glassfish:javax.json:1.1.4,com.github.scopt:scopt_2.12:4.0.0-RC2,ch.cern.sparkmeasure:spark-measure_2.12:0.17 \
+    --jars /build/downloads/spark-sql-macros_2.12.10_0.1.0-SNAPSHOT.jar,/dikeHDFS/client/ndp-hdfs/target/ndp-hdfs-1.0.jar,/build/extra_jars/*,/pushdown-datasource/target/scala-2.12/pushdown-datasource_2.12-0.1.0.jar,/build/downloads/h2-1.4.200.jar \
+    /tpch/tpch-spark/target/scala-2.12/spark-tpc-h-queries_2.12-1.0.jar $@ --workers ${WORKERS}
+elif [ ${LOCAL} == "YES" ]; then
+  echo "Local with $WORKERS workers."
+  docker exec -it ${DOCKER} spark-submit --master local[$WORKERS] \
     --conf "ivy.shared.default.root=/build/ivy_jars" \
-    --conf "spark.driver.extraClassPath=/build/extra_jars/*" \
-    --conf "spark.executor.extraClassPath=/build/extra_jars/*" \
     --conf "spark.jars.ivy=/build/ivy" \
     --conf "spark.driver.maxResultSize=20g" \
+    --conf "spark.task.maxDirectResultSize=20g" \
     --conf "spark.sql.broadcastTimeout=10000000" \
-    --conf "spark.driver.memory=32g" \
-    --conf "spark.executor.memory=32g" \
+    --conf "spark.driver.memory=2g" \
+    --conf "spark.executor.memory=2g" \
+    --conf "spark.dynamicAllocation.enabled=false" \
     --conf "spark.eventLog.enabled=true" \
     --conf "spark.eventLog.dir=/build/spark-events" \
     --conf "spark.driver.extraJavaOptions=-classpath /conf/:/build/spark-${SPARK_VERSION}/jars/*:/examples/scala/target/scala-2.12/" \
-    --packages com.github.scopt:scopt_2.12:4.0.0-RC2,ch.cern.sparkmeasure:spark-measure_2.12:0.17 \
-    --jars /build/downloads/spark-sql-macros_2.12.10_0.1.0-SNAPSHOT.jar,/dikeHDFS/client/ndp-hdfs/target/ndp-hdfs-1.0.jar,/build/extra_jars/*,/pushdown-datasource/target/scala-2.12/pushdown-datasource_2.12-0.1.0.jar,/build/downloads/h2-1.4.200.jar \
+    --packages com.github.luben:zstd-jni:1.5.0-4,org.json:json:20210307,javax.json:javax.json-api:1.1.4,org.glassfish:javax.json:1.1.4,com.github.scopt:scopt_2.12:4.0.0-RC2,ch.cern.sparkmeasure:spark-measure_2.12:0.17 \
+    --jars /dikeHDFS/client/ndp-hdfs/target/ndp-hdfs-1.0.jar,/pushdown-datasource/target/scala-2.12/pushdown-datasource_2.12-0.1.0.jar \
+    /tpch/tpch-spark/target/scala-2.12/spark-tpc-h-queries_2.12-1.0.jar $@ --workers ${WORKERS}
+elif [ ${TEST} != "YES" ]; then
+  # local[$WORKERS]
+  # spark://172.18.0.2:7077
+  # --conf "spark.executor.instances=1" \
+  # --conf "spark.executor.cores=1" \
+  HOST=sparkmaster
+  HOST=172.169.1.40
+  DRIVER_IP=172.169.1.40
+  docker exec -it ${DOCKER} spark-submit --total-executor-cores $WORKERS \
+    --master spark://$HOST:7077 \
+    --conf "ivy.shared.default.root=/build/ivy_jars" \
+    --conf "spark.jars.ivy=/build/ivy" \
+    --conf "spark.driver.maxResultSize=20g" \
+    --conf "spark.task.maxDirectResultSize=20g" \
+    --conf "spark.sql.broadcastTimeout=10000000" \
+    --conf "spark.driver.memory=2g" \
+    --conf "spark.executor.memory=2g" \
+    --conf "spark.dynamicAllocation.enabled=false" \
+    --conf "spark.eventLog.enabled=true" \
+    --conf "spark.eventLog.dir=/build/spark-events" \
+    --conf "spark.hadoop.dfs.client.use.datanode.hostname=true" \
+    --conf "spark.hadoop.dfs.namenode.rpc-address=172.169.1.60:9000" \
+    --conf "spark.driver.host=${DRIVER_IP}" \
+    --conf "spark.driver.bindAddress=${DRIVER_IP}" \
+    --conf "spark.driver.extraJavaOptions=-classpath /conf/:/build/spark-${SPARK_VERSION}/jars/*:/examples/scala/target/scala-2.12/" \
+    --packages com.github.luben:zstd-jni:1.5.0-4,org.json:json:20210307,javax.json:javax.json-api:1.1.4,org.glassfish:javax.json:1.1.4,com.github.scopt:scopt_2.12:4.0.0-RC2,ch.cern.sparkmeasure:spark-measure_2.12:0.17 \
+    --jars /dikeHDFS/client/ndp-hdfs/target/ndp-hdfs-1.0.jar,/pushdown-datasource/target/scala-2.12/pushdown-datasource_2.12-0.1.0.jar \
     /tpch/tpch-spark/target/scala-2.12/spark-tpc-h-queries_2.12-1.0.jar $@ --workers ${WORKERS}
 fi
+# --jars /dikeHDFS/client/ndp-hdfs/target/ndp-hdfs-1.0.jar,/pushdown-datasource/target/scala-2.12/pushdown-datasource_2.12-0.1.0.jar \
 
 # ,org.dike.hdfs:ndp-hdfs:1.0 /dikeHDFS/client/ndp-hdfs/target/ndp-hdfs-1.0-jar-with-dependencies.jar,
 # --repositories file:/build/dike \
0 commit comments