@@ -8,7 +8,9 @@ if [ "$#" -lt 1 ]; then
88 exit 1
99fi
1010
11+ LOCAL=NO
1112DEBUG=NO
13+ DEBUG_EXECUTOR=NO
1214WORKERS=1
1315POSITIONAL=()
1416while [[ $# -gt 0 ]]
@@ -25,6 +27,14 @@ case $key in
2527 DEBUG=YES
2628 shift # past argument
2729 ;;
30+ -de|--debug-exec)
31+ DEBUG_EXECUTOR=YES
32+ shift # past argument
33+ ;;
34+ -l|--local)
35+ LOCAL=YES
36+ shift # past argument
37+ ;;
2838 * ) # unknown option
2939 POSITIONAL+=(" $1 " ) # save it in an array for later
3040 shift # past argument
set -- "${POSITIONAL[@]}" # restore positional parameters

echo "DEBUG" = "${DEBUG}"
echo "WORKERS" = "${WORKERS}"
echo "LOCAL" = "${LOCAL}"
TEST=NO
# set_speed $WORKERS
# Container in which spark-submit runs; switch back to sparkmaster if needed.
# DOCKER=sparkmaster
DOCKER=sparklauncher
# --conf "spark.submit.pyFiles=/build/dike.zip" \

# Dispatch on the parsed flags: driver debug, executor debug, local[N],
# or (default) submit against the standalone cluster master.
if [[ "${DEBUG}" == "YES" ]]; then
    # Driver debugging: jdwp agent suspends the JVM until a debugger
    # attaches at 10.124.48.63:5006.
    echo "Debugging"
    docker exec -it "${DOCKER}" spark-submit --master local \
        --class main.scala.TpchQuery \
        --conf "spark.jars.ivy=/build/ivy" \
        --conf "spark.driver.maxResultSize=20g" \
        --conf "spark.driver.memory=2g" \
        --conf "spark.executor.memory=2g" \
        --conf "spark.driver.extraJavaOptions=-classpath /conf/:/build/spark-${SPARK_VERSION}/jars/*: -agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=10.124.48.63:5006" \
        --packages com.github.luben:zstd-jni:1.5.0-4,org.json:json:20210307,javax.json:javax.json-api:1.1.4,org.glassfish:javax.json:1.1.4,com.github.scopt:scopt_2.12:4.0.0-RC2,ch.cern.sparkmeasure:spark-measure_2.12:0.17 \
        --jars /build/downloads/spark-sql-macros_2.12.10_0.1.0-SNAPSHOT.jar,/dikeHDFS/client/ndp-hdfs/target/ndp-hdfs-1.0.jar,/build/extra_jars/*,/pushdown-datasource/target/scala-2.12/pushdown-datasource_2.12-0.1.0.jar,/build/downloads/h2-1.4.200.jar \
        /tpch/tpch-spark/target/scala-2.12/spark-tpc-h-queries_2.12-1.0.jar "$@" --workers "${WORKERS}"
    # --packages com.github.scopt:scopt_2.12:4.0.0-RC2,com.amazonaws:aws-java-sdk:1.11.853,org.apache.hadoop:hadoop-aws:3.2.0,org.apache.commons:commons-csv:1.8 \
    # --conf "spark.sql.parquet.enableVectorizedReader=false" \
elif [[ "${DEBUG_EXECUTOR}" == "YES" ]]; then
    # Executor debugging: same submit, but the jdwp agent targets
    # 172.169.1.10:5005 and the classpath includes /examples/scala.
    echo "Debugging executor"
    docker exec -it "${DOCKER}" spark-submit --master local \
        --class main.scala.TpchQuery \
        --conf "spark.jars.ivy=/build/ivy" \
        --conf "spark.driver.maxResultSize=20g" \
        --conf "spark.driver.memory=2g" \
        --conf "spark.executor.memory=2g" \
        --conf "spark.driver.extraJavaOptions=-classpath /conf/:/build/spark-${SPARK_VERSION}/jars/*:/examples/scala/target/scala-2.12/ -agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=172.169.1.10:5005" \
        --packages com.github.luben:zstd-jni:1.5.0-4,org.json:json:20210307,javax.json:javax.json-api:1.1.4,org.glassfish:javax.json:1.1.4,com.github.scopt:scopt_2.12:4.0.0-RC2,ch.cern.sparkmeasure:spark-measure_2.12:0.17 \
        --jars /build/downloads/spark-sql-macros_2.12.10_0.1.0-SNAPSHOT.jar,/dikeHDFS/client/ndp-hdfs/target/ndp-hdfs-1.0.jar,/build/extra_jars/*,/pushdown-datasource/target/scala-2.12/pushdown-datasource_2.12-0.1.0.jar,/build/downloads/h2-1.4.200.jar \
        /tpch/tpch-spark/target/scala-2.12/spark-tpc-h-queries_2.12-1.0.jar "$@" --workers "${WORKERS}"
elif [[ "${LOCAL}" == "YES" ]]; then
    # Local mode: one executor thread per requested worker.
    echo "Local with $WORKERS workers."
    docker exec -it "${DOCKER}" spark-submit --master "local[${WORKERS}]" \
        --conf "ivy.shared.default.root=/build/ivy_jars" \
        --conf "spark.jars.ivy=/build/ivy" \
        --conf "spark.driver.maxResultSize=20g" \
        --conf "spark.task.maxDirectResultSize=20g" \
        --conf "spark.sql.broadcastTimeout=10000000" \
        --conf "spark.driver.memory=2g" \
        --conf "spark.executor.memory=2g" \
        --conf "spark.dynamicAllocation.enabled=false" \
        --conf "spark.eventLog.enabled=true" \
        --conf "spark.eventLog.dir=/build/spark-events" \
        --conf "spark.driver.extraJavaOptions=-classpath /conf/:/build/spark-${SPARK_VERSION}/jars/*:/examples/scala/target/scala-2.12/" \
        --packages com.github.luben:zstd-jni:1.5.0-4,org.json:json:20210307,javax.json:javax.json-api:1.1.4,org.glassfish:javax.json:1.1.4,com.github.scopt:scopt_2.12:4.0.0-RC2,ch.cern.sparkmeasure:spark-measure_2.12:0.17 \
        --jars /dikeHDFS/client/ndp-hdfs/target/ndp-hdfs-1.0.jar,/pushdown-datasource/target/scala-2.12/pushdown-datasource_2.12-0.1.0.jar \
        /tpch/tpch-spark/target/scala-2.12/spark-tpc-h-queries_2.12-1.0.jar "$@" --workers "${WORKERS}"
elif [[ "${TEST}" != "YES" ]]; then
    # Default path (TEST is hard-coded NO above, so this is effectively the
    # else branch): submit to the standalone cluster, capping total executor
    # cores at WORKERS.
    # local[$WORKERS]
    # spark://172.18.0.2:7077
    # --conf "spark.executor.instances=1" \
    # --conf "spark.executor.cores=1" \
    # HOST=sparkmaster   # dead assignment in the original; kept for reference
    HOST=172.169.1.40
    DRIVER_IP=172.169.1.40
    docker exec -it "${DOCKER}" spark-submit --total-executor-cores "$WORKERS" \
        --master "spark://${HOST}:7077" \
        --conf "ivy.shared.default.root=/build/ivy_jars" \
        --conf "spark.jars.ivy=/build/ivy" \
        --conf "spark.driver.maxResultSize=20g" \
        --conf "spark.task.maxDirectResultSize=20g" \
        --conf "spark.sql.broadcastTimeout=10000000" \
        --conf "spark.driver.memory=2g" \
        --conf "spark.executor.memory=2g" \
        --conf "spark.dynamicAllocation.enabled=false" \
        --conf "spark.eventLog.enabled=true" \
        --conf "spark.eventLog.dir=/build/spark-events" \
        --conf "spark.hadoop.dfs.client.use.datanode.hostname=true" \
        --conf "spark.hadoop.dfs.namenode.rpc-address=172.169.1.60:9000" \
        --conf "spark.driver.host=${DRIVER_IP}" \
        --conf "spark.driver.bindAddress=${DRIVER_IP}" \
        --conf "spark.driver.extraJavaOptions=-classpath /conf/:/build/spark-${SPARK_VERSION}/jars/*:/examples/scala/target/scala-2.12/" \
        --packages com.github.luben:zstd-jni:1.5.0-4,org.json:json:20210307,javax.json:javax.json-api:1.1.4,org.glassfish:javax.json:1.1.4,com.github.scopt:scopt_2.12:4.0.0-RC2,ch.cern.sparkmeasure:spark-measure_2.12:0.17 \
        --jars /dikeHDFS/client/ndp-hdfs/target/ndp-hdfs-1.0.jar,/pushdown-datasource/target/scala-2.12/pushdown-datasource_2.12-0.1.0.jar \
        /tpch/tpch-spark/target/scala-2.12/spark-tpc-h-queries_2.12-1.0.jar "$@" --workers "${WORKERS}"
fi
# --jars /dikeHDFS/client/ndp-hdfs/target/ndp-hdfs-1.0.jar,/pushdown-datasource/target/scala-2.12/pushdown-datasource_2.12-0.1.0.jar \

# ,org.dike.hdfs:ndp-hdfs:1.0 /dikeHDFS/client/ndp-hdfs/target/ndp-hdfs-1.0-jar-with-dependencies.jar,
# --repositories file:/build/dike \