@@ -28,6 +28,12 @@ set -e
2828# https://stackoverflow.com/questions/59895/how-do-i-get-the-directory-where-a-bash-script-is-located-from-within-the-script
2929SCRIPT_DIR=$( cd -- " $( dirname -- " ${BASH_SOURCE[0]} " ) " & > /dev/null && pwd )
3030
# Execute a command and also print it, for debugging purposes.
#
# Arguments: "$@" - the command and its arguments, run exactly as given.
# Outputs:   the command's own output, plus an xtrace line for the command
#            on stderr.
# Returns:   the exit status of the command. Under `set -e` a failing command
#            still aborts the script, as it did before.
debug_run() {
    local status=0

    # If the caller is already tracing (e.g. the script was started with
    # `bash -x`), just run the command and leave the tracing state alone.
    case $- in
        *x*)
            "$@"
            return
            ;;
    esac

    set -x
    "$@"
    # Capture the status before `set +x` overwrites $?, and send the trace of
    # this teardown to /dev/null so only the command itself is shown.
    { status=$?; set +x; } 2>/dev/null
    return "$status"
}
3137
3238# Set Defaults
3339COMMAND=
@@ -435,10 +441,7 @@ run_tpch() {
435441 echo " Running tpch benchmark..."
436442 # Optional query filter to run specific query
437443 QUERY=$( [ -n " $ARG3 " ] && echo " --query $ARG3 " || echo " " )
438- # debug the target command
439- set -x
440- $CARGO_COMMAND --bin tpch -- benchmark datafusion --iterations 5 --path " ${TPCH_DIR} " --prefer_hash_join " ${PREFER_HASH_JOIN} " --format parquet -o " ${RESULTS_FILE} " $QUERY
441- set +x
444+ debug_run $CARGO_COMMAND --bin tpch -- benchmark datafusion --iterations 5 --path " ${TPCH_DIR} " --prefer_hash_join " ${PREFER_HASH_JOIN} " --format parquet -o " ${RESULTS_FILE} " $QUERY
442445}
443446
444447# Runs the tpch in memory
@@ -455,35 +458,32 @@ run_tpch_mem() {
455458 echo " Running tpch_mem benchmark..."
456459 # Optional query filter to run specific query
457460 QUERY=$( [ -n " $ARG3 " ] && echo " --query $ARG3 " || echo " " )
458- # debug the target command
459- set -x
460461 # -m means in memory
461- $CARGO_COMMAND --bin tpch -- benchmark datafusion --iterations 5 --path " ${TPCH_DIR} " --prefer_hash_join " ${PREFER_HASH_JOIN} " -m --format parquet -o " ${RESULTS_FILE} " $QUERY
462- set +x
462+ debug_run $CARGO_COMMAND --bin tpch -- benchmark datafusion --iterations 5 --path " ${TPCH_DIR} " --prefer_hash_join " ${PREFER_HASH_JOIN} " -m --format parquet -o " ${RESULTS_FILE} " $QUERY
463463}
464464
465465# Runs the cancellation benchmark
466466run_cancellation () {
    # Assigns RESULTS_FILE (no `local`, so it is a global) to cancellation.json
    # under RESULTS_DIR, echoes that path and a progress message, then runs the
    # dfbench `cancellation` subcommand with 5 iterations, using the
    # cancellation directory under DATA_DIR as --path and RESULTS_FILE as -o.
467467 RESULTS_FILE=" ${RESULTS_DIR} /cancellation.json"
468468 echo " RESULTS_FILE: ${RESULTS_FILE} "
469469 echo " Running cancellation benchmark..."
    # Diff pair: the '-' line is the old direct call; the '+' line passes the
    # identical command line to debug_run. $CARGO_COMMAND is expanded unquoted
    # in both (presumably so a multi-word command splits into words - confirm
    # against its definition).
470- $CARGO_COMMAND --bin dfbench -- cancellation --iterations 5 --path " ${DATA_DIR} /cancellation" -o " ${RESULTS_FILE} "
470+ debug_run $CARGO_COMMAND --bin dfbench -- cancellation --iterations 5 --path " ${DATA_DIR} /cancellation" -o " ${RESULTS_FILE} "
471471}
472472
473473# Runs the parquet filter benchmark
474474run_parquet () {
    # Assigns RESULTS_FILE (no `local`, so it is a global) to parquet.json under
    # RESULTS_DIR, echoes that path and a progress message, then runs the
    # `filter` subcommand of the parquet binary with --scale-factor 1.0 and
    # 5 iterations, using DATA_DIR as --path and RESULTS_FILE as -o.
475475 RESULTS_FILE=" ${RESULTS_DIR} /parquet.json"
476476 echo " RESULTS_FILE: ${RESULTS_FILE} "
477477 echo " Running parquet filter benchmark..."
    # Diff pair: the '-' line is the old direct call; the '+' line passes the
    # identical command line to debug_run. $CARGO_COMMAND is expanded unquoted
    # in both (presumably so a multi-word command splits into words - confirm
    # against its definition).
478- $CARGO_COMMAND --bin parquet -- filter --path " ${DATA_DIR} " --scale-factor 1.0 --iterations 5 -o " ${RESULTS_FILE} "
478+ debug_run $CARGO_COMMAND --bin parquet -- filter --path " ${DATA_DIR} " --scale-factor 1.0 --iterations 5 -o " ${RESULTS_FILE} "
479479}
480480
481481# Runs the sort benchmark
482482run_sort () {
    # Assigns RESULTS_FILE (no `local`, so it is a global) to sort.json under
    # RESULTS_DIR, echoes that path and a progress message, then runs the
    # `sort` subcommand of the parquet binary with --scale-factor 1.0 and
    # 5 iterations, using DATA_DIR as --path and RESULTS_FILE as -o.
483483 RESULTS_FILE=" ${RESULTS_DIR} /sort.json"
484484 echo " RESULTS_FILE: ${RESULTS_FILE} "
485485 echo " Running sort benchmark..."
    # Diff pair: the '-' line is the old direct call; the '+' line passes the
    # identical command line to debug_run. $CARGO_COMMAND is expanded unquoted
    # in both (presumably so a multi-word command splits into words - confirm
    # against its definition).
486- $CARGO_COMMAND --bin parquet -- sort --path " ${DATA_DIR} " --scale-factor 1.0 --iterations 5 -o " ${RESULTS_FILE} "
486+ debug_run $CARGO_COMMAND --bin parquet -- sort --path " ${DATA_DIR} " --scale-factor 1.0 --iterations 5 -o " ${RESULTS_FILE} "
487487}
488488
489489
@@ -537,23 +537,23 @@ run_clickbench_1() {
537537 RESULTS_FILE=" ${RESULTS_DIR} /clickbench_1.json"
538538 echo " RESULTS_FILE: ${RESULTS_FILE} "
539539 echo " Running clickbench (1 file) benchmark..."
540- $CARGO_COMMAND --bin dfbench -- clickbench --iterations 5 --path " ${DATA_DIR} /hits.parquet" --queries-path " ${SCRIPT_DIR} /queries/clickbench/queries.sql" -o " ${RESULTS_FILE} "
540+ debug_run $CARGO_COMMAND --bin dfbench -- clickbench --iterations 5 --path " ${DATA_DIR} /hits.parquet" --queries-path " ${SCRIPT_DIR} /queries/clickbench/queries.sql" -o " ${RESULTS_FILE} "
541541}
542542
543543 # Runs the clickbench benchmark with the partitioned parquet files
544544run_clickbench_partitioned () {
    # Assigns RESULTS_FILE (no `local`, so it is a global) to
    # clickbench_partitioned.json under RESULTS_DIR, echoes that path and a
    # progress message, then runs the dfbench `clickbench` subcommand with
    # 5 iterations. --path is hits_partitioned under DATA_DIR and
    # --queries-path is queries/clickbench/queries.sql under SCRIPT_DIR.
545545 RESULTS_FILE=" ${RESULTS_DIR} /clickbench_partitioned.json"
546546 echo " RESULTS_FILE: ${RESULTS_FILE} "
547547 echo " Running clickbench (partitioned, 100 files) benchmark..."
    # Diff pair: the '-' line is the old direct call; the '+' line passes the
    # identical command line to debug_run. $CARGO_COMMAND is expanded unquoted
    # in both (presumably so a multi-word command splits into words - confirm
    # against its definition).
548- $CARGO_COMMAND --bin dfbench -- clickbench --iterations 5 --path " ${DATA_DIR} /hits_partitioned" --queries-path " ${SCRIPT_DIR} /queries/clickbench/queries.sql" -o " ${RESULTS_FILE} "
548+ debug_run $CARGO_COMMAND --bin dfbench -- clickbench --iterations 5 --path " ${DATA_DIR} /hits_partitioned" --queries-path " ${SCRIPT_DIR} /queries/clickbench/queries.sql" -o " ${RESULTS_FILE} "
549549}
550550
551551# Runs the clickbench "extended" benchmark with a single large parquet file
552552run_clickbench_extended () {
    # Assigns RESULTS_FILE (no `local`, so it is a global) to
    # clickbench_extended.json under RESULTS_DIR, echoes that path and a
    # progress message, then runs the dfbench `clickbench` subcommand with
    # 5 iterations. --path is hits.parquet under DATA_DIR and --queries-path
    # is queries/clickbench/extended.sql under SCRIPT_DIR.
553553 RESULTS_FILE=" ${RESULTS_DIR} /clickbench_extended.json"
554554 echo " RESULTS_FILE: ${RESULTS_FILE} "
555555 echo " Running clickbench (1 file) extended benchmark..."
    # Diff pair: the '-' line is the old direct call; the '+' line passes the
    # identical command line to debug_run. $CARGO_COMMAND is expanded unquoted
    # in both (presumably so a multi-word command splits into words - confirm
    # against its definition).
556- $CARGO_COMMAND --bin dfbench -- clickbench --iterations 5 --path " ${DATA_DIR} /hits.parquet" --queries-path " ${SCRIPT_DIR} /queries/clickbench/extended.sql" -o " ${RESULTS_FILE} "
556+ debug_run $CARGO_COMMAND --bin dfbench -- clickbench --iterations 5 --path " ${DATA_DIR} /hits.parquet" --queries-path " ${SCRIPT_DIR} /queries/clickbench/extended.sql" -o " ${RESULTS_FILE} "
557557}
558558
559559# Downloads the IMDB dataset csv.gz files from the homepage of Peter Boncz (one of the JOB paper authors)
@@ -668,7 +668,7 @@ run_imdb() {
668668 RESULTS_FILE=" ${RESULTS_DIR} /imdb.json"
669669 echo " RESULTS_FILE: ${RESULTS_FILE} "
670670 echo " Running imdb benchmark..."
671- $CARGO_COMMAND --bin imdb -- benchmark datafusion --iterations 5 --path " ${IMDB_DIR} " --prefer_hash_join " ${PREFER_HASH_JOIN} " --format parquet -o " ${RESULTS_FILE} "
671+ debug_run $CARGO_COMMAND --bin imdb -- benchmark datafusion --iterations 5 --path " ${IMDB_DIR} " --prefer_hash_join " ${PREFER_HASH_JOIN} " --format parquet -o " ${RESULTS_FILE} "
672672}
673673
674674data_h2o () {
@@ -859,7 +859,7 @@ run_h2o() {
859859 QUERY_FILE=" ${SCRIPT_DIR} /queries/h2o/${RUN_Type} .sql"
860860
861861 # Run the benchmark using the dynamically constructed file path and query file
862- $CARGO_COMMAND --bin dfbench -- h2o \
862+ debug_run $CARGO_COMMAND --bin dfbench -- h2o \
863863 --iterations 3 \
864864 --path " ${H2O_DIR} /${FILE_NAME} " \
865865 --queries-path " ${QUERY_FILE} " \
@@ -910,7 +910,7 @@ h2o_runner() {
910910 # Set the query file name based on the RUN_Type
911911 QUERY_FILE=" ${SCRIPT_DIR} /queries/h2o/${RUN_Type} .sql"
912912
913- $CARGO_COMMAND --bin dfbench -- h2o \
913+ debug_run $CARGO_COMMAND --bin dfbench -- h2o \
914914 --iterations 3 \
915915 --join-paths " ${H2O_DIR} /${X_TABLE_FILE_NAME} ,${H2O_DIR} /${SMALL_TABLE_FILE_NAME} ,${H2O_DIR} /${MEDIUM_TABLE_FILE_NAME} ,${H2O_DIR} /${LARGE_TABLE_FILE_NAME} " \
916916 --queries-path " ${QUERY_FILE} " \
@@ -940,7 +940,7 @@ run_external_aggr() {
940940 # number-of-partitions), and by default `--partitions` is set to number of
941941 # CPU cores, we set a constant number of partitions to prevent this
942942 # benchmark from failing on some machines.
943- $CARGO_COMMAND --bin external_aggr -- benchmark --partitions 4 --iterations 5 --path " ${TPCH_DIR} " -o " ${RESULTS_FILE} "
943+ debug_run $CARGO_COMMAND --bin external_aggr -- benchmark --partitions 4 --iterations 5 --path " ${TPCH_DIR} " -o " ${RESULTS_FILE} "
944944}
945945
946946# Runs the sort integration benchmark
@@ -950,7 +950,7 @@ run_sort_tpch() {
950950 echo " RESULTS_FILE: ${RESULTS_FILE} "
951951 echo " Running sort tpch benchmark..."
952952
953- $CARGO_COMMAND --bin dfbench -- sort-tpch --iterations 5 --path " ${TPCH_DIR} " -o " ${RESULTS_FILE} "
953+ debug_run $CARGO_COMMAND --bin dfbench -- sort-tpch --iterations 5 --path " ${TPCH_DIR} " -o " ${RESULTS_FILE} "
954954}
955955
956956
0 commit comments