Skip to content

Commit deeff88

Browse files
authored
debug cargo command in bench script (#16236)
1 parent 5b08b84 commit deeff88

File tree

1 file changed

+19
-19
lines changed

1 file changed

+19
-19
lines changed

benchmarks/bench.sh

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,12 @@ set -e
2828
# https://stackoverflow.com/questions/59895/how-do-i-get-the-directory-where-a-bash-script-is-located-from-within-the-script
2929
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
3030

31+
# Execute command and also print it, for debugging purposes
32+
debug_run() {
    # Runs the command given as arguments ("$@") with shell tracing enabled,
    # so the fully expanded command line is printed before it executes.
    #
    # Arguments: the command and its arguments, passed through unchanged.
    # Returns:   the exit status of the command that was run.
    local status=0
    set -x
    # Capture the status instead of letting the trailing `set +x` overwrite
    # it; otherwise a failing command looks successful to callers that test
    # the result (`if debug_run ...`, `debug_run ... || ...`).
    "$@" || status=$?
    # Turn tracing off again; the redirect keeps the `set +x` line itself
    # out of the trace output.
    { set +x; } 2>/dev/null
    return "$status"
}
3137

3238
# Set Defaults
3339
COMMAND=
@@ -435,10 +441,7 @@ run_tpch() {
435441
echo "Running tpch benchmark..."
436442
# Optional query filter to run specific query
437443
QUERY=$([ -n "$ARG3" ] && echo "--query $ARG3" || echo "")
438-
# debug the target command
439-
set -x
440-
$CARGO_COMMAND --bin tpch -- benchmark datafusion --iterations 5 --path "${TPCH_DIR}" --prefer_hash_join "${PREFER_HASH_JOIN}" --format parquet -o "${RESULTS_FILE}" $QUERY
441-
set +x
444+
debug_run $CARGO_COMMAND --bin tpch -- benchmark datafusion --iterations 5 --path "${TPCH_DIR}" --prefer_hash_join "${PREFER_HASH_JOIN}" --format parquet -o "${RESULTS_FILE}" $QUERY
442445
}
443446

444447
# Runs the tpch in memory
@@ -455,35 +458,32 @@ run_tpch_mem() {
455458
echo "Running tpch_mem benchmark..."
456459
# Optional query filter to run specific query
457460
QUERY=$([ -n "$ARG3" ] && echo "--query $ARG3" || echo "")
458-
# debug the target command
459-
set -x
460461
# -m means in memory
461-
$CARGO_COMMAND --bin tpch -- benchmark datafusion --iterations 5 --path "${TPCH_DIR}" --prefer_hash_join "${PREFER_HASH_JOIN}" -m --format parquet -o "${RESULTS_FILE}" $QUERY
462-
set +x
462+
debug_run $CARGO_COMMAND --bin tpch -- benchmark datafusion --iterations 5 --path "${TPCH_DIR}" --prefer_hash_join "${PREFER_HASH_JOIN}" -m --format parquet -o "${RESULTS_FILE}" $QUERY
463463
}
464464

465465
# Runs the cancellation benchmark
466466
run_cancellation() {
467467
RESULTS_FILE="${RESULTS_DIR}/cancellation.json"
468468
echo "RESULTS_FILE: ${RESULTS_FILE}"
469469
echo "Running cancellation benchmark..."
470-
$CARGO_COMMAND --bin dfbench -- cancellation --iterations 5 --path "${DATA_DIR}/cancellation" -o "${RESULTS_FILE}"
470+
debug_run $CARGO_COMMAND --bin dfbench -- cancellation --iterations 5 --path "${DATA_DIR}/cancellation" -o "${RESULTS_FILE}"
471471
}
472472

473473
# Runs the parquet filter benchmark
474474
run_parquet() {
475475
RESULTS_FILE="${RESULTS_DIR}/parquet.json"
476476
echo "RESULTS_FILE: ${RESULTS_FILE}"
477477
echo "Running parquet filter benchmark..."
478-
$CARGO_COMMAND --bin parquet -- filter --path "${DATA_DIR}" --scale-factor 1.0 --iterations 5 -o "${RESULTS_FILE}"
478+
debug_run $CARGO_COMMAND --bin parquet -- filter --path "${DATA_DIR}" --scale-factor 1.0 --iterations 5 -o "${RESULTS_FILE}"
479479
}
480480

481481
# Runs the sort benchmark
482482
run_sort() {
483483
RESULTS_FILE="${RESULTS_DIR}/sort.json"
484484
echo "RESULTS_FILE: ${RESULTS_FILE}"
485485
echo "Running sort benchmark..."
486-
$CARGO_COMMAND --bin parquet -- sort --path "${DATA_DIR}" --scale-factor 1.0 --iterations 5 -o "${RESULTS_FILE}"
486+
debug_run $CARGO_COMMAND --bin parquet -- sort --path "${DATA_DIR}" --scale-factor 1.0 --iterations 5 -o "${RESULTS_FILE}"
487487
}
488488

489489

@@ -537,23 +537,23 @@ run_clickbench_1() {
537537
RESULTS_FILE="${RESULTS_DIR}/clickbench_1.json"
538538
echo "RESULTS_FILE: ${RESULTS_FILE}"
539539
echo "Running clickbench (1 file) benchmark..."
540-
$CARGO_COMMAND --bin dfbench -- clickbench --iterations 5 --path "${DATA_DIR}/hits.parquet" --queries-path "${SCRIPT_DIR}/queries/clickbench/queries.sql" -o "${RESULTS_FILE}"
540+
debug_run $CARGO_COMMAND --bin dfbench -- clickbench --iterations 5 --path "${DATA_DIR}/hits.parquet" --queries-path "${SCRIPT_DIR}/queries/clickbench/queries.sql" -o "${RESULTS_FILE}"
541541
}
542542

543543
# Runs the clickbench benchmark with the partitioned parquet files
544544
run_clickbench_partitioned() {
545545
RESULTS_FILE="${RESULTS_DIR}/clickbench_partitioned.json"
546546
echo "RESULTS_FILE: ${RESULTS_FILE}"
547547
echo "Running clickbench (partitioned, 100 files) benchmark..."
548-
$CARGO_COMMAND --bin dfbench -- clickbench --iterations 5 --path "${DATA_DIR}/hits_partitioned" --queries-path "${SCRIPT_DIR}/queries/clickbench/queries.sql" -o "${RESULTS_FILE}"
548+
debug_run $CARGO_COMMAND --bin dfbench -- clickbench --iterations 5 --path "${DATA_DIR}/hits_partitioned" --queries-path "${SCRIPT_DIR}/queries/clickbench/queries.sql" -o "${RESULTS_FILE}"
549549
}
550550

551551
# Runs the clickbench "extended" benchmark with a single large parquet file
552552
run_clickbench_extended() {
553553
RESULTS_FILE="${RESULTS_DIR}/clickbench_extended.json"
554554
echo "RESULTS_FILE: ${RESULTS_FILE}"
555555
echo "Running clickbench (1 file) extended benchmark..."
556-
$CARGO_COMMAND --bin dfbench -- clickbench --iterations 5 --path "${DATA_DIR}/hits.parquet" --queries-path "${SCRIPT_DIR}/queries/clickbench/extended.sql" -o "${RESULTS_FILE}"
556+
debug_run $CARGO_COMMAND --bin dfbench -- clickbench --iterations 5 --path "${DATA_DIR}/hits.parquet" --queries-path "${SCRIPT_DIR}/queries/clickbench/extended.sql" -o "${RESULTS_FILE}"
557557
}
558558

559559
# Downloads the IMDB datasets (csv.gz files) from Peter Boncz's homepage (one of the JOB paper authors)
@@ -668,7 +668,7 @@ run_imdb() {
668668
RESULTS_FILE="${RESULTS_DIR}/imdb.json"
669669
echo "RESULTS_FILE: ${RESULTS_FILE}"
670670
echo "Running imdb benchmark..."
671-
$CARGO_COMMAND --bin imdb -- benchmark datafusion --iterations 5 --path "${IMDB_DIR}" --prefer_hash_join "${PREFER_HASH_JOIN}" --format parquet -o "${RESULTS_FILE}"
671+
debug_run $CARGO_COMMAND --bin imdb -- benchmark datafusion --iterations 5 --path "${IMDB_DIR}" --prefer_hash_join "${PREFER_HASH_JOIN}" --format parquet -o "${RESULTS_FILE}"
672672
}
673673

674674
data_h2o() {
@@ -859,7 +859,7 @@ run_h2o() {
859859
QUERY_FILE="${SCRIPT_DIR}/queries/h2o/${RUN_Type}.sql"
860860

861861
# Run the benchmark using the dynamically constructed file path and query file
862-
$CARGO_COMMAND --bin dfbench -- h2o \
862+
debug_run $CARGO_COMMAND --bin dfbench -- h2o \
863863
--iterations 3 \
864864
--path "${H2O_DIR}/${FILE_NAME}" \
865865
--queries-path "${QUERY_FILE}" \
@@ -910,7 +910,7 @@ h2o_runner() {
910910
# Set the query file name based on the RUN_Type
911911
QUERY_FILE="${SCRIPT_DIR}/queries/h2o/${RUN_Type}.sql"
912912

913-
$CARGO_COMMAND --bin dfbench -- h2o \
913+
debug_run $CARGO_COMMAND --bin dfbench -- h2o \
914914
--iterations 3 \
915915
--join-paths "${H2O_DIR}/${X_TABLE_FILE_NAME},${H2O_DIR}/${SMALL_TABLE_FILE_NAME},${H2O_DIR}/${MEDIUM_TABLE_FILE_NAME},${H2O_DIR}/${LARGE_TABLE_FILE_NAME}" \
916916
--queries-path "${QUERY_FILE}" \
@@ -940,7 +940,7 @@ run_external_aggr() {
940940
# number-of-partitions), and by default `--partitions` is set to number of
941941
# CPU cores, we set a constant number of partitions to prevent this
942942
# benchmark from failing on some machines.
943-
$CARGO_COMMAND --bin external_aggr -- benchmark --partitions 4 --iterations 5 --path "${TPCH_DIR}" -o "${RESULTS_FILE}"
943+
debug_run $CARGO_COMMAND --bin external_aggr -- benchmark --partitions 4 --iterations 5 --path "${TPCH_DIR}" -o "${RESULTS_FILE}"
944944
}
945945

946946
# Runs the sort integration benchmark
@@ -950,7 +950,7 @@ run_sort_tpch() {
950950
echo "RESULTS_FILE: ${RESULTS_FILE}"
951951
echo "Running sort tpch benchmark..."
952952

953-
$CARGO_COMMAND --bin dfbench -- sort-tpch --iterations 5 --path "${TPCH_DIR}" -o "${RESULTS_FILE}"
953+
debug_run $CARGO_COMMAND --bin dfbench -- sort-tpch --iterations 5 --path "${TPCH_DIR}" -o "${RESULTS_FILE}"
954954
}
955955

956956

0 commit comments

Comments
 (0)