@@ -78,8 +78,10 @@ venv: Creates new venv (unless already exists) and installs compare's
7878**********
7979all(default): Data/Run/Compare for all benchmarks
8080tpch: TPCH inspired benchmark on Scale Factor (SF) 1 (~1GB), single parquet file per table, hash join
81+ tpch_csv: TPCH inspired benchmark on Scale Factor (SF) 1 (~1GB), single csv file per table, hash join
8182tpch_mem: TPCH inspired benchmark on Scale Factor (SF) 1 (~1GB), query from memory
8283tpch10: TPCH inspired benchmark on Scale Factor (SF) 10 (~10GB), single parquet file per table, hash join
84+ tpch_csv10: TPCH inspired benchmark on Scale Factor (SF) 10 (~10GB), single csv file per table, hash join
8385tpch_mem10: TPCH inspired benchmark on Scale Factor (SF) 10 (~10GB), query from memory
8486cancellation: How long cancelling a query takes
8587parquet: Benchmark of parquet reader's filtering speed
@@ -266,9 +268,11 @@ main() {
266268 mkdir -p " ${DATA_DIR} "
267269 case " $BENCHMARK " in
268270 all)
269- run_tpch " 1"
271+ run_tpch " 1" " parquet"
272+ run_tpch " 1" " csv"
270273 run_tpch_mem " 1"
271- run_tpch " 10"
274+ run_tpch " 10" " parquet"
275+ run_tpch " 10" " csv"
272276 run_tpch_mem " 10"
273277 run_cancellation
274278 run_parquet
@@ -286,13 +290,19 @@ main() {
286290 run_external_aggr
287291 ;;
288292 tpch)
289- run_tpch " 1"
293+ run_tpch " 1" " parquet"
294+ ;;
295+ tpch_csv)
296+ run_tpch " 1" " csv"
290297 ;;
291298 tpch_mem)
292299 run_tpch_mem " 1"
293300 ;;
294301 tpch10)
295- run_tpch " 10"
302+ run_tpch " 10" " parquet"
303+ ;;
304+ tpch_csv10)
305+ run_tpch " 10" " csv"
296306 ;;
297307 tpch_mem10)
298308 run_tpch_mem " 10"
@@ -430,6 +440,17 @@ data_tpch() {
430440 $CARGO_COMMAND --bin tpch -- convert --input " ${TPCH_DIR} " --output " ${TPCH_DIR} " --format parquet
431441 popd > /dev/null
432442 fi
443+
444+ # Create 'csv' files from tbl
445+ FILE=" ${TPCH_DIR} /csv/supplier"
446+ if test -d " ${FILE} " ; then
447+ echo " csv files exist ($FILE exists)."
448+ else
449+ echo " creating csv files using benchmark binary ..."
450+ pushd " ${SCRIPT_DIR} " > /dev/null
451+ $CARGO_COMMAND --bin tpch -- convert --input " ${TPCH_DIR} " --output " ${TPCH_DIR} /csv" --format csv
452+ popd > /dev/null
453+ fi
433454}
434455
435456# Runs the tpch benchmark
@@ -446,7 +467,9 @@ run_tpch() {
446467 echo " Running tpch benchmark..."
447468 # Optional query filter to run specific query
448469 QUERY=$( [ -n " $ARG3 " ] && echo " --query $ARG3 " || echo " " )
449- debug_run $CARGO_COMMAND --bin tpch -- benchmark datafusion --iterations 5 --path " ${TPCH_DIR} " --prefer_hash_join " ${PREFER_HASH_JOIN} " --format parquet -o " ${RESULTS_FILE} " $QUERY
470+
471+ FORMAT=$2
472+ debug_run $CARGO_COMMAND --bin tpch -- benchmark datafusion --iterations 5 --path " ${TPCH_DIR} " --prefer_hash_join " ${PREFER_HASH_JOIN} " --format ${FORMAT} -o " ${RESULTS_FILE} " $QUERY
450473}
451474
452475# Runs the tpch benchmark, querying from memory
0 commit comments