Skip to content

Commit ca0b760

Browse files
authored
cleanup bench.sh usage message (#16416)
1 parent 42fe050 commit ca0b760

File tree

1 file changed

+30
-16
lines changed

1 file changed

+30
-16
lines changed

benchmarks/bench.sh

Lines changed: 30 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -55,42 +55,49 @@ $0 compare <branch1> <branch2>
5555
$0 compare_detail <branch1> <branch2>
5656
$0 venv
5757
58-
**********
58+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
5959
Examples:
60-
**********
60+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
6161
# Create the datasets for all benchmarks in $DATA_DIR
6262
./bench.sh data
6363
6464
# Run the 'tpch' benchmark on the datafusion checkout in /source/datafusion
6565
DATAFUSION_DIR=/source/datafusion ./bench.sh run tpch
6666
67-
**********
68-
* Commands
69-
**********
67+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
68+
Commands
69+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
7070
data: Generates or downloads data needed for benchmarking
7171
run: Runs the named benchmark
7272
compare: Compares fastest results from benchmark runs
7373
compare_detail: Compares minimum, average (±stddev), and maximum results from benchmark runs
7474
venv: Creates new venv (unless already exists) and installs compare's requirements into it
7575
76-
**********
77-
* Benchmarks
78-
**********
76+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
77+
Benchmarks
78+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
79+
# Run all of the following benchmarks
7980
all(default): Data/Run/Compare for all benchmarks
81+
82+
# TPC-H Benchmarks
8083
tpch: TPCH inspired benchmark on Scale Factor (SF) 1 (~1GB), single parquet file per table, hash join
8184
tpch_csv: TPCH inspired benchmark on Scale Factor (SF) 1 (~1GB), single csv file per table, hash join
8285
tpch_mem: TPCH inspired benchmark on Scale Factor (SF) 1 (~1GB), query from memory
8386
tpch10: TPCH inspired benchmark on Scale Factor (SF) 10 (~10GB), single parquet file per table, hash join
8487
tpch_csv10: TPCH inspired benchmark on Scale Factor (SF) 10 (~10GB), single csv file per table, hash join
8588
tpch_mem10: TPCH inspired benchmark on Scale Factor (SF) 10 (~10GB), query from memory
86-
cancellation: How long cancelling a query takes
87-
parquet: Benchmark of parquet reader's filtering speed
88-
sort: Benchmark of sorting speed
89-
sort_tpch: Benchmark of sorting speed for end-to-end sort queries on TPCH dataset
89+
90+
# Extended TPC-H Benchmarks
91+
sort_tpch: Benchmark of sorting speed for end-to-end sort queries on TPC-H dataset (SF=1)
92+
topk_tpch: Benchmark of top-k (sorting with limit) queries on TPC-H dataset (SF=1)
93+
external_aggr: External aggregation benchmark on TPC-H dataset (SF=1)
94+
95+
# ClickBench Benchmarks
9096
clickbench_1: ClickBench queries against a single parquet file
9197
clickbench_partitioned: ClickBench queries against a partitioned (100 files) parquet
9298
clickbench_extended: ClickBench \"inspired\" queries against a single parquet (DataFusion specific)
93-
external_aggr: External aggregation benchmark
99+
100+
# H2O.ai Benchmarks (Group By, Join, Window)
94101
h2o_small: h2oai benchmark with small dataset (1e7 rows) for groupby, default file format is csv
95102
h2o_medium: h2oai benchmark with medium dataset (1e8 rows) for groupby, default file format is csv
96103
h2o_big: h2oai benchmark with large dataset (1e9 rows) for groupby, default file format is csv
@@ -100,11 +107,18 @@ h2o_big_join: h2oai benchmark with large dataset (1e9 rows) for join,
100107
h2o_small_window: Extended h2oai benchmark with small dataset (1e7 rows) for window, default file format is csv
101108
h2o_medium_window: Extended h2oai benchmark with medium dataset (1e8 rows) for window, default file format is csv
102109
h2o_big_window: Extended h2oai benchmark with large dataset (1e9 rows) for window, default file format is csv
110+
111+
# Join Order Benchmark (IMDB)
103112
imdb: Join Order Benchmark (JOB) using the IMDB dataset converted to parquet
104113
105-
**********
106-
* Supported Configuration (Environment Variables)
107-
**********
114+
# Micro-Benchmarks (specific operators and features)
115+
cancellation: How long cancelling a query takes
116+
parquet: Benchmark of parquet reader's filtering speed
117+
sort: Benchmark of sorting speed
118+
119+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
120+
Supported Configuration (Environment Variables)
121+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
108122
DATA_DIR directory to store datasets
109123
CARGO_COMMAND command that runs the benchmark binary
110124
DATAFUSION_DIR directory to use (default $DATAFUSION_DIR)

0 commit comments

Comments (0)