
Commit 373522a

chore[bench-vortex]: unify the different benchmark binaries (#3785)
Signed-off-by: Joe Isaacs <[email protected]>
1 parent 9d1b837 commit 373522a

23 files changed: +1242 −1116 lines

.github/workflows/generate-benchmarks-s3.yml

Lines changed: 1 addition & 1 deletion
```diff
@@ -37,7 +37,7 @@ jobs:
       shell: bash
       run: |
         # We run each query once to make sure we don't upload a file if there's a bug that causes a panic.
-        cargo run --release --bin clickbench --package bench-vortex -- --targets datafusion:parquet,datafusion:vortex -i1
+        cargo run --release --bin query_bench --package bench-vortex -- clickbench --targets datafusion:parquet,datafusion:vortex -i1
         aws s3 rm --recursive s3://vortex-bench-dev-eu/develop/clickbench/
         aws s3 cp --recursive bench-vortex/data/clickbench_partitioned s3://vortex-bench-dev-eu/develop/clickbench/
         rm -rf bench-vortex/data/clickbench_partitioned/
```

.github/workflows/sql-benchmarks.yml

Lines changed: 9 additions & 9 deletions
```diff
@@ -29,17 +29,17 @@ jobs:
       # https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/running-variations-of-jobs-in-a-workflow#example-adding-configurations
       include:
         - id: tpch-nvme
-          binary_name: tpch
+          subcommand: tpch
           name: TPC-H on NVME
           targets: "datafusion:arrow,datafusion:parquet,datafusion:vortex,duckdb:parquet,duckdb:vortex,duckdb:duckdb"
           scale_factor: "--scale-factor ${{inputs.scale_factor}}"
         - id: clickbench-nvme
-          binary_name: clickbench
+          subcommand: clickbench
           name: Clickbench on NVME
           targets: "datafusion:parquet,datafusion:vortex,duckdb:parquet,duckdb:vortex,duckdb:duckdb"
           scale_factor: ""
         - id: tpch-s3
-          binary_name: tpch
+          subcommand: tpch
           name: TPC-H on S3
           local_dir: bench-vortex/data/tpch/${{inputs.scale_factor}}
           remote_storage: s3://vortex-bench-dev-eu/${{github.ref_name}}/tpch/${{inputs.scale_factor}}/
@@ -79,7 +79,7 @@ jobs:
       env:
         RUSTFLAGS: '-C target-cpu=native -C force-frame-pointers=yes'
       run: |
-        cargo build --bin ${{ matrix.binary_name }} --package bench-vortex --profile release_debug
+        cargo build --bin query_bench --package bench-vortex --profile release_debug
 
     - name: Generate data
       shell: bash
@@ -88,11 +88,11 @@ jobs:
       run: |
         # Generate data, running each query once to make sure they don't panic.
         echo "datafusion:parquet"
-        target/release_debug/${{ matrix.binary_name }} --targets datafusion:parquet -i1 -d gh-json ${{ matrix.scale_factor }}
+        target/release_debug/query_bench ${{ matrix.subcommand }} --targets datafusion:parquet -i1 -d gh-json ${{ matrix.scale_factor }}
         echo "datafusion:vortex"
-        target/release_debug/${{ matrix.binary_name }} --targets datafusion:vortex -i1 -d gh-json ${{ matrix.scale_factor }}
+        target/release_debug/query_bench ${{ matrix.subcommand }} --targets datafusion:vortex -i1 -d gh-json ${{ matrix.scale_factor }}
         echo "duckdb:vortex"
-        target/release_debug/${{ matrix.binary_name }} --targets duckdb:vortex -i1 -d gh-json ${{ matrix.scale_factor }}
+        target/release_debug/query_bench ${{ matrix.subcommand }} --targets duckdb:vortex -i1 -d gh-json ${{ matrix.scale_factor }}
 
     - name: Setup AWS CLI
       uses: aws-actions/configure-aws-credentials@v4
@@ -126,7 +126,7 @@ jobs:
         OTEL_EXPORTER_OTLP_HEADERS: '${{ secrets.OTEL_EXPORTER_OTLP_HEADERS }}'
         OTEL_RESOURCE_ATTRIBUTES: 'bench-name=${{ matrix.id }}'
       run: |
-        target/release_debug/${{ matrix.binary_name }} \
+        target/release_debug/query_bench ${{ matrix.subcommand }} \
           -d gh-json \
           --targets ${{ matrix.targets }} \
           --export-spans \
@@ -144,7 +144,7 @@ jobs:
         OTEL_EXPORTER_OTLP_HEADERS: '${{ secrets.OTEL_EXPORTER_OTLP_HEADERS }}'
         OTEL_RESOURCE_ATTRIBUTES: 'bench-name=${{ matrix.id }}'
       run: |
-        target/release_debug/${{ matrix.binary_name }} \
+        target/release_debug/query_bench ${{ matrix.subcommand }} \
           --use-remote-data-dir ${{ matrix.remote_storage }} \
           --targets ${{ matrix.targets }} \
           --export-spans \
```

bench-vortex/README.md

Lines changed: 9 additions & 11 deletions
````diff
@@ -11,29 +11,27 @@ comparing vortex compression to parquet and debugging vortex compression performance
 This binary compresses a file using vortex compression and writes the compressed file to disk where it can be examined
 or used for other operations.
 
-### `comparison.rs`
 
-This binary compresses a dataset using vortex compression and parquet, taking some stats on the compression performance
-of each run, and writes out these stats to a csv.
+### `query_bench`
 
-* This csv can then be loaded into duckdb and analyzed with the included comparison.sql script.
+This is the unified benchmark runner that supports multiple benchmark suites including TPC-H, ClickBench, and TPC-DS.
 
-### `tpch.rs`
+To run the TPC-H benchmarks you can use:
 
-This binary will run TPC-H query 1 using DataFusion, comparing the Vortex in-memory provider against Arrow and CSV.
+```bash
+cargo run --bin query_bench -- tpch
+```
 
-To run the tpch benchmarks you can use:
+To run the ClickBench benchmarks:
 
 ```bash
-cargo run --bin tpch
+cargo run --bin query_bench -- clickbench
 ```
 
-There are also clickbench and tpc-ds benchmarks, which can be run similarly.
-
 For profiling, you can open in Instruments using the following invocation:
 
 ```
-cargo instruments -p bench-vortex --bin tpch --template Time --profile bench
+cargo instruments -p bench-vortex --bin query_bench --template Time --profile bench -- tpch
 ```
 
 ### Data directory
````
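Not part of the diff above: the unified subcommands shown in the new README also accept the shared driver flags that the CI workflows in this commit pass through, such as `--targets` (comma-separated `engine:format` pairs) and `-i` (iteration count). A hedged example combining them, with flag spellings taken verbatim from the workflow invocations above:

```bash
# Run the TPC-H suite once against DataFusion and DuckDB over Vortex files.
# --targets takes engine:format pairs; -i1 runs a single iteration.
cargo run --release --bin query_bench --package bench-vortex -- \
  tpch --targets datafusion:vortex,duckdb:vortex -i1
```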
Lines changed: 251 additions & 0 deletions
@@ -0,0 +1,251 @@
```rust
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

//! Benchmark driver that handles CLI logic and orchestrates benchmark execution

use std::path::PathBuf;

use anyhow::Result;
use indicatif::ProgressBar;
use itertools::Itertools;
use log::warn;
use vortex_datafusion::metrics::VortexMetricsFinder;

use crate::benchmark_trait::Benchmark;
use crate::display::DisplayFormat;
use crate::engines::{EngineCtx, benchmark_datafusion_query, benchmark_duckdb_query};
use crate::measurements::QueryMeasurement;
use crate::metrics::{MetricsSetExt, export_plan_spans};
use crate::query_bench::{filter_queries, print_results, setup_logging_and_tracing};
use crate::utils::{new_tokio_runtime, url_scheme_to_storage};
use crate::{Engine, Format, Target, df, vortex_panic};

/// Configuration for the benchmark driver
pub struct DriverConfig {
    pub targets: Vec<Target>,
    pub iterations: usize,
    pub threads: Option<usize>,
    pub verbose: bool,
    pub display_format: DisplayFormat,
    pub disable_datafusion_cache: bool,
    pub queries: Option<Vec<usize>>,
    pub exclude_queries: Option<Vec<usize>>,
    pub output_path: Option<PathBuf>,
    pub emit_plan: bool,
    pub export_spans: bool,
    pub show_metrics: bool,
    pub hide_progress_bar: bool,
}

/// Run a benchmark using the provided implementation and configuration
pub fn run_benchmark<B: Benchmark>(benchmark: B, config: DriverConfig) -> Result<()> {
    let _trace_guard = setup_logging_and_tracing(
        config.verbose,
        &format!("{}.trace.json", benchmark.dataset_name()),
    )?;

    // Validate arguments
    validate_args(&config)?;

    // Generate data for each target (idempotent)
    for target in &config.targets {
        benchmark.generate_data(target)?;
    }

    let filtered_queries = filter_queries(
        benchmark.queries()?,
        config.queries.as_ref(),
        config.exclude_queries.as_ref(),
    );

    let progress_bar = if config.hide_progress_bar {
        ProgressBar::hidden()
    } else {
        ProgressBar::new((filtered_queries.len() * config.targets.len()) as u64)
    };

    let mut query_measurements = Vec::new();

    for target in config.targets.iter() {
        let tokio_runtime = new_tokio_runtime(config.threads);

        let mut engine_ctx = benchmark.setup_engine_context(
            target,
            config.disable_datafusion_cache,
            config.emit_plan,
        )?;

        tokio_runtime.block_on(benchmark.register_tables(&engine_ctx, target.format()))?;

        let bench_measurements = execute_queries(
            &filtered_queries,
            config.iterations,
            &tokio_runtime,
            target.format(),
            &progress_bar,
            &mut engine_ctx,
            &benchmark,
        )?;

        tokio_runtime.block_on(export_metrics_if_requested(
            &engine_ctx,
            config.export_spans,
        ))?;

        if config.show_metrics {
            print_metrics(&engine_ctx);
        }

        query_measurements.extend(bench_measurements);
    }

    print_results(
        &config.display_format,
        query_measurements,
        &config.targets,
        &config.output_path,
    )
}

fn validate_args(config: &DriverConfig) -> Result<()> {
    let engines = config
        .targets
        .iter()
        .map(|t| t.engine())
        .unique()
        .collect_vec();

    if (config.emit_plan || config.export_spans || config.show_metrics || config.threads.is_some())
        && !engines.contains(&Engine::DataFusion)
    {
        vortex_panic!(
            "--emit-plan, --export-spans, --show-metrics, --threads are only valid if DataFusion is used"
        );
    }
    Ok(())
}

fn execute_queries<B: Benchmark>(
    queries: &[(usize, String)],
    iterations: usize,
    runtime: &tokio::runtime::Runtime,
    format: Format,
    progress_bar: &ProgressBar,
    engine_ctx: &mut EngineCtx,
    benchmark: &B,
) -> Result<Vec<QueryMeasurement>> {
    let mut query_measurements = Vec::new();
    let expected_row_counts = benchmark.expected_row_counts();

    for &(query_idx, ref query_string) in queries.iter() {
        match engine_ctx {
            EngineCtx::DataFusion(ctx) => {
                let (runs, (row_count, execution_plan)) = runtime.block_on(async {
                    benchmark_datafusion_query(iterations, || async {
                        let (batches, plan) = df::execute_query(&ctx.session, query_string)
                            .await
                            .unwrap_or_else(|err| {
                                vortex_panic!("query: {query_idx} failed with: {err}")
                            });
                        let row_count: usize = batches.iter().map(|batch| batch.num_rows()).sum();
                        (row_count, plan)
                    })
                    .await
                });

                // Validate row count if expected counts are provided
                if let Some(expected_counts) = expected_row_counts {
                    if query_idx < expected_counts.len() {
                        assert_eq!(
                            row_count, expected_counts[query_idx],
                            "Row count mismatch for query {query_idx} - datafusion:{format}",
                        );
                    }
                }

                ctx.execution_plans
                    .push((query_idx, execution_plan.clone()));

                if ctx.emit_plan {
                    df::write_execution_plan(
                        query_idx,
                        format,
                        benchmark.dataset_name(),
                        execution_plan.as_ref(),
                    );
                }

                ctx.metrics.push((
                    query_idx,
                    format,
                    VortexMetricsFinder::find_all(execution_plan.as_ref()),
                ));

                query_measurements.push(QueryMeasurement {
                    query_idx,
                    target: Target::new(Engine::DataFusion, format),
                    benchmark_dataset: benchmark.dataset(),
                    storage: url_scheme_to_storage(benchmark.data_url())?,
                    runs,
                });
            }
            EngineCtx::DuckDB(ctx) => {
                let (runs, row_count) =
                    benchmark_duckdb_query(query_idx, query_string, iterations, ctx);

                // Validate row count if expected counts are provided
                if let Some(expected_counts) = expected_row_counts {
                    if query_idx < expected_counts.len() {
                        assert_eq!(
                            row_count, expected_counts[query_idx],
                            "Row count mismatch for query {query_idx} - duckdb:{format}",
                        );
                    }
                }

                query_measurements.push(QueryMeasurement {
                    query_idx,
                    target: Target::new(Engine::DuckDB, format),
                    benchmark_dataset: benchmark.dataset(),
                    storage: url_scheme_to_storage(benchmark.data_url())?,
                    runs,
                });
            }
        }

        progress_bar.inc(1);
    }

    Ok(query_measurements)
}

async fn export_metrics_if_requested(engine_ctx: &EngineCtx, export_spans: bool) -> Result<()> {
    if let EngineCtx::DataFusion(ctx) = engine_ctx {
        if export_spans {
            if let Err(err) = export_plan_spans(Format::OnDiskVortex, &ctx.execution_plans).await {
                warn!("failed to export spans {err}");
            }
        }
    }
    Ok(())
}

fn print_metrics(engine_ctx: &EngineCtx) {
    if let EngineCtx::DataFusion(ctx) = engine_ctx {
        for (query_idx, file_format, metric_sets) in &ctx.metrics {
            eprintln!("metrics for query={query_idx}, {file_format}:");
            for (scan_idx, metrics_set) in metric_sets.iter().enumerate() {
                eprintln!("scan[{scan_idx}]:");
                for metric in metrics_set
                    .clone()
                    .timestamps_removed()
                    .aggregate()
                    .sorted_for_display()
                    .iter()
                {
                    eprintln!("{metric}");
                }
            }
        }
    }
}
```
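For orientation, here is a minimal, hypothetical sketch of how a suite subcommand might drive this API. `DriverConfig`, `run_benchmark`, `Target`, `Engine`, and `Format::OnDiskVortex` come from the file above; `TpchBenchmark` (an implementor of the `Benchmark` trait) and the `DisplayFormat::Table` variant are assumptions for illustration, and the crate-internal `use` paths are elided because the new file's module path is not shown in this capture.

```rust
use anyhow::Result;

// Hypothetical wiring for a `tpch` subcommand; not part of this commit.
// (crate-internal imports elided; module path not shown above)
fn run_tpch_subcommand() -> Result<()> {
    // Assumed: `TpchBenchmark` implements the crate's `Benchmark` trait.
    let benchmark = TpchBenchmark::default();
    let config = DriverConfig {
        // Benchmark DataFusion scanning on-disk Vortex files.
        targets: vec![Target::new(Engine::DataFusion, Format::OnDiskVortex)],
        iterations: 3,
        threads: None, // default Tokio runtime sizing
        verbose: false,
        display_format: DisplayFormat::Table, // assumed variant name
        disable_datafusion_cache: false,
        queries: None,         // None => run every query in the suite
        exclude_queries: None,
        output_path: None,     // print results instead of writing a file
        emit_plan: false,
        export_spans: false,
        show_metrics: false,
        hide_progress_bar: true,
    };
    // The driver generates data, registers tables, executes the queries,
    // and prints the measurements.
    run_benchmark(benchmark, config)
}
```

Note that the DataFusion-only options (`emit_plan`, `export_spans`, `show_metrics`, `threads`) would trip `validate_args` if set without at least one DataFusion target.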
