Skip to content

Commit de1d40a

Browse files
committed
Fix remote SQL
Signed-off-by: Adam Gutglick <[email protected]>
1 parent 8bf51c9 commit de1d40a

File tree

5 files changed

+51
-45
lines changed

5 files changed

+51
-45
lines changed

benchmarks/ddb-bench/src/main.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ async fn main() -> anyhow::Result<()> {
7575

7676
setup_logging_and_tracing(args.verbose, args.tracing)?;
7777

78-
let benchmark = create_benchmark(args.benchmark, opts)?;
78+
let benchmark = create_benchmark(args.benchmark, &opts)?;
7979

8080
let filtered_queries = filter_queries(
8181
benchmark.queries()?,

benchmarks/df-bench/src/lib.rs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ pub fn get_session_context() -> SessionContext {
6565
}
6666

6767
pub fn make_object_store(
68-
df: &SessionContext,
68+
session: &SessionContext,
6969
source: &Url,
7070
) -> anyhow::Result<Arc<dyn ObjectStore>> {
7171
match source.scheme() {
@@ -76,7 +76,8 @@ pub fn make_object_store(
7676
.with_bucket_name(bucket_name)
7777
.build()?,
7878
);
79-
df.register_object_store(&Url::parse(&format!("s3://{bucket_name}/"))?, s3.clone());
79+
session
80+
.register_object_store(&Url::parse(&format!("s3://{bucket_name}/"))?, s3.clone());
8081
Ok(s3)
8182
}
8283
"gs" => {
@@ -86,12 +87,13 @@ pub fn make_object_store(
8687
.with_bucket_name(bucket_name)
8788
.build()?,
8889
);
89-
df.register_object_store(&Url::parse(&format!("gs://{bucket_name}/"))?, gcs.clone());
90+
session
91+
.register_object_store(&Url::parse(&format!("gs://{bucket_name}/"))?, gcs.clone());
9092
Ok(gcs)
9193
}
9294
_ => {
9395
let fs = Arc::new(LocalFileSystem::default());
94-
df.register_object_store(&Url::parse("file:/")?, fs.clone());
96+
session.register_object_store(&Url::parse("file:/")?, fs.clone());
9597
Ok(fs)
9698
}
9799
}

benchmarks/df-bench/src/main.rs

Lines changed: 23 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ async fn main() -> anyhow::Result<()> {
104104

105105
setup_logging_and_tracing(args.verbose, args.tracing)?;
106106

107-
let benchmark = create_benchmark(args.benchmark, opts)?;
107+
let benchmark = create_benchmark(args.benchmark, &opts)?;
108108

109109
let filtered_queries = filter_queries(
110110
benchmark.queries()?,
@@ -151,6 +151,7 @@ async fn main() -> anyhow::Result<()> {
151151
args.iterations,
152152
|format| async move {
153153
let session = df_bench::get_session_context();
154+
df_bench::make_object_store(&session, bench_ref.data_url())?;
154155
register_benchmark_tables(&session, bench_ref, format).await?;
155156
Ok(session)
156157
},
@@ -178,32 +179,33 @@ async fn register_benchmark_tables<B: Benchmark + ?Sized>(
178179
benchmark: &B,
179180
format: Format,
180181
) -> anyhow::Result<()> {
181-
if matches!(format, Format::Arrow) {
182-
register_arrow_tables(session, benchmark).await
183-
} else {
184-
let benchmark_base = benchmark.data_url().join(&format!("{}/", format.name()))?;
185-
let file_format = format_to_df_format(format);
182+
match format {
183+
Format::Arrow => register_arrow_tables(session, benchmark).await,
184+
_ => {
185+
let benchmark_base = benchmark.data_url().join(&format!("{}/", format.name()))?;
186+
let file_format = format_to_df_format(format);
186187

187-
for table in benchmark.table_specs().iter() {
188-
let pattern = benchmark.pattern(table.name, format);
189-
let table_url = ListingTableUrl::try_new(benchmark_base.clone(), pattern)?;
188+
for table in benchmark.table_specs().iter() {
189+
let pattern = benchmark.pattern(table.name, format);
190+
let table_url = ListingTableUrl::try_new(benchmark_base.clone(), pattern)?;
190191

191-
let mut config = ListingTableConfig::new(table_url).with_listing_options(
192-
ListingOptions::new(file_format.clone())
193-
.with_session_config_options(session.state().config()),
194-
);
192+
let mut config = ListingTableConfig::new(table_url).with_listing_options(
193+
ListingOptions::new(file_format.clone())
194+
.with_session_config_options(session.state().config()),
195+
);
195196

196-
config = match table.schema.as_ref() {
197-
Some(schema) => config.with_schema(Arc::new(schema.clone())),
198-
None => config.infer_schema(&session.state()).await?,
199-
};
197+
config = match table.schema.as_ref() {
198+
Some(schema) => config.with_schema(Arc::new(schema.clone())),
199+
None => config.infer_schema(&session.state()).await?,
200+
};
200201

201-
let listing_table = Arc::new(ListingTable::try_new(config)?);
202+
let listing_table = Arc::new(ListingTable::try_new(config)?);
202203

203-
session.register_table(table.name, listing_table)?;
204-
}
204+
session.register_table(table.name, listing_table)?;
205+
}
205206

206-
Ok(())
207+
Ok(())
208+
}
207209
}
208210
}
209211

benchmarks/lance-bench/src/main.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ async fn main() -> anyhow::Result<()> {
7474

7575
setup_logging_and_tracing(args.verbose, args.tracing)?;
7676

77-
let benchmark = create_benchmark(args.benchmark, opts)?;
77+
let benchmark = create_benchmark(args.benchmark, &opts)?;
7878

7979
let filtered_queries = filter_queries(
8080
benchmark.queries()?,

vortex-bench/src/lib.rs

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,15 @@ use std::sync::LazyLock;
1111

1212
use anyhow::bail;
1313
use clap::ValueEnum;
14+
use clickbench::ClickBenchBenchmark;
15+
use clickbench::Flavor;
16+
use fineweb::FinewebBenchmark;
1417
use itertools::Itertools;
18+
use realnest::gharchive::GithubArchiveBenchmark;
1519
use serde::Serialize;
20+
use statpopgen::StatPopGenBenchmark;
21+
use tpcds::TpcDsBenchmark;
22+
use tpch::benchmark::TpcHBenchmark;
1623
pub use utils::file::*;
1724
pub use utils::logging::*;
1825
use vortex::error::VortexUnwrap;
@@ -238,55 +245,50 @@ pub enum BenchmarkArg {
238245
/// Default scale factor for TPC-related benchmarks
239246
const DEFAULT_SCALE_FACTOR: &str = "1.0";
240247

241-
/// Factory function to create a benchmark instance from CLI arguments.
242-
pub fn create_benchmark(b: BenchmarkArg, opts: Opts) -> anyhow::Result<Box<dyn Benchmark>> {
243-
use clickbench::ClickBenchBenchmark;
244-
use clickbench::Flavor;
245-
use fineweb::FinewebBenchmark;
246-
use realnest::gharchive::GithubArchiveBenchmark;
247-
use statpopgen::StatPopGenBenchmark;
248-
use tpcds::TpcDsBenchmark;
249-
use tpch::benchmark::TpcHBenchmark;
248+
const SCALE_FACTOR_KEY: &str = "scale_factor";
249+
const REMOTE_DATA_KEY: &str = "remote_data_dir";
250250

251+
/// Factory function to create a benchmark instance from CLI arguments.
252+
pub fn create_benchmark(b: BenchmarkArg, opts: &Opts) -> anyhow::Result<Box<dyn Benchmark>> {
251253
match b {
252254
BenchmarkArg::ClickBench => {
253255
let flavor = opts.get_as::<Flavor>("flavor").unwrap_or_default();
254-
let remote_data_dir = opts.get_as::<String>("remote_data_dir");
256+
let remote_data_dir = opts.get_as::<String>(REMOTE_DATA_KEY);
255257
let benchmark = ClickBenchBenchmark::new(flavor, None, remote_data_dir)?;
256258
Ok(Box::new(benchmark) as _)
257259
}
258260
BenchmarkArg::TpcH => {
259-
let scale_factor = opts.get("scale_factor").unwrap_or(DEFAULT_SCALE_FACTOR);
260-
let remote_data_dir = opts.get_as::<String>("remote_data_dir");
261+
let scale_factor = opts.get(SCALE_FACTOR_KEY).unwrap_or(DEFAULT_SCALE_FACTOR);
262+
let remote_data_dir = opts.get_as::<String>(REMOTE_DATA_KEY);
261263
let benchmark = TpcHBenchmark::new(scale_factor.to_string(), remote_data_dir)?;
262264
Ok(Box::new(benchmark) as _)
263265
}
264266
BenchmarkArg::TpcDS => {
265-
let scale_factor = opts.get("scale_factor").unwrap_or(DEFAULT_SCALE_FACTOR);
266-
let remote_data_dir = opts.get_as::<String>("remote_data_dir");
267+
let scale_factor = opts.get(SCALE_FACTOR_KEY).unwrap_or(DEFAULT_SCALE_FACTOR);
268+
let remote_data_dir = opts.get_as::<String>(REMOTE_DATA_KEY);
267269
let benchmark = TpcDsBenchmark::new(scale_factor.to_string(), remote_data_dir)?;
268270
Ok(Box::new(benchmark) as _)
269271
}
270272
BenchmarkArg::StatPopGen => {
271-
let scale_factor = opts.get_as::<u64>("scale_factor").unwrap_or(1);
273+
let scale_factor = opts.get_as::<u64>(SCALE_FACTOR_KEY).unwrap_or(1);
272274
let benchmark = StatPopGenBenchmark::new(scale_factor)?;
273275
Ok(Box::new(benchmark) as _)
274276
}
275277
BenchmarkArg::Fineweb => {
276-
let remote_data_dir = opts.get_as::<String>("remote_data_dir");
278+
let remote_data_dir = opts.get_as::<String>(REMOTE_DATA_KEY);
277279
let benchmark = FinewebBenchmark::with_remote_data_dir(remote_data_dir)?;
278280
Ok(Box::new(benchmark) as _)
279281
}
280282
BenchmarkArg::GhArchive => {
281-
let remote_data_dir = opts.get_as::<String>("remote_data_dir");
283+
let remote_data_dir = opts.get_as::<String>(REMOTE_DATA_KEY);
282284
let benchmark = GithubArchiveBenchmark::with_remote_data_dir(remote_data_dir)?;
283285
Ok(Box::new(benchmark) as _)
284286
}
285287
}
286288
}
287289

288290
/// A single key-value option for benchmark configuration.
289-
#[derive(Clone)]
291+
#[derive(Clone, Debug)]
290292
pub struct Opt {
291293
key: String,
292294
value: String,

0 commit comments

Comments
 (0)