@@ -50,9 +50,9 @@ Result<std::shared_ptr<Buffer>> WriteStringColParquetBuffer(int64_t nrows) {
5050static void ParquetScanToTableCastStrings (benchmark::State& state) {
5151 // GH-43660: Scan parquet data including a String column using a dataset object with
5252 // LargeString in schema.
53- int64_t nrows = 100'000 ;
54- int64_t batch_size = 100 ;
55- bool use_threads = false ;
53+ size_t num_batches = state. range ( 0 ) ;
54+ size_t batch_size = state. range ( 1 ) ;
55+ size_t nrows = num_batches * batch_size ;
5656 auto format = std::make_shared<ParquetFileFormat>();
5757
5858 // Create a buffer with a single String column and wrap with FileFragment
@@ -71,7 +71,6 @@ static void ParquetScanToTableCastStrings(benchmark::State& state) {
7171
7272 ASSERT_OK_AND_ASSIGN (auto builder, dataset->NewScan ());
7373 ASSERT_OK (builder->BatchSize (batch_size));
74- ASSERT_OK (builder->UseThreads (use_threads));
7574 ASSERT_OK_AND_ASSIGN (auto scanner, builder->Finish ());
7675
7776 for (auto _ : state) {
@@ -82,7 +81,16 @@ static void ParquetScanToTableCastStrings(benchmark::State& state) {
8281 state.SetItemsProcessed (state.iterations () * nrows);
8382}
8483
85- BENCHMARK (ParquetScanToTableCastStrings);
84+ static void ParquetScanBenchmark_Customize (benchmark::internal::Benchmark* b) {
85+ for (const int32_t num_batches : {1000 }) {
86+ for (const int batch_size : {10 , 100 , 1000 }) {
87+ b->Args ({num_batches, batch_size});
88+ }
89+ }
90+ b->ArgNames ({" num_batches" , " batch_size" });
91+ }
92+
// Register the scan benchmark with the (num_batches, batch_size) argument grid
// supplied by ParquetScanBenchmark_Customize.
BENCHMARK (ParquetScanToTableCastStrings)->Apply (ParquetScanBenchmark_Customize);
8694
8795} // namespace dataset
8896} // namespace arrow
0 commit comments