Skip to content

Commit 9c80447

Browse files
try different batch sizes
1 parent 4939f5b commit 9c80447

File tree

1 file changed

+13
-5
lines changed

1 file changed

+13
-5
lines changed

cpp/src/arrow/dataset/parquet_scan_benchmark.cc

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,9 @@ Result<std::shared_ptr<Buffer>> WriteStringColParquetBuffer(int64_t nrows) {
5050
static void ParquetScanToTableCastStrings(benchmark::State& state) {
5151
// GH-43660: Scan parquet data including a String column using a dataset object with
5252
// LargeString in schema.
53-
int64_t nrows = 100'000;
54-
int64_t batch_size = 100;
55-
bool use_threads = false;
53+
size_t num_batches = state.range(0);
54+
size_t batch_size = state.range(1);
55+
size_t nrows = num_batches * batch_size;
5656
auto format = std::make_shared<ParquetFileFormat>();
5757

5858
// Create a buffer with a single String column and wrap with FileFragment
@@ -71,7 +71,6 @@ static void ParquetScanToTableCastStrings(benchmark::State& state) {
7171

7272
ASSERT_OK_AND_ASSIGN(auto builder, dataset->NewScan());
7373
ASSERT_OK(builder->BatchSize(batch_size));
74-
ASSERT_OK(builder->UseThreads(use_threads));
7574
ASSERT_OK_AND_ASSIGN(auto scanner, builder->Finish());
7675

7776
for (auto _ : state) {
@@ -82,7 +81,16 @@ static void ParquetScanToTableCastStrings(benchmark::State& state) {
8281
state.SetItemsProcessed(state.iterations() * nrows);
8382
}
8483

85-
BENCHMARK(ParquetScanToTableCastStrings);
84+
// Argument generator for the Parquet scan benchmark.
//
// Adds one argument pair to `b` for every combination of the batch counts and
// batch sizes listed below, in (num_batches, batch_size) order, and labels the
// two arguments so they show up by name in the benchmark output.
static void ParquetScanBenchmark_Customize(benchmark::internal::Benchmark* b) {
  // Candidate values for each benchmark argument.
  constexpr int64_t kBatchCounts[] = {1000};
  constexpr int64_t kBatchSizes[] = {10, 100, 1000};

  for (const int64_t batch_count : kBatchCounts) {
    for (const int64_t rows_per_batch : kBatchSizes) {
      b->Args({batch_count, rows_per_batch});
    }
  }

  b->ArgNames({"num_batches", "batch_size"});
}
92+
93+
// Register the benchmark; ParquetScanBenchmark_Customize supplies the
// (num_batches, batch_size) argument sets it is run with.
BENCHMARK(ParquetScanToTableCastStrings)->Apply(ParquetScanBenchmark_Customize);
8694

8795
} // namespace dataset
8896
} // namespace arrow

0 commit comments

Comments
 (0)