Skip to content

Commit feb385d

Browse files
authored
benchmark(bigtable): update sync and async scan benchmarks (#15557)
1 parent 97b036b commit feb385d

File tree

8 files changed

+261
-29
lines changed

8 files changed

+261
-29
lines changed

google/cloud/bigtable/benchmarks/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ set(bigtable_benchmark_programs
8585
endurance_benchmark.cc
8686
mutation_batcher_throughput_benchmark.cc
8787
read_sync_vs_async_benchmark.cc
88+
scan_async_throughput_benchmark.cc
8889
scan_throughput_benchmark.cc)
8990
export_list_to_bazel("bigtable_benchmark_programs.bzl"
9091
"bigtable_benchmark_programs" YEAR "2018")

google/cloud/bigtable/benchmarks/benchmark.cc

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ google::cloud::StatusOr<BenchmarkOptions> ParseArgs(
6262
"--thread-count=1",
6363
"--test-duration=1s",
6464
"--table-size=11000",
65+
"--enable-metrics=true",
6566
},
6667
description);
6768
}
@@ -124,9 +125,11 @@ void Benchmark::DeleteTable() {
124125
}
125126
}
126127

127-
Table Benchmark::MakeTable() const {
128+
Table Benchmark::MakeTable(Options connection_opts) const {
129+
auto connection_options =
130+
google::cloud::internal::MergeOptions(std::move(connection_opts), opts_);
128131
auto table_opts = Options{}.set<AppProfileIdOption>(options_.app_profile_id);
129-
return Table(MakeDataConnection(opts_),
132+
return Table(MakeDataConnection(std::move(connection_options)),
130133
TableResource(options_.project_id, options_.instance_id,
131134
options_.table_id),
132135
std::move(table_opts));

google/cloud/bigtable/benchmarks/benchmark.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ class Benchmark {
6363
google::cloud::StatusOr<BenchmarkResult> PopulateTable();
6464

6565
/// Return a `bigtable::Table` configured for this benchmark.
66-
Table MakeTable() const;
66+
Table MakeTable(Options connection_opts = Options{}) const;
6767

6868
/// Create a random key.
6969
std::string MakeRandomKey(google::cloud::internal::DefaultPRNG& gen) const;

google/cloud/bigtable/benchmarks/benchmark_options.cc

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,11 @@ google::cloud::StatusOr<BenchmarkOptions> ParseBenchmarkOptions(
9595
[&options](std::string const& val) {
9696
options.include_read_rows = ParseBoolean(val).value_or(true);
9797
}},
98-
98+
{"--enable-metrics",
99+
"whether to enable Client Side Metrics for benchmarking",
100+
[&options](std::string const& val) {
101+
options.enable_metrics = ParseBoolean(val).value_or(true);
102+
}},
99103
};
100104

101105
auto usage = BuildUsage(desc, argv[0]);

google/cloud/bigtable/benchmarks/benchmark_options.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ struct BenchmarkOptions {
4141
int parallel_requests = 10;
4242
bool exit_after_parse = false;
4343
bool include_read_rows = false;
44+
bool enable_metrics = true;
4445
};
4546

4647
google::cloud::StatusOr<BenchmarkOptions> ParseBenchmarkOptions(

google/cloud/bigtable/benchmarks/bigtable_benchmark_programs.bzl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,5 +21,6 @@ bigtable_benchmark_programs = [
2121
"endurance_benchmark.cc",
2222
"mutation_batcher_throughput_benchmark.cc",
2323
"read_sync_vs_async_benchmark.cc",
24+
"scan_async_throughput_benchmark.cc",
2425
"scan_throughput_benchmark.cc",
2526
]
Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
// Copyright 2025 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// https://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "google/cloud/bigtable/benchmarks/benchmark.h"
16+
#ifdef PROFILE
17+
#include "google/cloud/internal/getenv.h"
18+
#include "gperftools/profiler.h"
19+
#endif
20+
#include <chrono>
21+
#include <future>
22+
#include <iomanip>
23+
#include <iostream>
24+
#include <sstream>
25+
26+
char const kDescription[] =
    R"""(Measure the throughput of `Table::AsyncReadRows()`.

This benchmark measures the throughput of `AsyncReadRows()` on a "typical" table
used for serving data. The benchmark:
- Creates a table with 10,000,000 rows, each row with a single column family,
but with 10 columns.
- If there is a collision on the table name the benchmark aborts immediately.
- The benchmark populates the table during an initial phase. The benchmark uses
`BulkApply()` to populate the table, multiple threads to populate in parallel,
and provides an initial split hint when creating the table.
- The benchmark reports the throughput of this bulk upload phase.

After successfully uploading the initial data, the main phase of the benchmark
starts. During this phase the benchmark will:

- Execute the following block with different scan sizes:
- Execute the following loop for S seconds:
- Pick one of the 10,000,000 keys at random, with uniform probability.
- Scan the number rows starting the key selected above.
- Go back and pick a new random key.

The benchmark will report throughput in rows per second for each scans with 100,
1,000, 10,000, 100,000, and 1,000,000 rows.

Using a command-line parameter the benchmark can be configured to create a local
gRPC server that implements the Cloud Bigtable APIs used by the benchmark. If
this parameter is not used, the benchmark uses the default configuration, that
is, a production instance of Cloud Bigtable unless the CLOUD_BIGTABLE_EMULATOR
environment variable is set.
)""";

/// Helper functions and types for the scan_async_throughput_benchmark.
namespace {
namespace bigtable = ::google::cloud::bigtable;
using bigtable::benchmarks::Benchmark;
using bigtable::benchmarks::BenchmarkResult;
using bigtable::benchmarks::FormatDuration;
using bigtable::benchmarks::kColumnFamily;

// The scan sizes (in rows) exercised by the main phase of the benchmark.
constexpr int kScanSizes[] = {100, 1000, 10000, 100000, 1000000};

/// Run an iteration of the test.
BenchmarkResult RunBenchmark(bigtable::benchmarks::Benchmark const& benchmark,
                             google::cloud::internal::DefaultPRNG& generator,
                             std::uniform_int_distribution<std::int64_t> prng,
                             std::int64_t scan_size,
                             std::chrono::seconds test_duration,
                             google::cloud::bigtable::Table& table);
}  // anonymous namespace
76+
77+
int main(int argc, char* argv[]) {
78+
auto options = bigtable::benchmarks::ParseArgs(argc, argv, kDescription);
79+
if (!options) {
80+
std::cerr << options.status() << "\n";
81+
return -1;
82+
}
83+
if (options->exit_after_parse) return 0;
84+
Benchmark benchmark(*options);
85+
86+
// Create and populate the table for the benchmark.
87+
benchmark.CreateTable();
88+
auto populate_results = benchmark.PopulateTable();
89+
Benchmark::PrintThroughputResult(std::cout, "scant", "Upload",
90+
*populate_results);
91+
92+
// Create the client here so that we don't repeatedly incur connection setup
93+
// costs while running all the scans.
94+
auto table = benchmark.MakeTable(
95+
google::cloud::Options{}.set<bigtable::EnableMetricsOption>(
96+
options->enable_metrics));
97+
98+
auto generator = google::cloud::internal::MakeDefaultPRNG();
99+
100+
#ifdef PROFILE
101+
/*
102+
* Profiling docs: https://gperftools.github.io/gperftools/cpuprofile.html
103+
* Typical execution:
104+
* $ PROFILER_PATH="/tmp/<filename>" bazel run -c opt --copt=-DPROFILE \
105+
* --copt=-g --linkopt='-lprofiler' \
106+
* google/cloud/bigtable/benchmarks:scan_async_throughput_benchmark
107+
*/
108+
auto profile_data_path = google::cloud::internal::GetEnv("PROFILER_PATH");
109+
if (profile_data_path) ProfilerStart(profile_data_path->c_str());
110+
auto profiler_start = std::chrono::steady_clock::now();
111+
#endif // PROFILE
112+
std::map<std::string, BenchmarkResult> results_by_size;
113+
for (auto scan_size : kScanSizes) {
114+
std::uniform_int_distribution<std::int64_t> prng(
115+
0, options->table_size - scan_size - 1);
116+
std::cout << "# Running benchmark [" << scan_size << "] " << std::flush;
117+
auto start = std::chrono::steady_clock::now();
118+
auto combined = RunBenchmark(benchmark, generator, prng, scan_size,
119+
options->test_duration, table);
120+
using std::chrono::duration_cast;
121+
combined.elapsed = duration_cast<std::chrono::milliseconds>(
122+
std::chrono::steady_clock::now() - start);
123+
std::cout << " DONE. Elapsed=" << FormatDuration(combined.elapsed)
124+
<< ", Ops=" << combined.operations.size()
125+
<< ", Rows=" << combined.row_count << "\n";
126+
auto op_name = "AsyncScan(" + std::to_string(scan_size) + ")";
127+
Benchmark::PrintLatencyResult(std::cout, "scant", op_name, combined);
128+
results_by_size[op_name] = std::move(combined);
129+
}
130+
#ifdef PROFILE
131+
auto profiler_stop = std::chrono::steady_clock::now();
132+
if (profile_data_path) {
133+
ProfilerStop();
134+
std::cout << "Steady clock profiling duration="
135+
<< FormatDuration(profiler_stop - profiler_start) << "\n";
136+
}
137+
#endif // PROFILE
138+
139+
std::cout << bigtable::benchmarks::Benchmark::ResultsCsvHeader() << "\n";
140+
benchmark.PrintResultCsv(std::cout, "scant", "BulkApply()", "Latency",
141+
*populate_results);
142+
for (auto& kv : results_by_size) {
143+
benchmark.PrintResultCsv(std::cout, "scant", kv.first, "IterationTime",
144+
kv.second);
145+
}
146+
147+
benchmark.DeleteTable();
148+
return 0;
149+
}
150+
151+
namespace {
152+
153+
BenchmarkResult RunBenchmark(bigtable::benchmarks::Benchmark const& benchmark,
154+
google::cloud::internal::DefaultPRNG& generator,
155+
std::uniform_int_distribution<std::int64_t> prng,
156+
std::int64_t scan_size,
157+
std::chrono::seconds test_duration,
158+
google::cloud::bigtable::Table& table) {
159+
BenchmarkResult result = {};
160+
auto test_start = std::chrono::steady_clock::now();
161+
while (std::chrono::steady_clock::now() < test_start + test_duration) {
162+
auto row_set = bigtable::RowSet{
163+
bigtable::RowRange::StartingAt(benchmark.MakeKey(prng(generator)))};
164+
long count = 0; // NOLINT(google-runtime-int)
165+
std::promise<long> all_done; // NOLINT(google-runtime-int)
166+
// NOLINTNEXTLINE(google-runtime-int)
167+
std::future<long> all_done_future = all_done.get_future();
168+
169+
auto op = [&all_done, &all_done_future, &count, &table, scan_size,
170+
&row_set]() mutable -> google::cloud::Status {
171+
long num_rows = 0; // NOLINT(google-runtime-int)
172+
table.AsyncReadRows(
173+
[&num_rows](auto const&) mutable {
174+
++num_rows;
175+
return google::cloud::make_ready_future(true);
176+
},
177+
[&all_done, &num_rows](auto const&) mutable {
178+
all_done.set_value(num_rows);
179+
},
180+
std::move(row_set), scan_size,
181+
bigtable::Filter::ColumnRangeClosed(kColumnFamily, "field0",
182+
"field9"));
183+
count = all_done_future.get();
184+
return {};
185+
};
186+
result.operations.push_back(Benchmark::TimeOperation(op));
187+
result.row_count += count;
188+
}
189+
return result;
190+
}
191+
192+
} // anonymous namespace

0 commit comments

Comments
 (0)