Skip to content
This repository was archived by the owner on Dec 8, 2021. It is now read-only.

Commit fd06727

Browse files
authored
feat: allow re-using a database across benchmark runs (#1174)
* feat: allow re-using a database across benchmark runs

  If the user specifies a non-empty `--database=<database_name>` on the command line, that database will be re-used across runs. The database will be created and populated if it doesn't already exist (i.e. on the first run), but it will not be dropped at the end of the run as it normally would be. This saves a lot of setup time if you're running the benchmark repeatedly. Otherwise, the behavior is unchanged.
* address review comments.
* the experiments need to run when using an existing database
1 parent fbdab30 commit fd06727

File tree

3 files changed

+106
-44
lines changed

3 files changed

+106
-44
lines changed

google/cloud/spanner/benchmarks/benchmarks_config.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,8 @@ google::cloud::StatusOr<Config> ParseArgs(std::vector<std::string> args) {
8484
[](Config& c, std::string v) { c.project_id = std::move(v); }},
8585
{"--instance=",
8686
[](Config& c, std::string v) { c.instance_id = std::move(v); }},
87+
{"--database=",
88+
[](Config& c, std::string const& v) { c.database_id = std::move(v); }},
8789
{"--samples=",
8890
[](Config& c, std::string const& v) { c.samples = std::stoi(v); }},
8991
{"--iteration-duration=",

google/cloud/spanner/benchmarks/multiple_rows_cpu_benchmark.cc

Lines changed: 60 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -117,10 +117,16 @@ int main(int argc, char* argv[]) {
117117
config.instance_id = *std::move(instance);
118118
}
119119

120-
cs::Database database(
121-
config.project_id, config.instance_id,
122-
google::cloud::spanner_testing::RandomDatabaseName(generator));
123-
config.database_id = database.database_id();
120+
// If the user specified a database name on the command line, re-use it to
121+
// reduce setup time when running the benchmark repeatedly. It's assumed that
122+
// other flags related to database creation have not been changed across runs.
123+
bool user_specified_database = !config.database_id.empty();
124+
if (!user_specified_database) {
125+
config.database_id =
126+
google::cloud::spanner_testing::RandomDatabaseName(generator);
127+
}
128+
cs::Database database(config.project_id, config.instance_id,
129+
config.database_id);
124130

125131
// Once the configuration is fully initialized and the database name set,
126132
// print everything out.
@@ -137,18 +143,27 @@ int main(int argc, char* argv[]) {
137143
}
138144
return statements;
139145
}();
140-
auto created = admin_client.CreateDatabase(database, additional_statements);
146+
auto create_future =
147+
admin_client.CreateDatabase(database, additional_statements);
141148
std::cout << "# Waiting for database creation to complete " << std::flush;
142149
for (;;) {
143-
auto status = created.wait_for(std::chrono::seconds(1));
150+
auto status = create_future.wait_for(std::chrono::seconds(1));
144151
if (status == std::future_status::ready) break;
145152
std::cout << '.' << std::flush;
146153
}
147154
std::cout << " DONE\n";
148-
auto db = created.get();
155+
156+
bool database_created = true;
157+
auto db = create_future.get();
149158
if (!db) {
150-
std::cerr << "Error creating database: " << db.status() << "\n";
151-
return 1;
159+
if (user_specified_database &&
160+
db.status().code() == google::cloud::StatusCode::kAlreadyExists) {
161+
std::cout << "# Re-using existing database\n";
162+
database_created = false;
163+
} else {
164+
std::cerr << "Error creating database: " << db.status() << "\n";
165+
return 1;
166+
}
152167
}
153168

154169
std::cout << "ClientCount,ThreadCount,UsingStub"
@@ -158,21 +173,32 @@ int main(int argc, char* argv[]) {
158173
int exit_status = EXIT_SUCCESS;
159174

160175
auto experiment = e->second(generator);
161-
auto status = experiment->SetUp(config, database);
162-
if (!status.ok()) {
163-
std::cout << "# Skipping experiment, SetUp() failed: " << status << "\n";
164-
exit_status = EXIT_FAILURE;
165-
} else {
166-
status = experiment->Run(config, database);
167-
if (!status.ok()) exit_status = EXIT_FAILURE;
168-
(void)experiment->TearDown(config, database);
176+
Status setup_status;
177+
if (database_created) {
178+
setup_status = experiment->SetUp(config, database);
179+
if (!setup_status.ok()) {
180+
std::cout << "# Skipping experiment, SetUp() failed: " << setup_status
181+
<< "\n";
182+
exit_status = EXIT_FAILURE;
183+
}
184+
}
185+
if (setup_status.ok()) {
186+
auto run_status = experiment->Run(config, database);
187+
if (!run_status.ok()) exit_status = EXIT_FAILURE;
188+
if (database_created) {
189+
(void)experiment->TearDown(config, database);
190+
}
169191
}
170192

171-
auto drop = admin_client.DropDatabase(database);
172-
if (!drop.ok()) {
173-
std::cerr << "# Error dropping database: " << drop << "\n";
193+
if (!user_specified_database) {
194+
auto drop = admin_client.DropDatabase(database);
195+
if (!drop.ok()) {
196+
std::cerr << "# Error dropping database: " << drop << "\n";
197+
}
174198
}
175-
std::cout << "# Experiment finished, database dropped\n";
199+
std::cout << "# Experiment finished, "
200+
<< (user_specified_database ? "user-specified database kept\n"
201+
: "database dropped\n");
176202
return exit_status;
177203
}
178204

@@ -1457,7 +1483,10 @@ class RunAllExperiment : public Experiment {
14571483
: generator_(generator) {}
14581484

14591485
std::string AdditionalDdlStatement() override { return {}; }
1460-
Status SetUp(Config const&, cs::Database const&) override { return {}; }
1486+
Status SetUp(Config const&, cs::Database const&) override {
1487+
setup_called_ = true;
1488+
return {};
1489+
}
14611490
Status TearDown(Config const&, cs::Database const&) override { return {}; }
14621491

14631492
Status Run(Config const& cfg, cs::Database const& database) override {
@@ -1482,11 +1511,14 @@ class RunAllExperiment : public Experiment {
14821511

14831512
std::cout << "# Smoke test for experiment\n";
14841513
std::cout << config << "\n" << std::flush;
1485-
auto status = experiment->SetUp(config, database);
1486-
if (!status.ok()) {
1487-
std::cout << "# ERROR in SetUp: " << status << "\n";
1488-
last_error = status;
1489-
continue;
1514+
if (setup_called_) {
1515+
// Only call SetUp() on each experiment if our own SetUp() was called.
1516+
auto status = experiment->SetUp(config, database);
1517+
if (!status.ok()) {
1518+
std::cout << "# ERROR in SetUp: " << status << "\n";
1519+
last_error = status;
1520+
continue;
1521+
}
14901522
}
14911523
config.use_only_clients = true;
14921524
config.use_only_stubs = false;
@@ -1501,6 +1533,7 @@ class RunAllExperiment : public Experiment {
15011533
}
15021534

15031535
private:
1536+
bool setup_called_ = false;
15041537
std::mutex mu_;
15051538
google::cloud::internal::DefaultPRNG generator_;
15061539
};

google/cloud/spanner/benchmarks/single_row_throughput_benchmark.cc

Lines changed: 44 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -83,10 +83,16 @@ int main(int argc, char* argv[]) {
8383
config.instance_id = *std::move(instance);
8484
}
8585

86-
cloud_spanner::Database database(
87-
config.project_id, config.instance_id,
88-
google::cloud::spanner_testing::RandomDatabaseName(generator));
89-
config.database_id = database.database_id();
86+
// If the user specified a database name on the command line, re-use it to
87+
// reduce setup time when running the benchmark repeatedly. It's assumed that
88+
// other flags related to database creation have not been changed across runs.
89+
bool user_specified_database = !config.database_id.empty();
90+
if (!user_specified_database) {
91+
config.database_id =
92+
google::cloud::spanner_testing::RandomDatabaseName(generator);
93+
}
94+
cloud_spanner::Database database(config.project_id, config.instance_id,
95+
config.database_id);
9096

9197
auto available = AvailableExperiments();
9298
auto e = available.find(config.experiment);
@@ -96,22 +102,30 @@ int main(int argc, char* argv[]) {
96102
}
97103

98104
cloud_spanner::DatabaseAdminClient admin_client;
99-
auto created =
105+
auto create_future =
100106
admin_client.CreateDatabase(database, {R"sql(CREATE TABLE KeyValue (
101107
Key INT64 NOT NULL,
102108
Data STRING(1024),
103109
) PRIMARY KEY (Key))sql"});
104110
std::cout << "# Waiting for database creation to complete " << std::flush;
105111
for (;;) {
106-
auto status = created.wait_for(std::chrono::seconds(1));
112+
auto status = create_future.wait_for(std::chrono::seconds(1));
107113
if (status == std::future_status::ready) break;
108114
std::cout << '.' << std::flush;
109115
}
110116
std::cout << " DONE\n";
111-
auto db = created.get();
117+
118+
bool database_created = true;
119+
auto db = create_future.get();
112120
if (!db) {
113-
std::cerr << "Error creating database: " << db.status() << "\n";
114-
return 1;
121+
if (user_specified_database &&
122+
db.status().code() == google::cloud::StatusCode::kAlreadyExists) {
123+
std::cout << "# Re-using existing database\n";
124+
database_created = false;
125+
} else {
126+
std::cerr << "Error creating database: " << db.status() << "\n";
127+
return 1;
128+
}
115129
}
116130

117131
std::cout << "ClientCount,ThreadCount,EventCount,ElapsedTime\n" << std::flush;
@@ -129,14 +143,20 @@ int main(int argc, char* argv[]) {
129143
};
130144

131145
auto experiment = e->second;
132-
experiment->SetUp(config, database);
146+
if (database_created) {
147+
experiment->SetUp(config, database);
148+
}
133149
experiment->Run(config, database, cout_sink);
134150

135-
auto drop = admin_client.DropDatabase(database);
136-
if (!drop.ok()) {
137-
std::cerr << "# Error dropping database: " << drop << "\n";
151+
if (!user_specified_database) {
152+
auto drop = admin_client.DropDatabase(database);
153+
if (!drop.ok()) {
154+
std::cerr << "# Error dropping database: " << drop << "\n";
155+
}
138156
}
139-
std::cout << "# Experiment finished, database dropped\n";
157+
std::cout << "# Experiment finished, "
158+
<< (user_specified_database ? "user-specified database kept\n"
159+
: "database dropped\n");
140160
return 0;
141161
}
142162

@@ -568,7 +588,6 @@ class SelectExperiment : public Experiment {
568588
std::cout << '.' << std::flush;
569589
}
570590
std::cout << " DONE\n";
571-
572591
std::uniform_int_distribution<int> thread_count_gen(config.minimum_threads,
573592
config.maximum_threads);
574593

@@ -654,7 +673,9 @@ class SelectExperiment : public Experiment {
654673

655674
class RunAllExperiment : public Experiment {
656675
public:
657-
void SetUp(Config const&, cloud_spanner::Database const&) override {}
676+
void SetUp(Config const&, cloud_spanner::Database const&) override {
677+
setup_called_ = true;
678+
}
658679

659680
void Run(Config const& cfg, cloud_spanner::Database const& database,
660681
SampleSink const& sink) override {
@@ -668,10 +689,16 @@ class RunAllExperiment : public Experiment {
668689
config.iteration_duration = std::chrono::seconds(1);
669690
std::cout << "# Smoke test for experiment: " << kv.first << "\n";
670691
// TODO(#1119) - tests disabled until we can stay within admin op quota
671-
kv.second->SetUp(config, database);
692+
if (setup_called_) {
693+
// Only call SetUp() on each experiment if our own SetUp() was called.
694+
kv.second->SetUp(config, database);
695+
}
672696
kv.second->Run(config, database, sink);
673697
}
674698
}
699+
700+
private:
701+
bool setup_called_ = false;
675702
};
676703

677704
std::map<std::string, std::shared_ptr<Experiment>> AvailableExperiments() {

0 commit comments

Comments
 (0)