Skip to content

Commit 404d9a8

Browse files
authored
test(storage): more consistent benchmark output and options (#9748)
Now all benchmarks support `--labels`, which makes it easier to concatenate and then analyze the results from multiple tests. The tests also capture GCE metadata by default; this is useful for analysis too. I also changed the output from the different benchmarks to start with the timestamp of each experiment, followed by the labels.
1 parent e7ea808 commit 404d9a8

12 files changed

+224
-131
lines changed

google/cloud/storage/benchmarks/aggregate_download_throughput_benchmark.cc

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -131,10 +131,6 @@ gcs::Client MakeClient(AggregateDownloadThroughputOptions const& options) {
131131
return gcs::Client(std::move(opts));
132132
}
133133

134-
std::string CurrentTime() {
135-
return FormatTimestamp(std::chrono::system_clock::now());
136-
};
137-
138134
void PrintResults(AggregateDownloadThroughputOptions const& options,
139135
std::size_t object_count, std::uint64_t dataset_size,
140136
std::vector<TaskResult> const& iteration_results,
@@ -172,7 +168,7 @@ int main(int argc, char* argv[]) {
172168
std::transform(notes.begin(), notes.end(), notes.begin(),
173169
[](char c) { return c == '\n' ? ';' : c; });
174170

175-
std::cout << "# Start time: " << CurrentTime()
171+
std::cout << "# Start time: " << gcs_bm::CurrentTime()
176172
<< "\n# Labels: " << options->labels
177173
<< "\n# Bucket Name: " << options->bucket_name
178174
<< "\n# Object Prefix: " << options->object_prefix
@@ -218,7 +214,7 @@ int main(int argc, char* argv[]) {
218214
// our analysis tools (typically Python pandas, but could be R). Flush the
219215
// header because sometimes we interrupt the benchmark and these tools
220216
// require a header even for empty files.
221-
std::cout << "Iteration,Start,Labels,ObjectCount,DatasetSize,ThreadCount"
217+
std::cout << "Start,Labels,Iteration,ObjectCount,DatasetSize,ThreadCount"
222218
<< ",RepeatsPerIteration,ReadSize,ReadBufferSize,Api"
223219
<< ",ClientPerThread"
224220
<< ",StatusCode,Peer,BytesDownloaded,ElapsedMicroseconds"
@@ -364,8 +360,13 @@ google::cloud::StatusOr<AggregateDownloadThroughputOptions> ParseArgs(
364360
GetEnv("GOOGLE_CLOUD_CPP_AUTO_RUN_EXAMPLES").value_or("") == "yes";
365361
if (auto_run) return SelfTest(argv[0]);
366362

367-
return gcs_bm::ParseAggregateDownloadThroughputOptions({argv, argv + argc},
368-
kDescription);
363+
auto options = gcs_bm::ParseAggregateDownloadThroughputOptions(
364+
{argv, argv + argc}, kDescription);
365+
if (!options) return options;
366+
// We don't want to get the default labels in the unit tests, as they can
367+
// flake.
368+
options->labels = gcs_bm::AddDefaultLabels(std::move(options->labels));
369+
return options;
369370
}
370371

371372
void PrintResults(AggregateDownloadThroughputOptions const& options,
@@ -395,9 +396,9 @@ void PrintResults(AggregateDownloadThroughputOptions const& options,
395396
for (auto const& d : r.details) {
396397
// Join the iteration details with the per-download details. That makes
397398
// it easier to analyze the data in external scripts.
398-
std::cout << ',' << d.iteration //
399-
<< ',' << FormatTimestamp(d.start_time) //
400-
<< labels //
399+
std::cout << FormatTimestamp(d.start_time) //
400+
<< ',' << labels //
401+
<< ',' << d.iteration //
401402
<< ',' << object_count //
402403
<< ',' << dataset_size //
403404
<< ',' << options.thread_count //
@@ -420,7 +421,8 @@ void PrintResults(AggregateDownloadThroughputOptions const& options,
420421
// the operator of these benchmarks (coryan@) is an impatient person.
421422
auto const bandwidth =
422423
FormatBandwidthGbPerSecond(downloaded_bytes, usage.elapsed_time);
423-
std::cout << "# " << CurrentTime() << " downloaded=" << downloaded_bytes
424+
std::cout << "# " << gcs_bm::CurrentTime()
425+
<< " downloaded=" << downloaded_bytes
424426
<< " cpu_time=" << absl::FromChrono(usage.cpu_time)
425427
<< " elapsed_time=" << absl::FromChrono(usage.elapsed_time)
426428
<< " Gbit/s=" << bandwidth << std::endl;

google/cloud/storage/benchmarks/aggregate_upload_throughput_benchmark.cc

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -142,11 +142,7 @@ int main(int argc, char* argv[]) {
142142
std::transform(notes.begin(), notes.end(), notes.begin(),
143143
[](char c) { return c == '\n' ? ';' : c; });
144144

145-
auto current_time = [] {
146-
return FormatTimestamp(std::chrono::system_clock::now());
147-
};
148-
149-
std::cout << "# Start time: " << current_time()
145+
std::cout << "# Start time: " << gcs_bm::CurrentTime()
150146
<< "\n# Labels: " << options->labels
151147
<< "\n# Bucket Name: " << options->bucket_name
152148
<< "\n# Object Prefix: " << options->object_prefix
@@ -210,7 +206,7 @@ int main(int argc, char* argv[]) {
210206
// header because sometimes we interrupt the benchmark and these tools
211207
// require a header even for empty files.
212208
std::cout
213-
<< "Iteration,Start,Labels,ObjectCount,ResumableUploadChunkSize"
209+
<< "Start,Labels,Iteration,ObjectCount,ResumableUploadChunkSize"
214210
<< ",ThreadCount,Api,ClientPerThread"
215211
<< ",BucketName,ObjectName,UploadId,Peer,StatusCode"
216212
<< ",BytesUploaded,ElapsedMicroseconds"
@@ -250,9 +246,9 @@ int main(int argc, char* argv[]) {
250246
for (auto const& d : r.details) {
251247
// Join the iteration details with the per-upload details. That makes
252248
// it easier to analyze the data in external scripts.
253-
std::cout << d.iteration //
254-
<< ',' << FormatTimestamp(d.start_time) //
249+
std::cout << FormatTimestamp(d.start_time) //
255250
<< ',' << labels //
251+
<< ',' << d.iteration //
256252
<< ',' << options->object_count //
257253
<< ',' << options->resumable_upload_chunk_size //
258254
<< ',' << options->thread_count //
@@ -277,7 +273,7 @@ int main(int argc, char* argv[]) {
277273
// the operator of these benchmarks (coryan@) is an impatient person.
278274
auto const bandwidth =
279275
FormatBandwidthGbPerSecond(uploaded_bytes, usage.elapsed_time);
280-
std::cout << "# " << current_time() << " uploaded=" << uploaded_bytes
276+
std::cout << "# " << gcs_bm::CurrentTime() << " uploaded=" << uploaded_bytes
281277
<< " cpu_time=" << absl::FromChrono(usage.cpu_time)
282278
<< " elapsed_time=" << absl::FromChrono(usage.elapsed_time)
283279
<< " Gbit/s=" << bandwidth << std::endl;
@@ -410,8 +406,13 @@ google::cloud::StatusOr<AggregateUploadThroughputOptions> ParseArgs(
410406
GetEnv("GOOGLE_CLOUD_CPP_AUTO_RUN_EXAMPLES").value_or("") == "yes";
411407
if (auto_run) return SelfTest(argv[0]);
412408

413-
return gcs_bm::ParseAggregateUploadThroughputOptions({argv, argv + argc},
414-
kDescription);
409+
auto options = gcs_bm::ParseAggregateUploadThroughputOptions(
410+
{argv, argv + argc}, kDescription);
411+
if (!options) return options;
412+
// We don't want to get the default labels in the unit tests, as they can
413+
// flake.
414+
options->labels = gcs_bm::AddDefaultLabels(std::move(options->labels));
415+
return options;
415416
}
416417

417418
} // namespace

google/cloud/storage/benchmarks/benchmark_utils.cc

Lines changed: 85 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,18 @@
1616
#include "google/cloud/storage/benchmarks/bounded_queue.h"
1717
#include "google/cloud/storage/options.h"
1818
#include "google/cloud/grpc_options.h"
19+
#include "google/cloud/internal/absl_str_cat_quiet.h"
20+
#include "google/cloud/internal/absl_str_join_quiet.h"
21+
#include "google/cloud/internal/compute_engine_util.h"
22+
#include "google/cloud/internal/curl_options.h"
23+
#include "google/cloud/internal/rest_client.h"
1924
#include "google/cloud/internal/throw_delegate.h"
2025
#include "google/cloud/options.h"
26+
#include "absl/strings/str_split.h"
27+
#include "absl/strings/strip.h"
2128
#include "absl/time/time.h"
2229
#include <future>
2330
#include <sstream>
24-
#include <stdexcept>
2531

2632
namespace google {
2733
namespace cloud {
@@ -211,13 +217,91 @@ void PrintOptions(std::ostream& os, std::string const& prefix,
211217
.has<google::cloud::storage::internal::TargetApiVersionOption>();
212218
}
213219
}
220+
214221
// Format a timestamp
215222
std::string FormatTimestamp(std::chrono::system_clock::time_point tp) {
216223
auto constexpr kFormat = "%E4Y-%m-%dT%H:%M:%E*SZ";
217224
auto const t = absl::FromChrono(tp);
218225
return absl::FormatTime(kFormat, t, absl::UTCTimeZone());
219226
}
220227

228+
absl::optional<std::string> GetLabel(std::vector<std::string> const& labels,
229+
std::string const& prefix) {
230+
for (auto const& label : labels) {
231+
if (absl::StartsWith(label, prefix)) {
232+
return std::string{absl::StripPrefix(label, prefix)};
233+
}
234+
}
235+
return absl::nullopt;
236+
}
237+
238+
absl::optional<std::string> GetLabel(std::string const& labels,
239+
std::string const& prefix) {
240+
return GetLabel(absl::StrSplit(labels, ','), prefix);
241+
}
242+
243+
absl::optional<std::string> Zone(std::string const& labels) {
244+
return GetLabel(labels, "zone:");
245+
}
246+
247+
absl::optional<std::string> Job(std::string const& labels) {
248+
return GetLabel(labels, "job:");
249+
}
250+
251+
absl::optional<std::string> Task(std::string const& labels) {
252+
return GetLabel(labels, "task:");
253+
}
254+
255+
using ::google::cloud::rest_internal::ReadAll;
256+
using ::google::cloud::rest_internal::RestClient;
257+
using ::google::cloud::rest_internal::RestRequest;
258+
259+
absl::optional<std::string> GetMetadata(RestClient& metadata_server,
260+
std::string const& path) {
261+
RestRequest request(path);
262+
request.AddHeader("Metadata-Flavor", "Google");
263+
auto response_status = metadata_server.Get(request);
264+
if (!response_status) return absl::nullopt;
265+
auto response = *std::move(response_status);
266+
auto const status_code = response->StatusCode();
267+
auto contents = ReadAll(std::move(*response).ExtractPayload());
268+
if (status_code != 200) return absl::nullopt;
269+
if (!contents) return absl::nullopt;
270+
// A lot of metadata attributes have the full resource name (e.g.,
271+
// projects/.../zones/...); we just want the last portion.
272+
std::vector<absl::string_view> split = absl::StrSplit(*contents, '/');
273+
return std::string{split.back()};
274+
}
275+
276+
std::string AddDefaultLabels(std::string const& labels) {
277+
using google::cloud::rest_internal::ConnectionPoolSizeOption;
278+
auto metadata_server = google::cloud::rest_internal::MakePooledRestClient(
279+
absl::StrCat("http", "://",
280+
google::cloud::internal::GceMetadataHostname()),
281+
google::cloud::Options{}.set<ConnectionPoolSizeOption>(4));
282+
struct {
283+
std::string prefix;
284+
std::string path;
285+
} defaults[] = {
286+
{"zone:", "computeMetadata/v1/instance/zone"},
287+
{"machine-type:", "computeMetadata/v1/instance/machine-type"},
288+
{"instance-name:", "computeMetadata/v1/instance/name"},
289+
{"instance-id:", "computeMetadata/v1/instance/id"},
290+
{"project-id:", "computeMetadata/v1/project/project-id"},
291+
{"project-number:", "computeMetadata/v1/project/numeric-project-id"},
292+
};
293+
std::vector<std::string> components =
294+
absl::StrSplit(labels, ',', absl::SkipWhitespace());
295+
for (auto const& d : defaults) {
296+
if (!GetLabel(components, d.prefix).has_value()) {
297+
auto contents = GetMetadata(*metadata_server, d.path);
298+
if (!contents.has_value()) continue;
299+
components.push_back(d.prefix + *contents);
300+
}
301+
}
302+
return absl::StrJoin(components, ",");
303+
}
304+
221305
} // namespace storage_benchmarks
222306
} // namespace cloud
223307
} // namespace google

google/cloud/storage/benchmarks/benchmark_utils.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,18 @@ void PrintOptions(std::ostream& os, std::string const& prefix,
129129
// Format a timestamp
130130
std::string FormatTimestamp(std::chrono::system_clock::time_point tp);
131131

132+
// The current time, formatted
133+
inline std::string CurrentTime() {
134+
return FormatTimestamp(std::chrono::system_clock::now());
135+
}
136+
137+
absl::optional<std::string> GetLabel(std::string const& labels,
138+
std::string const& prefix);
139+
absl::optional<std::string> Zone(std::string const& labels);
140+
absl::optional<std::string> Job(std::string const& labels);
141+
absl::optional<std::string> Task(std::string const& labels);
142+
std::string AddDefaultLabels(std::string const& labels);
143+
132144
} // namespace storage_benchmarks
133145
} // namespace cloud
134146
} // namespace google

google/cloud/storage/benchmarks/storage_throughput_vs_cpu_benchmark.cc

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,8 @@ A helper script in this directory can generate pretty graphs from the output of
105105
this program.
106106
)""";
107107

108-
using ResultHandler = std::function<void(gcs_bm::ThroughputResult)>;
108+
using ResultHandler =
109+
std::function<void(ThroughputOptions const&, gcs_bm::ThroughputResult)>;
109110

110111
gcs_bm::ClientProvider MakeProvider(ThroughputOptions const& options);
111112

@@ -166,10 +167,11 @@ int main(int argc, char* argv[]) {
166167
<< "]\n# " << name << " Quantum: " << quantum;
167168
};
168169

169-
std::cout << "# Running test on bucket: " << bucket_name << "\n# Start time: "
170-
<< google::cloud::internal::FormatRfc3339(
171-
std::chrono::system_clock::now())
172-
<< "\n# Region: " << options->region << "\n# Duration: "
170+
std::cout << "# Start time: " << gcs_bm::CurrentTime() //
171+
<< "\n# Labels: " << options->labels //
172+
<< "\n# Running test on bucket: " << bucket_name //
173+
<< "\n# Region: " << options->region //
174+
<< "\n# Duration: "
173175
<< absl::FormatDuration(absl::FromChrono(options->duration))
174176
<< "\n# Thread Count: " << options->thread_count
175177
<< "\n# Client Per Thread: " << options->client_per_thread;
@@ -228,9 +230,10 @@ int main(int argc, char* argv[]) {
228230

229231
// Serialize output to `std::cout`.
230232
std::mutex mu;
231-
auto handler = [&mu](gcs_bm::ThroughputResult const& result) {
233+
auto handler = [&mu](ThroughputOptions const& options,
234+
gcs_bm::ThroughputResult const& result) {
232235
std::lock_guard<std::mutex> lk(mu);
233-
gcs_bm::PrintAsCsv(std::cout, result);
236+
gcs_bm::PrintAsCsv(std::cout, options, result);
234237
if (!result.status.ok()) {
235238
google::cloud::LogSink::Instance().Flush();
236239
}
@@ -403,16 +406,16 @@ void RunThread(ThroughputOptions const& options, std::string const& bucket_name,
403406
gcs_bm::kOpWrite, object_size, write_buffer_size, enable_crc,
404407
enable_md5, /*read_range=*/absl::nullopt});
405408
auto status = upload_result.status;
406-
handler(std::move(upload_result));
409+
handler(options, std::move(upload_result));
407410

408411
if (!status.ok()) continue;
409412

410413
auto& downloader = downloaders[downloader_generator(generator)];
411414
for (auto op : {gcs_bm::kOpRead0, gcs_bm::kOpRead1, gcs_bm::kOpRead2}) {
412-
handler(downloader->Run(
413-
bucket_name, object_name,
414-
gcs_bm::ThroughputExperimentConfig{op, object_size, read_buffer_size,
415-
enable_crc, enable_md5, range}));
415+
handler(options, downloader->Run(bucket_name, object_name,
416+
gcs_bm::ThroughputExperimentConfig{
417+
op, object_size, read_buffer_size,
418+
enable_crc, enable_md5, range}));
416419
}
417420
auto client = provider(ExperimentTransport::kJson);
418421
(void)client.DeleteObject(bucket_name, object_name);
@@ -468,7 +471,13 @@ google::cloud::StatusOr<ThroughputOptions> ParseArgs(int argc, char* argv[]) {
468471
.value_or("") == "yes";
469472
if (auto_run) return SelfTest(argv[0]);
470473

471-
return gcs_bm::ParseThroughputOptions({argv, argv + argc}, kDescription);
474+
auto options =
475+
gcs_bm::ParseThroughputOptions({argv, argv + argc}, kDescription);
476+
if (!options) return options;
477+
// We don't want to get the default labels in the unit tests, as they can
478+
// flake.
479+
options->labels = gcs_bm::AddDefaultLabels(std::move(options->labels));
480+
return options;
472481
}
473482

474483
} // namespace

0 commit comments

Comments
 (0)