Skip to content

Commit 24aed40

Browse files
authored
impl(bigtable): introduce first response latency metric (#15348)
* impl(bigtable): introduce first response latency metrics
1 parent ac99ef0 commit 24aed40

File tree

8 files changed

+271
-7
lines changed

8 files changed

+271
-7
lines changed

google/cloud/bigtable/benchmarks/benchmark_options.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,11 @@ google::cloud::StatusOr<BenchmarkOptions> ParseBenchmarkOptions(
9191
[&options](std::string const& val) {
9292
options.use_embedded_server = ParseBoolean(val).value_or(true);
9393
}},
94+
{"--include-read-rows", "whether to execute ReadRows for benchmarking",
95+
[&options](std::string const& val) {
96+
options.include_read_rows = ParseBoolean(val).value_or(true);
97+
}},
98+
9499
};
95100

96101
auto usage = BuildUsage(desc, argv[0]);

google/cloud/bigtable/benchmarks/benchmark_options.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ struct BenchmarkOptions {
4040
bool use_embedded_server = false;
4141
int parallel_requests = 10;
4242
bool exit_after_parse = false;
43+
bool include_read_rows = false;
4344
};
4445

4546
google::cloud::StatusOr<BenchmarkOptions> ParseBenchmarkOptions(

google/cloud/bigtable/benchmarks/benchmark_options_test.cc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ TEST(BenchmarkOptions, Basic) {
3434
auto options = ParseBenchmarkOptions(
3535
{"self-test", "--project-id=test-project", "--instance-id=test-instance",
3636
"--app-profile-id=test-app-profile-id", "--table-size=10000",
37-
"--test-duration=300s", "--use-embedded-server=true"},
37+
"--test-duration=300s", "--use-embedded-server=true",
38+
"--include-read-rows=true"},
3839
"");
3940
ASSERT_STATUS_OK(options);
4041
EXPECT_FALSE(options->exit_after_parse);
@@ -44,6 +45,7 @@ TEST(BenchmarkOptions, Basic) {
4445
EXPECT_EQ(10000, options->table_size);
4546
EXPECT_EQ(300, options->test_duration.count());
4647
EXPECT_EQ(true, options->use_embedded_server);
48+
EXPECT_EQ(true, options->include_read_rows);
4749
}
4850

4951
TEST(BenchmarkOptions, Defaults) {

google/cloud/bigtable/benchmarks/endurance_benchmark.cc

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ using bigtable::benchmarks::ParseArgs;
6363
/// Run an iteration of the test, returns the number of operations.
6464
google::cloud::StatusOr<long> RunBenchmark( // NOLINT(google-runtime-int)
6565
bigtable::benchmarks::Benchmark const& benchmark,
66-
std::chrono::seconds test_duration);
66+
std::chrono::seconds test_duration, bool include_read_rows);
6767

6868
} // anonymous namespace
6969

@@ -91,7 +91,8 @@ int main(int argc, char* argv[]) {
9191
launch_policy = std::launch::deferred;
9292
}
9393
tasks.emplace_back(std::async(launch_policy, RunBenchmark,
94-
std::ref(benchmark), options->test_duration));
94+
std::ref(benchmark), options->test_duration,
95+
options->include_read_rows));
9596
}
9697

9798
// Wait for the threads and combine all the results.
@@ -147,9 +148,32 @@ OperationResult RunOneReadRow(bigtable::Table& table,
147148
return Benchmark::TimeOperation(std::move(op));
148149
}
149150

151+
OperationResult RunOneReadRows(
152+
bigtable::Table& table, Benchmark const& benchmark,
153+
google::cloud::internal::DefaultPRNG& generator) {
154+
constexpr int kReadRowsCount = 100;
155+
auto row_set = bigtable::RowSet();
156+
157+
for (int i = 0; i != kReadRowsCount; ++i) {
158+
row_set.Append(benchmark.MakeRandomKey(generator));
159+
}
160+
161+
auto op = [&table, &row_set]() -> google::cloud::Status {
162+
auto reader = table.ReadRows(
163+
std::move(row_set),
164+
bigtable::Filter::ColumnRangeClosed(kColumnFamily, "field0", "field9"));
165+
for (auto const& row : reader) {
166+
if (!row) return std::move(row).status();
167+
}
168+
return google::cloud::Status{};
169+
};
170+
171+
return Benchmark::TimeOperation(std::move(op));
172+
}
173+
150174
google::cloud::StatusOr<long> RunBenchmark( // NOLINT(google-runtime-int)
151175
bigtable::benchmarks::Benchmark const& benchmark,
152-
std::chrono::seconds test_duration) {
176+
std::chrono::seconds test_duration, bool include_read_rows) {
153177
BenchmarkResult partial = {};
154178

155179
auto table = benchmark.MakeTable();
@@ -166,12 +190,18 @@ google::cloud::StatusOr<long> RunBenchmark( // NOLINT(google-runtime-int)
166190
}
167191
partial.operations.emplace_back(op_result);
168192
++partial.row_count;
169-
op_result = RunOneReadRow(table, benchmark, generator);
193+
194+
if (include_read_rows) {
195+
op_result = RunOneReadRows(table, benchmark, generator);
196+
} else {
197+
op_result = RunOneReadRow(table, benchmark, generator);
198+
}
170199
if (!op_result.status.ok()) {
171200
return op_result.status;
172201
}
173202
partial.operations.emplace_back(op_result);
174203
++partial.row_count;
204+
175205
op_result = RunOneApply(table, benchmark, generator);
176206
if (!op_result.status.ok()) {
177207
return op_result.status;

google/cloud/bigtable/internal/metrics.cc

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,60 @@ std::unique_ptr<Metric> RetryCount::clone(ResourceLabels resource_labels,
213213
return m;
214214
}
215215

216+
FirstResponseLatency::FirstResponseLatency(
217+
std::string const& instrumentation_scope,
218+
opentelemetry::nostd::shared_ptr<
219+
opentelemetry::metrics::MeterProvider> const& provider)
220+
: first_response_latencies_(
221+
provider
222+
->GetMeter(instrumentation_scope,
223+
kMeterInstrumentationScopeVersion)
224+
->CreateDoubleHistogram("first_response_latencies")) {}
225+
226+
void FirstResponseLatency::PreCall(opentelemetry::context::Context const&,
227+
PreCallParams const& p) {
228+
if (p.first_attempt) {
229+
operation_start_ = p.attempt_start;
230+
}
231+
}
232+
233+
void FirstResponseLatency::PostCall(opentelemetry::context::Context const&,
234+
grpc::ClientContext const& client_context,
235+
PostCallParams const&) {
236+
auto response_params = GetResponseParamsFromTrailingMetadata(client_context);
237+
if (response_params) {
238+
resource_labels_.cluster = response_params->cluster_id();
239+
resource_labels_.zone = response_params->zone_id();
240+
}
241+
}
242+
243+
void FirstResponseLatency::ElementDelivery(
244+
opentelemetry::context::Context const&, ElementDeliveryParams const& p) {
245+
if (p.first_response) {
246+
first_response_latency_ = std::chrono::duration_cast<LatencyDuration>(
247+
p.element_delivery - operation_start_);
248+
}
249+
}
250+
251+
void FirstResponseLatency::OnDone(
252+
opentelemetry::context::Context const& context, OnDoneParams const& p) {
253+
if (first_response_latency_) {
254+
data_labels_.status = StatusCodeToString(p.operation_status.code());
255+
auto m = IntoLabelMap(resource_labels_, data_labels_,
256+
std::set<std::string>{"streaming"});
257+
first_response_latencies_->Record(first_response_latency_->count(),
258+
std::move(m), context);
259+
}
260+
}
261+
262+
std::unique_ptr<Metric> FirstResponseLatency::clone(
263+
ResourceLabels resource_labels, DataLabels data_labels) const {
264+
auto m = std::make_unique<FirstResponseLatency>(*this);
265+
m->resource_labels_ = std::move(resource_labels);
266+
m->data_labels_ = std::move(data_labels);
267+
return m;
268+
}
269+
216270
GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END
217271
} // namespace bigtable_internal
218272
} // namespace cloud

google/cloud/bigtable/internal/metrics.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,34 @@ class RetryCount : public Metric {
172172
retry_count_;
173173
};
174174

175+
class FirstResponseLatency : public Metric {
176+
public:
177+
FirstResponseLatency(
178+
std::string const& instrumentation_scope,
179+
opentelemetry::nostd::shared_ptr<
180+
opentelemetry::metrics::MeterProvider> const& provider);
181+
void PreCall(opentelemetry::context::Context const&,
182+
PreCallParams const& p) override;
183+
void PostCall(opentelemetry::context::Context const& context,
184+
grpc::ClientContext const& client_context,
185+
PostCallParams const& p) override;
186+
void ElementDelivery(opentelemetry::context::Context const&,
187+
ElementDeliveryParams const&) override;
188+
void OnDone(opentelemetry::context::Context const& context,
189+
OnDoneParams const& p) override;
190+
191+
std::unique_ptr<Metric> clone(ResourceLabels resource_labels,
192+
DataLabels data_labels) const override;
193+
194+
private:
195+
ResourceLabels resource_labels_;
196+
DataLabels data_labels_;
197+
opentelemetry::nostd::shared_ptr<opentelemetry::metrics::Histogram<double>>
198+
first_response_latencies_;
199+
OperationContext::Clock::time_point operation_start_;
200+
absl::optional<LatencyDuration> first_response_latency_;
201+
};
202+
175203
GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END
176204
} // namespace bigtable_internal
177205
} // namespace cloud

google/cloud/bigtable/internal/metrics_test.cc

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -785,6 +785,149 @@ TEST(RetryCountTest, ThreeAttempts) {
785785
clone->OnDone(otel_context, {clock->Now(), Status{StatusCode::kOk, "ok"}});
786786
}
787787

788+
TEST(FirstResponseLatency, Success) {
789+
auto mock_histogram = std::make_unique<MockHistogram<double>>();
790+
EXPECT_CALL(
791+
*mock_histogram,
792+
Record(A<double>(), A<opentelemetry::common::KeyValueIterable const&>(),
793+
A<opentelemetry::context::Context const&>()))
794+
.WillOnce([](double value,
795+
opentelemetry::common::KeyValueIterable const& attributes,
796+
opentelemetry::context::Context const&) {
797+
EXPECT_THAT(value, Eq(2.0));
798+
EXPECT_THAT(
799+
MakeAttributesMap(attributes),
800+
UnorderedElementsAre(
801+
Pair("project_id", "my-project-id"),
802+
Pair("instance", "my-instance"), Pair("cluster", "my-cluster"),
803+
Pair("table", "my-table"), Pair("zone", "my-zone"),
804+
Pair("method", "my-method"), Pair("status", "OK"),
805+
Pair("client_name", "my-client-name"),
806+
Pair("client_uid", "my-client-uid"),
807+
Pair("app_profile", "my-app-profile")));
808+
});
809+
810+
opentelemetry::nostd::shared_ptr<MockMeter> mock_meter =
811+
std::make_shared<MockMeter>();
812+
EXPECT_CALL(*mock_meter, CreateDoubleHistogram)
813+
.WillOnce([mock = std::move(mock_histogram)](
814+
opentelemetry::nostd::string_view name,
815+
opentelemetry::nostd::string_view,
816+
opentelemetry::nostd::string_view) mutable {
817+
EXPECT_THAT(name, Eq("first_response_latencies"));
818+
return std::move(mock);
819+
});
820+
821+
opentelemetry::nostd::shared_ptr<MockMeterProvider> mock_provider =
822+
std::make_shared<MockMeterProvider>();
823+
EXPECT_CALL(*mock_provider, GetMeter)
824+
#if OPENTELEMETRY_ABI_VERSION_NO >= 2
825+
.WillOnce([&](opentelemetry::nostd::string_view scope,
826+
opentelemetry::nostd::string_view scope_version,
827+
opentelemetry::nostd::string_view,
828+
opentelemetry::common::KeyValueIterable const*) mutable {
829+
#else
830+
.WillOnce([&](opentelemetry::nostd::string_view scope,
831+
opentelemetry::nostd::string_view scope_version,
832+
opentelemetry::nostd::string_view) mutable {
833+
#endif
834+
EXPECT_THAT(scope, Eq("my-instrument-scope"));
835+
EXPECT_THAT(scope_version, Eq("v1"));
836+
return mock_meter;
837+
});
838+
839+
FirstResponseLatency first_response_latency("my-instrument-scope",
840+
mock_provider);
841+
ResourceLabels resource_labels{"my-project-id", "my-instance", "my-table", "",
842+
""};
843+
DataLabels data_labels{"my-method", "my-streaming", "my-client-name",
844+
"my-client-uid", "my-app-profile", ""};
845+
auto clone = first_response_latency.clone(resource_labels, data_labels);
846+
847+
grpc::ClientContext client_context;
848+
SetClusterZone(client_context, "my-cluster", "my-zone");
849+
850+
auto otel_context = opentelemetry::context::RuntimeContext::GetCurrent();
851+
auto clock = std::make_shared<FakeSteadyClock>();
852+
853+
// Verify that only the first response is recorded.
854+
clock->SetTime(std::chrono::steady_clock::now());
855+
clone->PreCall(otel_context, {clock->Now(), true});
856+
clock->AdvanceTime(std::chrono::milliseconds(2));
857+
clone->ElementDelivery(otel_context, {clock->Now(), true});
858+
clock->AdvanceTime(std::chrono::milliseconds(5));
859+
clone->ElementDelivery(otel_context, {clock->Now(), false});
860+
clock->AdvanceTime(std::chrono::milliseconds(5));
861+
clone->PostCall(otel_context, client_context,
862+
{clock->Now(), Status{StatusCode::kOk, "ok"}});
863+
clock->AdvanceTime(std::chrono::milliseconds(5));
864+
clone->OnDone(otel_context, {clock->Now(), Status{StatusCode::kOk, "ok"}});
865+
}
866+
867+
TEST(FirstResponseLatency, NoDataReceived) {
868+
auto mock_histogram = std::make_unique<MockHistogram<double>>();
869+
EXPECT_CALL(
870+
*mock_histogram,
871+
Record(A<double>(), A<opentelemetry::common::KeyValueIterable const&>(),
872+
A<opentelemetry::context::Context const&>()))
873+
.Times(0);
874+
875+
opentelemetry::nostd::shared_ptr<MockMeter> mock_meter =
876+
std::make_shared<MockMeter>();
877+
EXPECT_CALL(*mock_meter, CreateDoubleHistogram)
878+
.WillOnce([mock = std::move(mock_histogram)](
879+
opentelemetry::nostd::string_view name,
880+
opentelemetry::nostd::string_view,
881+
opentelemetry::nostd::string_view) mutable {
882+
EXPECT_THAT(name, Eq("first_response_latencies"));
883+
return std::move(mock);
884+
});
885+
886+
opentelemetry::nostd::shared_ptr<MockMeterProvider> mock_provider =
887+
std::make_shared<MockMeterProvider>();
888+
EXPECT_CALL(*mock_provider, GetMeter)
889+
#if OPENTELEMETRY_ABI_VERSION_NO >= 2
890+
.WillOnce([&](opentelemetry::nostd::string_view scope,
891+
opentelemetry::nostd::string_view scope_version,
892+
opentelemetry::nostd::string_view,
893+
opentelemetry::common::KeyValueIterable const*) mutable {
894+
#else
895+
.WillOnce([&](opentelemetry::nostd::string_view scope,
896+
opentelemetry::nostd::string_view scope_version,
897+
opentelemetry::nostd::string_view) mutable {
898+
#endif
899+
EXPECT_THAT(scope, Eq("my-instrument-scope"));
900+
EXPECT_THAT(scope_version, Eq("v1"));
901+
return mock_meter;
902+
});
903+
904+
FirstResponseLatency first_response_latency("my-instrument-scope",
905+
mock_provider);
906+
ResourceLabels resource_labels{"my-project-id", "my-instance", "my-table", "",
907+
""};
908+
DataLabels data_labels{"my-method", "my-streaming", "my-client-name",
909+
"my-client-uid", "my-app-profile", ""};
910+
auto clone = first_response_latency.clone(resource_labels, data_labels);
911+
912+
grpc::ClientContext client_context;
913+
SetClusterZone(client_context, "my-cluster", "my-zone");
914+
915+
auto otel_context = opentelemetry::context::RuntimeContext::GetCurrent();
916+
auto clock = std::make_shared<FakeSteadyClock>();
917+
918+
// ElementDelivery is not called.
919+
clock->SetTime(std::chrono::steady_clock::now());
920+
clone->PreCall(otel_context, {clock->Now(), true});
921+
clock->AdvanceTime(std::chrono::milliseconds(5));
922+
clone->PostCall(
923+
otel_context, client_context,
924+
{clock->Now(), Status{StatusCode::kDeadlineExceeded, "timeout"}});
925+
clock->AdvanceTime(std::chrono::milliseconds(5));
926+
clone->OnDone(
927+
otel_context,
928+
{clock->Now(), Status{StatusCode::kDeadlineExceeded, "timeout"}});
929+
}
930+
788931
} // namespace
789932
GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END
790933
} // namespace bigtable_internal

google/cloud/bigtable/internal/operation_context_factory.cc

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -266,9 +266,10 @@ std::shared_ptr<OperationContext> MetricsOperationContextFactory::ReadRows(
266266
v.emplace_back(std::make_shared<OperationLatency>(kRpc, provider_));
267267
v.emplace_back(std::make_shared<AttemptLatency>(kRpc, provider_));
268268
v.emplace_back(std::make_shared<RetryCount>(kRpc, provider_));
269+
v.emplace_back(std::make_shared<FirstResponseLatency>(kRpc, provider_));
269270
// v.emplace_back(std::make_shared<ApplicationBlockingLatency>(kRpc,
270-
// provider_)); v.emplace_back(std::make_shared<FirstResponseLatency>(kRpc,
271-
// provider_)); v.emplace_back(std::make_shared<ServerLatency>(kRpc,
271+
// provider_));
272+
// v.emplace_back(std::make_shared<ServerLatency>(kRpc,
272273
// provider_));
273274
// v.emplace_back(std::make_shared<ConnectivityErrorCount>(kRpc,
274275
// provider_));

0 commit comments

Comments
 (0)