Skip to content

Commit 855ac89

Browse files
authored
impl(bigtable): add first three metrics (#15330)
1 parent 272f1ab commit 855ac89

File tree

5 files changed

+1012
-88
lines changed

5 files changed

+1012
-88
lines changed

google/cloud/bigtable/internal/metrics.cc

Lines changed: 166 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,26 +16,59 @@
1616

1717
#include "google/cloud/bigtable/internal/metrics.h"
1818
#include "google/cloud/bigtable/version.h"
19+
#include <algorithm>
20+
#include <map>
21+
#include <set>
1922

2023
namespace google {
2124
namespace cloud {
2225
namespace bigtable_internal {
2326
GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN
27+
namespace {
// Version string passed to MeterProvider::GetMeter() alongside the
// instrumentation scope name by every metric defined in this file.
constexpr char const* kMeterInstrumentationScopeVersion = "v1";
}  // namespace
2430

25-
LabelMap IntoLabelMap(ResourceLabels const& r, DataLabels const& d) {
26-
return {
27-
{"project_id", r.project_id},
28-
{"instance", r.instance},
29-
{"table", r.table},
30-
{"cluster", r.cluster},
31-
{"zone", r.zone},
31+
// TODO(#15329): Refactor how we're handling different data labels for
32+
// the various RPCs. Adding a function to each metric type to add its DataLabels
33+
// to the map should be more performant than performing a set_difference every
34+
// time.
35+
LabelMap IntoLabelMap(ResourceLabels const& r, DataLabels const& d,
36+
std::set<std::string> const& filtered_data_labels) {
37+
LabelMap labels = {{"project_id", r.project_id},
38+
{"instance", r.instance},
39+
{"table", r.table},
40+
{"cluster", r.cluster},
41+
{"zone", r.zone}};
42+
std::map<std::string, std::string> data = {{
3243
{"method", d.method},
3344
{"streaming", d.streaming},
3445
{"client_name", d.client_name},
3546
{"client_uid", d.client_uid},
3647
{"app_profile", d.app_profile},
3748
{"status", d.status},
49+
}};
50+
51+
if (filtered_data_labels.empty()) {
52+
labels.insert(data.begin(), data.end());
53+
return labels;
54+
}
55+
56+
struct Compare {
57+
bool operator()(std::pair<std::string const, std::string> const& a,
58+
std::string const& b) {
59+
return a.first < b;
60+
}
61+
62+
bool operator()(std::string const& a,
63+
std::pair<std::string const, std::string> const& b) {
64+
return a < b.first;
65+
}
3866
};
67+
68+
std::set_difference(data.begin(), data.end(), filtered_data_labels.begin(),
69+
filtered_data_labels.end(),
70+
std::inserter(labels, labels.begin()), Compare());
71+
return labels;
3972
}
4073

4174
absl::optional<google::bigtable::v2::ResponseParams>
@@ -54,6 +87,132 @@ GetResponseParamsFromTrailingMetadata(
5487

5588
Metric::~Metric() = default;
5689

90+
// Creates the "operation_latencies" double histogram on the meter that
// `provider` returns for `instrumentation_scope` at
// kMeterInstrumentationScopeVersion.
//
// The instrument returned by CreateDoubleHistogram() initializes the
// shared_ptr member directly (as AttemptLatency does) rather than passing
// through a raw pointer obtained with release().
OperationLatency::OperationLatency(
    std::string const& instrumentation_scope,
    opentelemetry::nostd::shared_ptr<
        opentelemetry::metrics::MeterProvider> const& provider)
    : operation_latencies_(provider
                               ->GetMeter(instrumentation_scope,
                                          kMeterInstrumentationScopeVersion)
                               ->CreateDoubleHistogram("operation_latencies")) {
}
99+
100+
// Remembers when the operation began. Only the first attempt sets the start
// time; later attempts leave it untouched.
void OperationLatency::PreCall(opentelemetry::context::Context const&,
                               PreCallParams const& p) {
  if (!p.first_attempt) return;
  operation_start_ = p.attempt_start;
}
106+
107+
// Updates the cluster and zone resource labels from the response parameters
// found in the trailing metadata of `client_context`. When no response
// parameters are available the labels are left unchanged.
void OperationLatency::PostCall(opentelemetry::context::Context const&,
                                grpc::ClientContext const& client_context,
                                PostCallParams const&) {
  auto const params = GetResponseParamsFromTrailingMetadata(client_context);
  if (!params) return;
  resource_labels_.cluster = params->cluster_id();
  resource_labels_.zone = params->zone_id();
}
116+
117+
// Records the time elapsed between the stored operation start and
// `p.operation_end`, converted to LatencyDuration, in the operation latency
// histogram. The final operation status becomes the "status" label.
void OperationLatency::OnDone(opentelemetry::context::Context const& context,
                              OnDoneParams const& p) {
  data_labels_.status = StatusCodeToString(p.operation_status.code());
  auto const elapsed = std::chrono::duration_cast<LatencyDuration>(
      p.operation_end - operation_start_);
  auto const labels = IntoLabelMap(resource_labels_, data_labels_);
  operation_latencies_->Record(elapsed.count(), labels, context);
}
126+
127+
// Returns a copy of this metric with its labels replaced by `resource_labels`
// and `data_labels`. The copy is made with the copy constructor, so the
// instrument handle is copied along with the other members.
std::unique_ptr<Metric> OperationLatency::clone(ResourceLabels resource_labels,
                                                DataLabels data_labels) const {
  auto copy = std::make_unique<OperationLatency>(*this);
  copy->resource_labels_ = std::move(resource_labels);
  copy->data_labels_ = std::move(data_labels);
  return copy;
}
134+
135+
// Creates the "attempt_latencies" double histogram on the meter that
// `provider` returns for `instrumentation_scope` at
// kMeterInstrumentationScopeVersion.
AttemptLatency::AttemptLatency(
    std::string const& instrumentation_scope,
    opentelemetry::nostd::shared_ptr<
        opentelemetry::metrics::MeterProvider> const& provider)
    : attempt_latencies_(
          provider
              ->GetMeter(instrumentation_scope,
                         kMeterInstrumentationScopeVersion)
              ->CreateDoubleHistogram("attempt_latencies")) {}
143+
144+
// Remembers when the current attempt started; every attempt overwrites the
// previous value.
void AttemptLatency::PreCall(opentelemetry::context::Context const&,
                             PreCallParams const& p) {
  // `p` is a const reference, so std::move() here could only ever copy.
  // Assign directly to make that explicit.
  attempt_start_ = p.attempt_start;
}
148+
149+
// Records the latency of the attempt that just finished.
//
// The cluster and zone labels are refreshed from the response parameters in
// the trailing metadata when present, the attempt status becomes the "status"
// label, and the time between the stored attempt start and `p.attempt_end`
// (as LatencyDuration) is recorded in the attempt latency histogram.
void AttemptLatency::PostCall(opentelemetry::context::Context const& context,
                              grpc::ClientContext const& client_context,
                              PostCallParams const& p) {
  auto const params = GetResponseParamsFromTrailingMetadata(client_context);
  if (params) {
    resource_labels_.cluster = params->cluster_id();
    resource_labels_.zone = params->zone_id();
  }
  data_labels_.status = StatusCodeToString(p.attempt_status.code());

  auto const elapsed = std::chrono::duration_cast<LatencyDuration>(
      p.attempt_end - attempt_start_);
  auto labels = IntoLabelMap(resource_labels_, data_labels_);
  attempt_latencies_->Record(elapsed.count(), std::move(labels), context);
}
163+
164+
// Returns a copy of this metric with its labels replaced by `resource_labels`
// and `data_labels`. The copy is made with the copy constructor, so the
// instrument handle is copied along with the other members.
std::unique_ptr<Metric> AttemptLatency::clone(ResourceLabels resource_labels,
                                              DataLabels data_labels) const {
  auto copy = std::make_unique<AttemptLatency>(*this);
  copy->resource_labels_ = std::move(resource_labels);
  copy->data_labels_ = std::move(data_labels);
  return copy;
}
171+
172+
// Creates the "retry_count" unsigned 64-bit counter on the meter that
// `provider` returns for `instrumentation_scope` at
// kMeterInstrumentationScopeVersion.
//
// The instrument returned by CreateUInt64Counter() initializes the shared_ptr
// member directly (as AttemptLatency does) rather than passing through a raw
// pointer obtained with release().
RetryCount::RetryCount(
    std::string const& instrumentation_scope,
    opentelemetry::nostd::shared_ptr<
        opentelemetry::metrics::MeterProvider> const& provider)
    : retry_count_(provider
                       ->GetMeter(instrumentation_scope,
                                  kMeterInstrumentationScopeVersion)
                       ->CreateUInt64Counter("retry_count")) {}
181+
182+
// Counts retries: every attempt other than the first one increments the
// counter.
void RetryCount::PreCall(opentelemetry::context::Context const&,
                         PreCallParams const& p) {
  if (p.first_attempt) return;
  ++num_retries_;
}
188+
189+
// Updates the cluster and zone resource labels from the response parameters
// found in the trailing metadata of `client_context`. When no response
// parameters are available the labels are left unchanged.
void RetryCount::PostCall(opentelemetry::context::Context const&,
                          grpc::ClientContext const& client_context,
                          PostCallParams const&) {
  auto const params = GetResponseParamsFromTrailingMetadata(client_context);
  if (!params) return;
  resource_labels_.cluster = params->cluster_id();
  resource_labels_.zone = params->zone_id();
}
198+
199+
// Adds the number of retries counted for this operation to the "retry_count"
// counter. The final operation status becomes the "status" label, and the
// "streaming" data label is left out of the recorded label set.
void RetryCount::OnDone(opentelemetry::context::Context const& context,
                        OnDoneParams const& p) {
  // The excluded-label set never changes, so build it once instead of
  // allocating a new std::set for every completed operation. It is
  // intentionally never destroyed, which avoids destruction-order problems at
  // program exit.
  static auto const* const kExcludedLabels =
      new std::set<std::string>{"streaming"};

  data_labels_.status = StatusCodeToString(p.operation_status.code());
  retry_count_->Add(
      num_retries_,
      IntoLabelMap(resource_labels_, data_labels_, *kExcludedLabels), context);
}
207+
208+
// Returns a copy of this metric with its labels replaced by `resource_labels`
// and `data_labels`. The copy is made with the copy constructor, so the
// instrument handle and the current retry count are copied as well.
std::unique_ptr<Metric> RetryCount::clone(ResourceLabels resource_labels,
                                          DataLabels data_labels) const {
  auto copy = std::make_unique<RetryCount>(*this);
  copy->resource_labels_ = std::move(resource_labels);
  copy->data_labels_ = std::move(data_labels);
  return copy;
}
215+
57216
GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END
58217
} // namespace bigtable_internal
59218
} // namespace cloud

google/cloud/bigtable/internal/metrics.h

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@
2323
#include <google/bigtable/v2/response_params.pb.h>
#include <grpcpp/grpcpp.h>
#include <opentelemetry/context/context.h>
#include <opentelemetry/metrics/meter.h>
#include <opentelemetry/metrics/meter_provider.h>
#include <opentelemetry/metrics/sync_instruments.h>
#include <cstdint>
#include <memory>
#include <set>
#include <string>
#include <unordered_map>
@@ -50,7 +53,8 @@ struct DataLabels {
5053
};
5154

5255
using LabelMap = std::unordered_map<std::string, std::string>;
53-
LabelMap IntoLabelMap(ResourceLabels const& r, DataLabels const& d);
56+
LabelMap IntoLabelMap(ResourceLabels const& r, DataLabels const& d,
57+
std::set<std::string> const& filtered_data_labels = {});
5458

5559
absl::optional<google::bigtable::v2::ResponseParams>
5660
GetResponseParamsFromTrailingMetadata(
@@ -99,6 +103,75 @@ class Metric {
99103
DataLabels data_labels) const = 0;
100104
};
101105

106+
class OperationLatency : public Metric {
107+
public:
108+
explicit OperationLatency(
109+
std::string const& instrumentation_scope,
110+
opentelemetry::nostd::shared_ptr<
111+
opentelemetry::metrics::MeterProvider> const& provider);
112+
void PreCall(opentelemetry::context::Context const&,
113+
PreCallParams const& p) override;
114+
void PostCall(opentelemetry::context::Context const& context,
115+
grpc::ClientContext const& client_context,
116+
PostCallParams const& p) override;
117+
void OnDone(opentelemetry::context::Context const& context,
118+
OnDoneParams const& p) override;
119+
std::unique_ptr<Metric> clone(ResourceLabels resource_labels,
120+
DataLabels data_labels) const override;
121+
122+
private:
123+
ResourceLabels resource_labels_;
124+
DataLabels data_labels_;
125+
opentelemetry::nostd::shared_ptr<opentelemetry::metrics::Histogram<double>>
126+
operation_latencies_;
127+
OperationContext::Clock::time_point operation_start_;
128+
};
129+
130+
class AttemptLatency : public Metric {
131+
public:
132+
AttemptLatency(std::string const& instrumentation_scope,
133+
opentelemetry::nostd::shared_ptr<
134+
opentelemetry::metrics::MeterProvider> const& provider);
135+
void PreCall(opentelemetry::context::Context const&,
136+
PreCallParams const& p) override;
137+
void PostCall(opentelemetry::context::Context const& context,
138+
grpc::ClientContext const& client_context,
139+
PostCallParams const& p) override;
140+
std::unique_ptr<Metric> clone(ResourceLabels resource_labels,
141+
DataLabels data_labels) const override;
142+
143+
private:
144+
ResourceLabels resource_labels_;
145+
DataLabels data_labels_;
146+
opentelemetry::nostd::shared_ptr<opentelemetry::metrics::Histogram<double>>
147+
attempt_latencies_;
148+
OperationContext::Clock::time_point attempt_start_;
149+
};
150+
151+
class RetryCount : public Metric {
152+
public:
153+
RetryCount(std::string const& instrumentation_scope,
154+
opentelemetry::nostd::shared_ptr<
155+
opentelemetry::metrics::MeterProvider> const& provider);
156+
void PreCall(opentelemetry::context::Context const&,
157+
PreCallParams const&) override;
158+
void PostCall(opentelemetry::context::Context const& context,
159+
grpc::ClientContext const& client_context,
160+
PostCallParams const& p) override;
161+
void OnDone(opentelemetry::context::Context const& context,
162+
OnDoneParams const& p) override;
163+
std::unique_ptr<Metric> clone(ResourceLabels resource_labels,
164+
DataLabels data_labels) const override;
165+
166+
private:
167+
ResourceLabels resource_labels_;
168+
DataLabels data_labels_;
169+
std::uint64_t num_retries_ = 0;
170+
opentelemetry::nostd::shared_ptr<
171+
opentelemetry::metrics::Counter<std::uint64_t>>
172+
retry_count_;
173+
};
174+
102175
GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END
103176
} // namespace bigtable_internal
104177
} // namespace cloud

0 commit comments

Comments
 (0)