Skip to content

Commit cd02090

Browse files
chore: export direct access labels on attempt latencies (#2548)
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/java-bigtable/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) - [ ] Rollback plan is reviewed and LGTMed - [ ] All new data plane features have a completed end to end testing plan Fixes #<issue_number_goes_here> ☕️ If you write sample code, please follow the [samples format]( https://togithub.com/GoogleCloudPlatform/java-docs-samples/blob/main/SAMPLE_FORMAT.md).
1 parent 6e6dd0a commit cd02090

File tree

8 files changed

+142
-2
lines changed

8 files changed

+142
-2
lines changed

google-cloud-bigtable/pom.xml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,10 @@
134134
<groupId>com.google.protobuf</groupId>
135135
<artifactId>protobuf-java-util</artifactId>
136136
</dependency>
137+
<dependency>
138+
<groupId>com.google.code.gson</groupId>
139+
<artifactId>gson</artifactId>
140+
</dependency>
137141
<dependency>
138142
<groupId>io.opencensus</groupId>
139143
<artifactId>opencensus-api</artifactId>

google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/metrics/BigtableCloudMonitoringExporter.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
package com.google.cloud.bigtable.data.v2.stub.metrics;
1717

1818
import static com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsConstants.APPLICATION_BLOCKING_LATENCIES_NAME;
19+
import static com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsConstants.ATTEMPT_LATENCIES2_NAME;
1920
import static com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsConstants.ATTEMPT_LATENCIES_NAME;
2021
import static com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsConstants.CLIENT_BLOCKING_LATENCIES_NAME;
2122
import static com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsConstants.CONNECTIVITY_ERROR_COUNT_NAME;
@@ -284,6 +285,7 @@ static class PublicTimeSeriesConverter implements TimeSeriesConverter {
284285
ImmutableSet.of(
285286
OPERATION_LATENCIES_NAME,
286287
ATTEMPT_LATENCIES_NAME,
288+
ATTEMPT_LATENCIES2_NAME,
287289
SERVER_LATENCIES_NAME,
288290
FIRST_RESPONSE_LATENCIES_NAME,
289291
CLIENT_BLOCKING_LATENCIES_NAME,

google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/metrics/BigtableGrpcStreamTracer.java

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,19 +15,27 @@
1515
*/
1616
package com.google.cloud.bigtable.data.v2.stub.metrics;
1717

18+
import com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsTracer.TransportAttrs;
1819
import io.grpc.ClientStreamTracer;
1920
import io.grpc.Metadata;
21+
import io.grpc.Status;
2022

2123
/**
2224
* Records the time a request is enqueued in a grpc channel queue. This is a bridge between gRPC stream
2325
* tracing and Bigtable tracing. Its primary purpose is to measure the transition time between
2426
* asking gRPC to start an RPC and gRPC actually serializing that RPC.
2527
*/
2628
class BigtableGrpcStreamTracer extends ClientStreamTracer {
29+
private static final String GRPC_LB_LOCALITY_KEY = "grpc.lb.locality";
30+
private static final String GRPC_LB_BACKEND_SERVICE_KEY = "grpc.lb.backend_service";
2731

32+
private final StreamInfo info;
2833
private final BigtableTracer tracer;
34+
private volatile String backendService = null;
35+
private volatile String locality = null;
2936

30-
public BigtableGrpcStreamTracer(BigtableTracer tracer) {
37+
/**
 * Creates a stream tracer that reports to the given Bigtable tracer.
 *
 * @param info the gRPC stream info handed to the factory when the tracer is created; it is stored
 *     on the instance. NOTE(review): no read of this field is visible in this diff - confirm it
 *     is needed.
 * @param tracer the Bigtable tracer that receives the gRPC stream events
 */
public BigtableGrpcStreamTracer(StreamInfo info, BigtableTracer tracer) {
38+
this.info = info;
3139
this.tracer = tracer;
3240
}
3341

@@ -36,6 +44,26 @@ public void outboundMessageSent(int seqNo, long optionalWireSize, long optionalU
3644
tracer.grpcMessageSent();
3745
}
3846

47+
/**
 * Captures the optional labels this tracer cares about. The value reported under {@code
 * "grpc.lb.locality"} is remembered as the locality and the value reported under {@code
 * "grpc.lb.backend_service"} as the backend service; any other key is not recorded here.
 *
 * @param key the optional label name
 * @param value the optional label value
 */
@Override
48+
public void addOptionalLabel(String key, String value) {
49+
switch (key) {
50+
case GRPC_LB_LOCALITY_KEY:
51+
this.locality = value;
52+
break;
53+
case GRPC_LB_BACKEND_SERVICE_KEY:
54+
this.backendService = value;
55+
break;
56+
}
57+
58+
// Every label, recognized or not, is still passed on to the superclass.
super.addOptionalLabel(key, value);
59+
}
60+
61+
/**
 * Publishes the transport attributes collected for this stream to the Bigtable tracer when the
 * stream closes, then delegates to the superclass.
 *
 * @param status the final status of the stream
 */
@Override
62+
public void streamClosed(Status status) {
63+
// locality and backendService are still null if the corresponding label was never reported.
tracer.setTransportAttrs(TransportAttrs.create(locality, backendService));
64+
super.streamClosed(status);
65+
}
66+
3967
static class Factory extends ClientStreamTracer.Factory {
4068

4169
private final BigtableTracer tracer;
@@ -47,7 +75,7 @@ static class Factory extends ClientStreamTracer.Factory {
4775
@Override
4876
public ClientStreamTracer newClientStreamTracer(
4977
ClientStreamTracer.StreamInfo info, Metadata headers) {
50-
return new BigtableGrpcStreamTracer(tracer);
78+
return new BigtableGrpcStreamTracer(info, tracer);
5179
}
5280
}
5381
}

google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/metrics/BigtableTracer.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,9 @@ public void setLocations(String zone, String cluster) {
9191
// noop
9292
}
9393

94+
/**
 * Sets the attributes of the underlying transport used to process the attempt. This base
 * implementation does nothing; tracers that export transport labels override it.
 */
95+
public void setTransportAttrs(BuiltinMetricsTracer.TransportAttrs attrs) {}
96+
9497
@Deprecated
9598
/** @deprecated {@link #grpcMessageSent()} is called instead. */
9699
public void grpcChannelQueuedLatencies(long queuedTimeMs) {

google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/metrics/BuiltinMetricsConstants.java

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,11 +50,18 @@ public class BuiltinMetricsConstants {
5050
static final AttributeKey<String> STATUS_KEY = AttributeKey.stringKey("status");
5151
static final AttributeKey<String> CLIENT_UID_KEY = AttributeKey.stringKey("client_uid");
5252

53+
static final AttributeKey<String> TRANSPORT_TYPE = AttributeKey.stringKey("transport_type");
54+
static final AttributeKey<String> TRANSPORT_REGION = AttributeKey.stringKey("transport_region");
55+
static final AttributeKey<String> TRANSPORT_ZONE = AttributeKey.stringKey("transport_zone");
56+
static final AttributeKey<String> TRANSPORT_SUBZONE = AttributeKey.stringKey("transport_subzone");
57+
5358
public static final String METER_NAME = "bigtable.googleapis.com/internal/client/";
5459

5560
// Metric names
5661
public static final String OPERATION_LATENCIES_NAME = "operation_latencies";
5762
public static final String ATTEMPT_LATENCIES_NAME = "attempt_latencies";
63+
// Temporary workaround for not being able to add new labels to ATTEMPT_LATENCIES_NAME
64+
public static final String ATTEMPT_LATENCIES2_NAME = "attempt_latencies2";
5865
static final String RETRY_COUNT_NAME = "retry_count";
5966
static final String CONNECTIVITY_ERROR_COUNT_NAME = "connectivity_error_count";
6067
static final String SERVER_LATENCIES_NAME = "server_latencies";
@@ -211,6 +218,22 @@ public static Map<InstrumentSelector, View> getAllViews() {
211218
.addAll(COMMON_ATTRIBUTES)
212219
.add(STREAMING_KEY, STATUS_KEY)
213220
.build());
221+
defineView(
222+
views,
223+
ATTEMPT_LATENCIES2_NAME,
224+
AGGREGATION_WITH_MILLIS_HISTOGRAM,
225+
InstrumentType.HISTOGRAM,
226+
"ms",
227+
ImmutableSet.<AttributeKey>builder()
228+
.addAll(COMMON_ATTRIBUTES)
229+
.add(
230+
STREAMING_KEY,
231+
STATUS_KEY,
232+
TRANSPORT_TYPE,
233+
TRANSPORT_REGION,
234+
TRANSPORT_ZONE,
235+
TRANSPORT_SUBZONE)
236+
.build());
214237
defineView(
215238
views,
216239
SERVER_LATENCIES_NAME,

google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/metrics/BuiltinMetricsTracer.java

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,24 +23,34 @@
2323
import static com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsConstants.STATUS_KEY;
2424
import static com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsConstants.STREAMING_KEY;
2525
import static com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsConstants.TABLE_ID_KEY;
26+
import static com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsConstants.TRANSPORT_REGION;
27+
import static com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsConstants.TRANSPORT_SUBZONE;
28+
import static com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsConstants.TRANSPORT_TYPE;
29+
import static com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsConstants.TRANSPORT_ZONE;
2630
import static com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsConstants.ZONE_ID_KEY;
2731

2832
import com.google.api.core.ObsoleteApi;
2933
import com.google.api.gax.retrying.ServerStreamingAttemptException;
3034
import com.google.api.gax.tracing.SpanName;
35+
import com.google.auto.value.AutoValue;
3136
import com.google.cloud.bigtable.Version;
3237
import com.google.common.base.Stopwatch;
38+
import com.google.common.base.Strings;
3339
import com.google.common.math.IntMath;
40+
import com.google.gson.Gson;
41+
import com.google.gson.reflect.TypeToken;
3442
import io.grpc.Deadline;
3543
import io.opentelemetry.api.common.Attributes;
3644
import io.opentelemetry.api.metrics.DoubleHistogram;
3745
import io.opentelemetry.api.metrics.LongCounter;
3846
import java.time.Duration;
47+
import java.util.Map;
3948
import java.util.concurrent.CancellationException;
4049
import java.util.concurrent.TimeUnit;
4150
import java.util.concurrent.atomic.AtomicBoolean;
4251
import java.util.concurrent.atomic.AtomicInteger;
4352
import java.util.concurrent.atomic.AtomicLong;
53+
import java.util.logging.Level;
4454
import java.util.logging.Logger;
4555
import javax.annotation.Nullable;
4656

@@ -49,8 +59,23 @@
4959
* bigtable.googleapis.com/client namespace
5060
*/
5161
class BuiltinMetricsTracer extends BigtableTracer {
62+
/**
 * Immutable holder for the transport details of a single attempt: the locality and the backend
 * service. Either value may be {@code null}.
 */
@AutoValue
63+
abstract static class TransportAttrs {
64+
/**
 * Returns the locality, or {@code null} if none was captured. The tracer parses a non-empty
 * value as a JSON object and reads its {@code region}, {@code zone} and {@code sub_zone} keys.
 */
@Nullable
65+
abstract String getLocality();
66+
67+
/** Returns the backend service, or {@code null} if none was captured. */
@Nullable
68+
abstract String getBackendService();
69+
70+
/** Creates an instance from the given values, both of which may be {@code null}. */
static TransportAttrs create(@Nullable String locality, @Nullable String backendService) {
71+
return new AutoValue_BuiltinMetricsTracer_TransportAttrs(locality, backendService);
72+
}
73+
}
5274

5375
private static final Logger logger = Logger.getLogger(BuiltinMetricsTracer.class.getName());
76+
private static final Gson GSON = new Gson();
77+
private static final TypeToken<Map<String, String>> LOCALITY_TYPE =
78+
new TypeToken<Map<String, String>>() {};
5479

5580
private static final String NAME = "java-bigtable/" + Version.VERSION;
5681
private final OperationType operationType;
@@ -95,12 +120,15 @@ class BuiltinMetricsTracer extends BigtableTracer {
95120
private Deadline operationDeadline = null;
96121
private volatile long remainingDeadlineAtAttemptStart = 0;
97122

123+
// Written from the gRPC stream tracer callback (streamClosed) and read when the attempt
// completion is recorded. Marked volatile so the value is visible even if those run on
// different threads, matching the other cross-callback fields in this class.
private volatile TransportAttrs transportAttrs = null;
124+
98125
// OpenCensus (and server) histogram buckets use [start, end), however OpenTelemetry uses (start,
99126
// end]. To work around this, we measure all the latencies in nanoseconds and convert them
100127
// to milliseconds and use DoubleHistogram. This should minimize the chance of a data
101128
// point falling on a bucket boundary and causing off-by-one errors.
102129
private final DoubleHistogram operationLatenciesHistogram;
103130
private final DoubleHistogram attemptLatenciesHistogram;
131+
private final DoubleHistogram attemptLatencies2Histogram;
104132
private final DoubleHistogram serverLatenciesHistogram;
105133
private final DoubleHistogram firstResponseLatenciesHistogram;
106134
private final DoubleHistogram clientBlockingLatenciesHistogram;
@@ -115,6 +143,7 @@ class BuiltinMetricsTracer extends BigtableTracer {
115143
Attributes attributes,
116144
DoubleHistogram operationLatenciesHistogram,
117145
DoubleHistogram attemptLatenciesHistogram,
146+
DoubleHistogram attemptLatencies2Histogram,
118147
DoubleHistogram serverLatenciesHistogram,
119148
DoubleHistogram firstResponseLatenciesHistogram,
120149
DoubleHistogram clientBlockingLatenciesHistogram,
@@ -128,6 +157,7 @@ class BuiltinMetricsTracer extends BigtableTracer {
128157

129158
this.operationLatenciesHistogram = operationLatenciesHistogram;
130159
this.attemptLatenciesHistogram = attemptLatenciesHistogram;
160+
this.attemptLatencies2Histogram = attemptLatencies2Histogram;
131161
this.serverLatenciesHistogram = serverLatenciesHistogram;
132162
this.firstResponseLatenciesHistogram = firstResponseLatenciesHistogram;
133163
this.clientBlockingLatenciesHistogram = clientBlockingLatenciesHistogram;
@@ -301,6 +331,11 @@ public void setLocations(String zone, String cluster) {
301331
this.cluster = cluster;
302332
}
303333

334+
/**
 * Stores the transport attributes for later use. They are read when the attempt completion is
 * recorded, to derive the transport type, region, zone and subzone labels attached to the
 * {@code attempt_latencies2} metric.
 */
@Override
335+
public void setTransportAttrs(TransportAttrs attrs) {
336+
this.transportAttrs = attrs;
337+
}
338+
304339
@Override
305340
public void batchRequestThrottled(long throttledTimeNanos) {
306341
totalClientBlockingTime.addAndGet(java.time.Duration.ofNanos(throttledTimeNanos).toMillis());
@@ -417,6 +452,35 @@ private void recordAttemptCompletion(@Nullable Throwable status) {
417452
attemptLatenciesHistogram.record(
418453
convertToMs(attemptTimer.elapsed(TimeUnit.NANOSECONDS)), attributes);
419454

455+
String transportType = "cloudpath";
456+
String transportRegion = "";
457+
String transportZone = "";
458+
String transportSubzone = "";
459+
460+
try {
461+
if (transportAttrs != null && !Strings.isNullOrEmpty(transportAttrs.getLocality())) {
462+
// only directpath has locality
463+
transportType = "directpath";
464+
Map<String, String> localityMap =
465+
GSON.fromJson(transportAttrs.getLocality(), LOCALITY_TYPE);
466+
transportRegion = localityMap.getOrDefault("region", "");
467+
transportZone = localityMap.getOrDefault("zone", "");
468+
transportSubzone = localityMap.getOrDefault("sub_zone", "");
469+
}
470+
} catch (RuntimeException e) {
471+
logger.log(
472+
Level.WARNING, "Failed to parse transport locality: " + transportAttrs.getLocality(), e);
473+
}
474+
attemptLatencies2Histogram.record(
475+
convertToMs(attemptTimer.elapsed(TimeUnit.NANOSECONDS)),
476+
attributes
477+
.toBuilder()
478+
.put(TRANSPORT_TYPE, transportType)
479+
.put(TRANSPORT_REGION, transportRegion)
480+
.put(TRANSPORT_ZONE, transportZone)
481+
.put(TRANSPORT_SUBZONE, transportSubzone)
482+
.build());
483+
420484
// When operationDeadline is set, it's possible that the deadline is passed by the time we send
421485
// a new attempt. In this case we'll record 0.
422486
if (operationDeadline != null) {

google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/metrics/BuiltinMetricsTracerFactory.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
package com.google.cloud.bigtable.data.v2.stub.metrics;
1717

1818
import static com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsConstants.APPLICATION_BLOCKING_LATENCIES_NAME;
19+
import static com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsConstants.ATTEMPT_LATENCIES2_NAME;
1920
import static com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsConstants.ATTEMPT_LATENCIES_NAME;
2021
import static com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsConstants.CLIENT_BLOCKING_LATENCIES_NAME;
2122
import static com.google.cloud.bigtable.data.v2.stub.metrics.BuiltinMetricsConstants.CONNECTIVITY_ERROR_COUNT_NAME;
@@ -52,6 +53,7 @@ public class BuiltinMetricsTracerFactory extends BaseApiTracerFactory {
5253

5354
private final DoubleHistogram operationLatenciesHistogram;
5455
private final DoubleHistogram attemptLatenciesHistogram;
56+
private final DoubleHistogram attemptLatencies2Histogram;
5557
private final DoubleHistogram serverLatenciesHistogram;
5658
private final DoubleHistogram firstResponseLatenciesHistogram;
5759
private final DoubleHistogram clientBlockingLatenciesHistogram;
@@ -82,6 +84,12 @@ public static BuiltinMetricsTracerFactory create(
8284
.setDescription("Client observed latency per RPC attempt.")
8385
.setUnit(MILLISECOND)
8486
.build();
87+
attemptLatencies2Histogram =
88+
meter
89+
.histogramBuilder(ATTEMPT_LATENCIES2_NAME)
90+
.setDescription("Client observed latency per RPC attempt with transport labels.")
91+
.setUnit(MILLISECOND)
92+
.build();
8593
serverLatenciesHistogram =
8694
meter
8795
.histogramBuilder(SERVER_LATENCIES_NAME)
@@ -140,6 +148,7 @@ public ApiTracer newTracer(ApiTracer parent, SpanName spanName, OperationType op
140148
attributes,
141149
operationLatenciesHistogram,
142150
attemptLatenciesHistogram,
151+
attemptLatencies2Histogram,
143152
serverLatenciesHistogram,
144153
firstResponseLatenciesHistogram,
145154
clientBlockingLatenciesHistogram,

google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/metrics/CompositeTracer.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,13 @@ public void setLocations(String zone, String cluster) {
218218
}
219219
}
220220

221+
/** Forwards the transport attributes to every child Bigtable tracer. */
@Override
222+
public void setTransportAttrs(BuiltinMetricsTracer.TransportAttrs attrs) {
223+
for (BigtableTracer tracer : bigtableTracers) {
224+
tracer.setTransportAttrs(attrs);
225+
}
226+
}
227+
221228
@Override
222229
public void onRequest(int requestCount) {
223230
for (BigtableTracer tracer : bigtableTracers) {

0 commit comments

Comments
 (0)