Skip to content

Commit fbf8304

Browse files
authored
Add new components to allow for generating metrics from 100% of spans without impacting sampling (#802)
1 parent d305bf6 commit fbf8304

12 files changed

+1703
-0
lines changed
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
/*
2+
* Copyright The OpenTelemetry Authors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package io.opentelemetry.contrib.awsxray;
7+
8+
import io.opentelemetry.api.common.Attributes;
9+
import io.opentelemetry.api.trace.SpanKind;
10+
import io.opentelemetry.api.trace.TraceState;
11+
import io.opentelemetry.context.Context;
12+
import io.opentelemetry.sdk.trace.data.LinkData;
13+
import io.opentelemetry.sdk.trace.samplers.Sampler;
14+
import io.opentelemetry.sdk.trace.samplers.SamplingDecision;
15+
import io.opentelemetry.sdk.trace.samplers.SamplingResult;
16+
import java.util.List;
17+
import javax.annotation.concurrent.Immutable;
18+
19+
/**
20+
* This sampler will return the sampling result of the provided {@link #rootSampler}, unless the
21+
* sampling result contains the sampling decision {@link SamplingDecision#DROP}, in which case, a
22+
* new sampling result will be returned that is functionally equivalent to the original, except that
23+
* it contains the sampling decision {@link SamplingDecision#RECORD_ONLY}. This ensures that all
24+
* spans are recorded, with no change to sampling.
25+
*
26+
* <p>The intended use case of this sampler is to provide a means of sending all spans to a
27+
* processor without having an impact on the sampling rate. This may be desirable if a user wishes
28+
* to count or otherwise measure all spans produced in a service, without incurring the cost of 100%
29+
* sampling.
30+
*/
31+
@Immutable
32+
public final class AlwaysRecordSampler implements Sampler {
33+
34+
private final Sampler rootSampler;
35+
36+
public static AlwaysRecordSampler create(Sampler rootSampler) {
37+
return new AlwaysRecordSampler(rootSampler);
38+
}
39+
40+
private AlwaysRecordSampler(Sampler rootSampler) {
41+
this.rootSampler = rootSampler;
42+
}
43+
44+
@Override
45+
public SamplingResult shouldSample(
46+
Context parentContext,
47+
String traceId,
48+
String name,
49+
SpanKind spanKind,
50+
Attributes attributes,
51+
List<LinkData> parentLinks) {
52+
SamplingResult result =
53+
rootSampler.shouldSample(parentContext, traceId, name, spanKind, attributes, parentLinks);
54+
if (result.getDecision() == SamplingDecision.DROP) {
55+
result = wrapResultWithRecordOnlyResult(result);
56+
}
57+
58+
return result;
59+
}
60+
61+
@Override
62+
public String getDescription() {
63+
return "AlwaysRecordSampler{" + rootSampler.getDescription() + "}";
64+
}
65+
66+
private static SamplingResult wrapResultWithRecordOnlyResult(SamplingResult result) {
67+
return new SamplingResult() {
68+
@Override
69+
public SamplingDecision getDecision() {
70+
return SamplingDecision.RECORD_ONLY;
71+
}
72+
73+
@Override
74+
public Attributes getAttributes() {
75+
return result.getAttributes();
76+
}
77+
78+
@Override
79+
public TraceState getUpdatedTraceState(TraceState parentTraceState) {
80+
return result.getUpdatedTraceState(parentTraceState);
81+
}
82+
};
83+
}
84+
}

aws-xray/src/main/java/io/opentelemetry/contrib/awsxray/AwsAttributeKeys.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ final class AwsAttributeKeys {
1212

1313
private AwsAttributeKeys() {}
1414

15+
static final AttributeKey<String> AWS_SPAN_KIND = AttributeKey.stringKey("aws.span.kind");
16+
17+
static final AttributeKey<String> AWS_LOCAL_SERVICE = AttributeKey.stringKey("aws.local.service");
18+
1519
static final AttributeKey<String> AWS_LOCAL_OPERATION =
1620
AttributeKey.stringKey("aws.local.operation");
1721

Lines changed: 222 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,222 @@
1+
/*
2+
* Copyright The OpenTelemetry Authors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package io.opentelemetry.contrib.awsxray;
7+
8+
import static io.opentelemetry.contrib.awsxray.AwsAttributeKeys.AWS_LOCAL_OPERATION;
9+
import static io.opentelemetry.contrib.awsxray.AwsAttributeKeys.AWS_LOCAL_SERVICE;
10+
import static io.opentelemetry.contrib.awsxray.AwsAttributeKeys.AWS_REMOTE_OPERATION;
11+
import static io.opentelemetry.contrib.awsxray.AwsAttributeKeys.AWS_REMOTE_SERVICE;
12+
import static io.opentelemetry.contrib.awsxray.AwsAttributeKeys.AWS_SPAN_KIND;
13+
import static io.opentelemetry.semconv.resource.attributes.ResourceAttributes.SERVICE_NAME;
14+
import static io.opentelemetry.semconv.trace.attributes.SemanticAttributes.DB_OPERATION;
15+
import static io.opentelemetry.semconv.trace.attributes.SemanticAttributes.DB_SYSTEM;
16+
import static io.opentelemetry.semconv.trace.attributes.SemanticAttributes.FAAS_INVOKED_NAME;
17+
import static io.opentelemetry.semconv.trace.attributes.SemanticAttributes.FAAS_INVOKED_PROVIDER;
18+
import static io.opentelemetry.semconv.trace.attributes.SemanticAttributes.GRAPHQL_OPERATION_TYPE;
19+
import static io.opentelemetry.semconv.trace.attributes.SemanticAttributes.MESSAGING_OPERATION;
20+
import static io.opentelemetry.semconv.trace.attributes.SemanticAttributes.MESSAGING_SYSTEM;
21+
import static io.opentelemetry.semconv.trace.attributes.SemanticAttributes.PEER_SERVICE;
22+
import static io.opentelemetry.semconv.trace.attributes.SemanticAttributes.RPC_METHOD;
23+
import static io.opentelemetry.semconv.trace.attributes.SemanticAttributes.RPC_SERVICE;
24+
25+
import io.opentelemetry.api.common.AttributeKey;
26+
import io.opentelemetry.api.common.Attributes;
27+
import io.opentelemetry.api.common.AttributesBuilder;
28+
import io.opentelemetry.api.trace.SpanKind;
29+
import io.opentelemetry.sdk.resources.Resource;
30+
import io.opentelemetry.sdk.trace.data.SpanData;
31+
import io.opentelemetry.semconv.resource.attributes.ResourceAttributes;
32+
import io.opentelemetry.semconv.trace.attributes.SemanticAttributes;
33+
import java.util.logging.Level;
34+
import java.util.logging.Logger;
35+
36+
/**
37+
* AwsMetricAttributeGenerator generates very specific metric attributes based on low-cardinality
38+
* span and resource attributes. If such attributes are not present, we fallback to default values.
39+
*
40+
* <p>The goal of these particular metric attributes is to get metrics for incoming and outgoing
41+
* traffic for a service. Namely, {@link SpanKind#SERVER} and {@link SpanKind#CONSUMER} spans
42+
* represent "incoming" traffic, {@link SpanKind#CLIENT} and {@link SpanKind#PRODUCER} spans
43+
* represent "outgoing" traffic, and {@link SpanKind#INTERNAL} spans are ignored.
44+
*/
45+
final class AwsMetricAttributeGenerator implements MetricAttributeGenerator {
46+
47+
private static final Logger logger =
48+
Logger.getLogger(AwsMetricAttributeGenerator.class.getName());
49+
50+
// Special SERVICE attribute value if GRAPHQL_OPERATION_TYPE attribute key is present.
51+
private static final String GRAPHQL = "graphql";
52+
53+
// Default attribute values if no valid span attribute value is identified
54+
private static final String UNKNOWN_SERVICE = "UnknownService";
55+
private static final String UNKNOWN_OPERATION = "UnknownOperation";
56+
private static final String UNKNOWN_REMOTE_SERVICE = "UnknownRemoteService";
57+
private static final String UNKNOWN_REMOTE_OPERATION = "UnknownRemoteOperation";
58+
59+
@Override
60+
public Attributes generateMetricAttributesFromSpan(SpanData span, Resource resource) {
61+
AttributesBuilder builder = Attributes.builder();
62+
switch (span.getKind()) {
63+
case CONSUMER:
64+
case SERVER:
65+
setService(resource, span, builder);
66+
setIngressOperation(span, builder);
67+
setSpanKind(span, builder);
68+
break;
69+
case PRODUCER:
70+
case CLIENT:
71+
setService(resource, span, builder);
72+
setEgressOperation(span, builder);
73+
setRemoteServiceAndOperation(span, builder);
74+
setSpanKind(span, builder);
75+
break;
76+
default:
77+
// Add no attributes, signalling no metrics should be emitted.
78+
}
79+
return builder.build();
80+
}
81+
82+
/** Service is always derived from {@link ResourceAttributes#SERVICE_NAME} */
83+
private static void setService(Resource resource, SpanData span, AttributesBuilder builder) {
84+
String service = resource.getAttribute(SERVICE_NAME);
85+
if (service == null) {
86+
logUnknownAttribute(AWS_LOCAL_SERVICE, span);
87+
service = UNKNOWN_SERVICE;
88+
}
89+
builder.put(AWS_LOCAL_SERVICE, service);
90+
}
91+
92+
/**
93+
* Ingress operation (i.e. operation for Server and Consumer spans) is always derived from span
94+
* name.
95+
*/
96+
private static void setIngressOperation(SpanData span, AttributesBuilder builder) {
97+
String operation = span.getName();
98+
if (operation == null) {
99+
logUnknownAttribute(AWS_LOCAL_OPERATION, span);
100+
operation = UNKNOWN_OPERATION;
101+
}
102+
builder.put(AWS_LOCAL_OPERATION, operation);
103+
}
104+
105+
/**
106+
* Egress operation (i.e. operation for Client and Producer spans) is always derived from a
107+
* special span attribute, {@link AwsAttributeKeys#AWS_LOCAL_OPERATION}. This attribute is
108+
* generated with a separate SpanProcessor, {@link AttributePropagatingSpanProcessor}
109+
*/
110+
private static void setEgressOperation(SpanData span, AttributesBuilder builder) {
111+
String operation = span.getAttributes().get(AWS_LOCAL_OPERATION);
112+
if (operation == null) {
113+
logUnknownAttribute(AWS_LOCAL_OPERATION, span);
114+
operation = UNKNOWN_OPERATION;
115+
}
116+
builder.put(AWS_LOCAL_OPERATION, operation);
117+
}
118+
119+
/**
120+
* Remote attributes (only for Client and Producer spans) are generated based on low-cardinality
121+
* span attributes, in priority order.
122+
*
123+
* <p>The first priority is the AWS Remote attributes, which are generated from manually
124+
* instrumented span attributes, and are clear indications of customer intent. If AWS Remote
125+
* attributes are not present, the next highest priority span attribute is Peer Service, which is
126+
* also a reliable indicator of customer intent. If this is set, it will override
127+
* AWS_REMOTE_SERVICE identified from any other span attribute, other than AWS Remote attributes.
128+
*
129+
* <p>After this, we look for the following low-cardinality span attributes that can be used to
130+
* determine the remote metric attributes:
131+
*
132+
* <ul>
133+
* <li>RPC
134+
* <li>DB
135+
* <li>FAAS
136+
* <li>Messaging
137+
* <li>GraphQL - Special case, if {@link SemanticAttributes#GRAPHQL_OPERATION_TYPE} is present,
138+
* we use it for RemoteOperation and set RemoteService to {@link #GRAPHQL}.
139+
* </ul>
140+
*
141+
* <p>In each case, these span attributes were selected from the OpenTelemetry trace semantic
142+
* convention specifications as they adhere to the three following criteria:
143+
*
144+
* <ul>
145+
* <li>Attributes are meaningfully indicative of remote service/operation names.
146+
* <li>Attributes are defined in the specification to be low cardinality, usually with a low-
147+
* cardinality list of values.
148+
* <li>Attributes are confirmed to have low-cardinality values, based on code analysis.
149+
* </ul>
150+
*
151+
* TODO: This specific logic may change in future. Specifically, we are still deciding which HTTP
152+
* and RPC attributes to use here, but this is a sufficient starting point.
153+
*/
154+
private static void setRemoteServiceAndOperation(SpanData span, AttributesBuilder builder) {
155+
if (isKeyPresent(span, AWS_REMOTE_SERVICE) || isKeyPresent(span, AWS_REMOTE_OPERATION)) {
156+
setRemoteService(span, builder, AWS_REMOTE_SERVICE);
157+
setRemoteOperation(span, builder, AWS_REMOTE_OPERATION);
158+
} else if (isKeyPresent(span, RPC_SERVICE) || isKeyPresent(span, RPC_METHOD)) {
159+
setRemoteService(span, builder, RPC_SERVICE);
160+
setRemoteOperation(span, builder, RPC_METHOD);
161+
} else if (isKeyPresent(span, DB_SYSTEM) || isKeyPresent(span, DB_OPERATION)) {
162+
setRemoteService(span, builder, DB_SYSTEM);
163+
setRemoteOperation(span, builder, DB_OPERATION);
164+
} else if (isKeyPresent(span, FAAS_INVOKED_PROVIDER) || isKeyPresent(span, FAAS_INVOKED_NAME)) {
165+
setRemoteService(span, builder, FAAS_INVOKED_PROVIDER);
166+
setRemoteOperation(span, builder, FAAS_INVOKED_NAME);
167+
} else if (isKeyPresent(span, MESSAGING_SYSTEM) || isKeyPresent(span, MESSAGING_OPERATION)) {
168+
setRemoteService(span, builder, MESSAGING_SYSTEM);
169+
setRemoteOperation(span, builder, MESSAGING_OPERATION);
170+
} else if (isKeyPresent(span, GRAPHQL_OPERATION_TYPE)) {
171+
builder.put(AWS_REMOTE_SERVICE, GRAPHQL);
172+
setRemoteOperation(span, builder, GRAPHQL_OPERATION_TYPE);
173+
} else {
174+
logUnknownAttribute(AWS_REMOTE_SERVICE, span);
175+
builder.put(AWS_REMOTE_SERVICE, UNKNOWN_REMOTE_SERVICE);
176+
logUnknownAttribute(AWS_REMOTE_OPERATION, span);
177+
builder.put(AWS_REMOTE_OPERATION, UNKNOWN_REMOTE_OPERATION);
178+
}
179+
180+
// Peer service takes priority as RemoteService over everything but AWS Remote.
181+
if (isKeyPresent(span, PEER_SERVICE) && !isKeyPresent(span, AWS_REMOTE_SERVICE)) {
182+
setRemoteService(span, builder, PEER_SERVICE);
183+
}
184+
}
185+
186+
/** Span kind is needed for differentiating metrics in the EMF exporter */
187+
private static void setSpanKind(SpanData span, AttributesBuilder builder) {
188+
String spanKind = span.getKind().name();
189+
builder.put(AWS_SPAN_KIND, spanKind);
190+
}
191+
192+
private static boolean isKeyPresent(SpanData span, AttributeKey<String> key) {
193+
return span.getAttributes().get(key) != null;
194+
}
195+
196+
private static void setRemoteService(
197+
SpanData span, AttributesBuilder builder, AttributeKey<String> remoteServiceKey) {
198+
String remoteService = span.getAttributes().get(remoteServiceKey);
199+
if (remoteService == null) {
200+
logUnknownAttribute(AWS_REMOTE_SERVICE, span);
201+
remoteService = UNKNOWN_REMOTE_SERVICE;
202+
}
203+
builder.put(AWS_REMOTE_SERVICE, remoteService);
204+
}
205+
206+
private static void setRemoteOperation(
207+
SpanData span, AttributesBuilder builder, AttributeKey<String> remoteOperationKey) {
208+
String remoteOperation = span.getAttributes().get(remoteOperationKey);
209+
if (remoteOperation == null) {
210+
logUnknownAttribute(AWS_REMOTE_OPERATION, span);
211+
remoteOperation = UNKNOWN_REMOTE_OPERATION;
212+
}
213+
builder.put(AWS_REMOTE_OPERATION, remoteOperation);
214+
}
215+
216+
private static void logUnknownAttribute(AttributeKey<String> attributeKey, SpanData span) {
217+
String[] params = {
218+
attributeKey.getKey(), span.getKind().name(), span.getSpanContext().getSpanId()
219+
};
220+
logger.log(Level.FINEST, "No valid {0} value found for {1} span {2}", params);
221+
}
222+
}

0 commit comments

Comments
 (0)