1
1
package datadog .trace .common .metrics ;
2
2
3
3
import static datadog .communication .ddagent .DDAgentFeaturesDiscovery .V6_METRICS_ENDPOINT ;
4
+ import static datadog .trace .api .DDTags .BASE_SERVICE ;
4
5
import static datadog .trace .api .Functions .UTF8_ENCODE ;
5
6
import static datadog .trace .bootstrap .instrumentation .api .Tags .SPAN_KIND ;
7
+ import static datadog .trace .bootstrap .instrumentation .api .Tags .SPAN_KIND_CLIENT ;
8
+ import static datadog .trace .bootstrap .instrumentation .api .Tags .SPAN_KIND_CONSUMER ;
9
+ import static datadog .trace .bootstrap .instrumentation .api .Tags .SPAN_KIND_INTERNAL ;
10
+ import static datadog .trace .bootstrap .instrumentation .api .Tags .SPAN_KIND_PRODUCER ;
11
+ import static datadog .trace .bootstrap .instrumentation .api .Tags .SPAN_KIND_SERVER ;
6
12
import static datadog .trace .common .metrics .AggregateMetric .ERROR_TAG ;
7
13
import static datadog .trace .common .metrics .AggregateMetric .TOP_LEVEL_TAG ;
8
14
import static datadog .trace .common .metrics .SignalItem .ReportSignal .REPORT ;
9
15
import static datadog .trace .common .metrics .SignalItem .StopSignal .STOP ;
10
16
import static datadog .trace .util .AgentThreadFactory .AgentThread .METRICS_AGGREGATOR ;
11
17
import static datadog .trace .util .AgentThreadFactory .THREAD_JOIN_TIMOUT_MS ;
12
18
import static datadog .trace .util .AgentThreadFactory .newAgentThread ;
19
+ import static java .util .Collections .unmodifiableSet ;
13
20
import static java .util .concurrent .TimeUnit .SECONDS ;
14
21
15
22
import datadog .communication .ddagent .DDAgentFeaturesDiscovery ;
16
23
import datadog .communication .ddagent .SharedCommunicationObjects ;
17
24
import datadog .trace .api .Config ;
25
+ import datadog .trace .api .Pair ;
18
26
import datadog .trace .api .WellKnownTags ;
19
27
import datadog .trace .api .cache .DDCache ;
20
28
import datadog .trace .api .cache .DDCaches ;
25
33
import datadog .trace .core .DDTraceCoreInfo ;
26
34
import datadog .trace .core .monitor .HealthMetrics ;
27
35
import datadog .trace .util .AgentTaskScheduler ;
36
+ import java .util .ArrayList ;
37
+ import java .util .Arrays ;
28
38
import java .util .Collections ;
39
+ import java .util .HashSet ;
29
40
import java .util .List ;
30
41
import java .util .Map ;
31
42
import java .util .Queue ;
32
43
import java .util .Set ;
33
44
import java .util .concurrent .CompletableFuture ;
34
45
import java .util .concurrent .Future ;
35
46
import java .util .concurrent .TimeUnit ;
47
+ import java .util .function .Function ;
36
48
import org .jctools .maps .NonBlockingHashMap ;
37
49
import org .jctools .queues .MpscCompoundQueue ;
38
50
import org .jctools .queues .SpmcArrayQueue ;
@@ -49,8 +61,32 @@ public final class ConflatingMetricsAggregator implements MetricsAggregator, Eve
49
61
private static final DDCache <String , UTF8BytesString > SERVICE_NAMES =
50
62
DDCaches .newFixedSizeCache (32 );
51
63
64
+ private static final DDCache <CharSequence , UTF8BytesString > SPAN_KINDS =
65
+ DDCaches .newFixedSizeCache (16 );
66
+ private static final DDCache <
67
+ String , Pair <DDCache <String , UTF8BytesString >, Function <String , UTF8BytesString >>>
68
+ PEER_TAGS_CACHE =
69
+ DDCaches .newFixedSizeCache (
70
+ 64 ); // this cache could safely be unbounded: its keys are the peer tag names returned by the
71
+ // agent, so their number is under control. 64 entries is enough to hold all the peer tags.
72
+ private static final Function <
73
+ String , Pair <DDCache <String , UTF8BytesString >, Function <String , UTF8BytesString >>>
74
+ PEER_TAGS_CACHE_ADDER =
75
+ key ->
76
+ Pair .of (
77
+ DDCaches .newFixedSizeCache (512 ),
78
+ value -> UTF8BytesString .create (key + ":" + value ));
52
79
private static final CharSequence SYNTHETICS_ORIGIN = "synthetics" ;
53
80
81
+ private static final Set <String > ELIGIBLE_SPAN_KINDS_FOR_METRICS =
82
+ unmodifiableSet (
83
+ new HashSet <>(
84
+ Arrays .asList (
85
+ SPAN_KIND_SERVER , SPAN_KIND_CLIENT , SPAN_KIND_CONSUMER , SPAN_KIND_PRODUCER )));
86
+
87
+ private static final Set <String > ELIGIBLE_SPAN_KINDS_FOR_PEER_AGGREGATION =
88
+ unmodifiableSet (new HashSet <>(Arrays .asList (SPAN_KIND_CLIENT , SPAN_KIND_PRODUCER )));
89
+
54
90
private final Set <String > ignoredResources ;
55
91
private final Queue <Batch > batchPool ;
56
92
private final NonBlockingHashMap <MetricKey , Batch > pending ;
@@ -262,18 +298,23 @@ private boolean shouldComputeMetric(CoreSpan<?> span) {
262
298
private boolean spanKindEligible (CoreSpan <?> span ) {
263
299
final Object spanKind = span .getTag (SPAN_KIND );
264
300
// use toString since it could be a CharSequence...
265
- return spanKind != null && features . spanKindsToComputedStats () .contains (spanKind .toString ());
301
+ return spanKind != null && ELIGIBLE_SPAN_KINDS_FOR_METRICS .contains (spanKind .toString ());
266
302
}
267
303
268
304
private boolean publish (CoreSpan <?> span , boolean isTopLevel ) {
305
+ final CharSequence spanKind = span .getTag (SPAN_KIND , "" );
269
306
MetricKey newKey =
270
307
new MetricKey (
271
308
span .getResourceName (),
272
309
SERVICE_NAMES .computeIfAbsent (span .getServiceName (), UTF8_ENCODE ),
273
310
span .getOperationName (),
274
311
span .getType (),
275
312
span .getHttpStatusCode (),
276
- isSynthetic (span ));
313
+ isSynthetic (span ),
314
+ span .getParentId () == 0 ,
315
+ SPAN_KINDS .computeIfAbsent (
316
+ spanKind , UTF8BytesString ::create ), // save repeated utf8 conversions
317
+ getPeerTags (span , spanKind .toString ()));
277
318
boolean isNewKey = false ;
278
319
MetricKey key = keys .putIfAbsent (newKey , newKey );
279
320
if (null == key ) {
@@ -288,7 +329,7 @@ private boolean publish(CoreSpan<?> span, boolean isTopLevel) {
288
329
// returning false means that either the batch can't take any
289
330
// more data, or it has already been consumed
290
331
if (batch .add (tag , durationNanos )) {
291
- // added to a pending batch prior to consumption
332
+ // added to a pending batch prior to consumption,
292
333
// so skip publishing to the queue (we also know
293
334
// the key isn't rare enough to override the sampler)
294
335
return false ;
@@ -308,6 +349,34 @@ private boolean publish(CoreSpan<?> span, boolean isTopLevel) {
308
349
return isNewKey || span .getError () > 0 ;
309
350
}
310
351
352
+ private List <UTF8BytesString > getPeerTags (CoreSpan <?> span , String spanKind ) {
353
+ if (ELIGIBLE_SPAN_KINDS_FOR_PEER_AGGREGATION .contains (spanKind )) {
354
+ List <UTF8BytesString > peerTags = new ArrayList <>();
355
+ for (String peerTag : features .peerTags ()) {
356
+ Object value = span .getTag (peerTag );
357
+ if (value != null ) {
358
+ final Pair <DDCache <String , UTF8BytesString >, Function <String , UTF8BytesString >>
359
+ cacheAndCreator = PEER_TAGS_CACHE .computeIfAbsent (peerTag , PEER_TAGS_CACHE_ADDER );
360
+ peerTags .add (
361
+ cacheAndCreator
362
+ .getLeft ()
363
+ .computeIfAbsent (value .toString (), cacheAndCreator .getRight ()));
364
+ }
365
+ }
366
+ return peerTags ;
367
+ } else if (SPAN_KIND_INTERNAL .equals (spanKind )) {
368
+ // in this case only the base service should be aggregated if present
369
+ final String baseService = span .getTag (BASE_SERVICE );
370
+ if (baseService != null ) {
371
+ final Pair <DDCache <String , UTF8BytesString >, Function <String , UTF8BytesString >>
372
+ cacheAndCreator = PEER_TAGS_CACHE .computeIfAbsent (BASE_SERVICE , PEER_TAGS_CACHE_ADDER );
373
+ return Collections .singletonList (
374
+ cacheAndCreator .getLeft ().computeIfAbsent (baseService , cacheAndCreator .getRight ()));
375
+ }
376
+ }
377
+ return Collections .emptyList ();
378
+ }
379
+
311
380
private static boolean isSynthetic (CoreSpan <?> span ) {
312
381
return span .getOrigin () != null && SYNTHETICS_ORIGIN .equals (span .getOrigin ().toString ());
313
382
}
0 commit comments