Skip to content

Commit c3b0061

Browse files
authored
Various optimizations for Activity handling (#8040)
## Summary of changes Various optimizations in the `Activity` handling code to reduce allocations and execution time ## Reason for change Some properties are triksy, as they do a bunch of allocation, so we should avoid them if we can. The changes in here _look_ bigger than they are diff-wise, I've added comments to aid review. ## Implementation details - Don't call `ParentId` until we definitely need it. - This property does a bunch of allocation to generate a "valid" value, so we should avoid it if we can. This makes the conditions a bit harder to read, but delays calling `ParentId` until we're sure we don't have something better already - Avoid calling `Tracer.Instance.ActiveScope?.Span` until we know we need it (very minor optimisations but why not 🤷‍♂️) - Extract `StopActivitySlow` to a separate method, as we _shouldn't_ hit this now, so should help the JIT out with things like code size etc of the calling method (conjecture, not tested, but I think it's better from code understanding PoV too) - Simplify `ShouldIgnoreByOperationName` which also improves execution time 10x from ~30us to ~3us. ## Test coverage Functionally covered by existing tests. Benchmarks compared to #8039 show improvments. Allocations listed below, but execution time is also improved: I re-ran the numbers after making the updates, and the benchmarks are actually better: <details><summary>Benchmarks for original PR</summary> <p> | Method | Runtime | Mean | StdDev | Gen0 | Allocated | Compared to #8039 | | ------------------------------- | -------------------- | -------: | --------: | -----: | --------: | ----------------: | | StartStopWithChild_Hierarchical | .NET 6.0 | 3.410 us | 0.0658 us | 0.0153 | 3.79 KB | -0.02 KB | | StartStopWithChild_Hierarchical | .NET 8.0 | 2.582 us | 0.0443 us | 0.0114 | 3.79 KB | -0.02 KB | | StartStopWithChild_Hierarchical | .NET Core 3.1 | 4.272 us | 0.0731 us | 0.0153 | 4.02 KB | -0.02 KB | | StartStopWithChild_Hierarchical | .NET Framework 4.7.2 | 5.165 us | 0.1245 us | 0.7324 | 4.51 KB | -0.03 KB | | | | | | | | | | StartStopWithChild | .NET 6.0 | 3.312 us | 0.0266 us | 0.0153 | 3.78 KB | -0.04 KB | | StartStopWithChild | .NET 8.0 | 2.648 us | 0.0306 us | 0.0114 | 3.78 KB | -0.13 KB | | StartStopWithChild | .NET Core 3.1 | 4.344 us | 0.0555 us | 0.0076 | 3.97 KB | -0.23 KB | | StartStopWithChild | .NET Framework 4.7.2 | 5.234 us | 0.1568 us | 0.7095 | 4.39 KB | -0.30 KB | </p> </details> | Method | Runtime | Mean | StdDev | Gen0 | Allocated | Alloc Ratio | | ------------------------------- | -------------------- | -------: | --------: | -----: | --------: | ----------: | | StartStopWithChild_Hierarchical | .NET 6.0 | 3.770 us | 0.3101 us | 0.0076 | 3.58 KB | -0.23 KB | | StartStopWithChild_Hierarchical | .NET 8.0 | 3.039 us | 0.2377 us | 0.0076 | 3.58 KB | -0.23 KB | | StartStopWithChild_Hierarchical | .NET Core 3.1 | 4.490 us | 0.1135 us | 0.0076 | 3.8 KB | -0.22 KB | | StartStopWithChild_Hierarchical | .NET Framework 4.7.2 | 5.303 us | 0.2469 us | 0.6943 | 4.3 KB | -0.24 KB | | | | | | | | | | StartStopWithChild | .NET 6.0 | 3.386 us | 0.0971 us | 0.0114 | 3.59 KB | -0.43 KB | | StartStopWithChild | .NET 8.0 | 2.661 us | 0.0218 us | 0.0114 | 3.59 KB | -0.32 KB | | StartStopWithChild | .NET Core 3.1 | 4.540 us | 0.1625 us | 0.0076 | 3.78 KB | -0.42 KB | | StartStopWithChild | .NET Framework 4.7.2 | 5.563 us | 0.1946 us | 0.6790 | 4.2 KB | -0.49 KB | ## Other details https://datadoghq.atlassian.net/browse/LANGPLAT-915 Part of a stack working to improve OTel performance - #8036 - #8037 - #8038 - #8039 - #8040 👈 - #8041 - #8042
1 parent 370c5a6 commit c3b0061

File tree

5 files changed

+108
-98
lines changed

5 files changed

+108
-98
lines changed

tracer/src/Datadog.Trace/Activity/DuckTypes/IActivity.cs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,11 @@ internal interface IActivity : IDuckType
2222
/// unless you know that you're not using W3C activities (because <see cref="IW3CActivity.SpanId"/> is null</remarks>
2323
string Id { get; }
2424

25+
/// <summary>
26+
/// Gets the ParentId, as a string calculated from <see cref="Parent"/> or, if an <see cref="IW3CActivity"/>,
27+
/// from the <see cref="IW3CActivity.RawParentSpanId"/>.
28+
/// </summary>
29+
/// <remarks>NOTE: this property allocates, and so should generally not be called unless the specific returned value is required</remarks>
2530
string? ParentId { get; }
2631

2732
// The corresponding property on the Activity object is nullable,

tracer/src/Datadog.Trace/Activity/DuckTypes/IW3CActivity.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ internal interface IW3CActivity : IActivity
2121
string? SpanId { get; set; }
2222

2323
[DuckField(Name = "_parentSpanId")]
24-
string ParentSpanId { get; set; }
24+
string? RawParentSpanId { get; set; }
2525

2626
[DuckField(Name = "_id")]
2727
string? RawId { get; set; }

tracer/src/Datadog.Trace/Activity/Handlers/ActivityHandlerCommon.cs

Lines changed: 68 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@ public static void ActivityStarted<T>(string sourceName, T activity, OpenTelemet
3939
where T : IActivity
4040
{
4141
Tracer.Instance.TracerManager.Telemetry.IntegrationRunning(IntegrationId);
42-
var activeSpan = Tracer.Instance.ActiveScope?.Span as Span;
4342

4443
// Propagate Trace and Parent Span ids
4544
SpanContext? parent = null;
@@ -48,25 +47,28 @@ public static void ActivityStarted<T>(string sourceName, T activity, OpenTelemet
4847
string? rawTraceId = null;
4948
string? rawSpanId = null;
5049

51-
// for non-IW3CActivity interfaces we'll use Activity.Id and string.Empty as the key as they don't have a guaranteed TraceId+SpanId
52-
// for IW3CActivity interfaces we'll use the Activity.TraceId + Activity.SpanId as the key
53-
// have to also validate that the TraceId and SpanId actually exist and aren't null - as they can be in some cases
50+
// for non-W3C activities using Hierarchical IDs (both IW3CActivity and IActivity) we use Activity.Id and string.Empty
51+
// for W3C ID activities (always IW3CActivity) we'll use the Activity.TraceId + Activity.SpanId as the key
5452
ActivityKey? activityKey = null;
5553

56-
if (activity is IW3CActivity w3cActivity)
54+
if (activity is IW3CActivity activity3)
5755
{
58-
var activityTraceId = w3cActivity.TraceId;
59-
var activitySpanId = w3cActivity.SpanId;
56+
var activityTraceId = activity3.TraceId;
57+
var activitySpanId = activity3.SpanId;
6058

61-
// If the user has specified a parent context, get the parent Datadog SpanContext
62-
if (w3cActivity is { ParentSpanId: { } parentSpanId, ParentId: { } parentId })
59+
if (!StringUtil.IsNullOrEmpty(activityTraceId))
6360
{
64-
// We know that we have a parent context, but we use TraceId+ParentSpanId for the mapping.
65-
// This is a result of an issue with OTel v1.0.1 (unsure if OTel or us tbh) where the
66-
// ".ParentId" matched for the Trace+Span IDs but not for the flags portion.
67-
// Doing a lookup on just the TraceId+ParentSpanId seems to be more resilient.
68-
if (activityTraceId != null!)
61+
// W3C ID
62+
if (activity3 is { RawParentSpanId: { } parentSpanId })
6963
{
64+
// This will be true for activities using W3C IDs which have a "remote" parent span ID
65+
// We explicitly don't check the case where we _do_ have a Parent object (i.e. in-process activity)
66+
// as in that scenario we may need to remap the parent instead (see below).
67+
//
68+
// We know that we have a parent context, but we use TraceId+ParentSpanId for the mapping.
69+
// This is a result of an issue with OTel v1.0.1 (unsure if OTel or us tbh) where the
70+
// ".ParentId" matched for the Trace+Span IDs but not for the flags portion.
71+
// Doing a lookup on just the TraceId+ParentSpanId seems to be more resilient.
7072
if (ActivityMappingById.TryGetValue(new ActivityKey(activityTraceId, parentSpanId), out ActivityMapping mapping))
7173
{
7274
parent = mapping.Scope.Span.Context;
@@ -85,43 +87,55 @@ public static void ActivityStarted<T>(string sourceName, T activity, OpenTelemet
8587
rawSpanId: parentSpanId);
8688
}
8789
}
90+
}
91+
else
92+
{
93+
// No traceID, so must be Hierarchical ID
94+
if (activity3 is { RawParentSpanId: { } parentSpanId })
95+
{
96+
// This is a weird scenario - we're in a hierarchical ID, we don't have a trace ID, but we _do_ have a _parentSpanID?!
97+
// should never hit this path unless we've gone wrong somewhere
98+
Log.Error("Activity with ID {ActivityId} had parent span ID {ParentSpanId} but TraceID was missing", activity.Id, parentSpanId);
99+
}
88100
else
89101
{
90-
// we have a ParentSpanId/ParentId, but no TraceId/SpanId, so default to use the ParentId for lookup
91-
if (ActivityMappingById.TryGetValue(new ActivityKey(parentId), out ActivityMapping mapping))
102+
// Since _parentSpanID is null, this either grabs _parentId, or Parent.Id, depending on what was set
103+
var parentId = activity3.ParentId;
104+
if (!StringUtil.IsNullOrEmpty(parentId) && ActivityMappingById.TryGetValue(new ActivityKey(parentId), out ActivityMapping mapping))
92105
{
93106
parent = mapping.Scope.Span.Context;
94107
}
95108
}
96109
}
97110

98-
if (parent is null && activeSpan is not null)
111+
// If we don't have a remote context, then we may need to remap the current activity to
112+
// reparent it with a datadog span
113+
if (parent is null
114+
&& activitySpanId is not null
115+
&& activityTraceId is not null
116+
&& Tracer.Instance.ActiveScope?.Span is Span activeSpan
117+
&& (activity.Parent is null || activity.Parent.StartTimeUtc <= activeSpan.StartTime.UtcDateTime))
99118
{
100119
// We ensure the activity follows the same TraceId as the span
101120
// And marks the ParentId the current spanId
102-
if ((activity.Parent is null || activity.Parent.StartTimeUtc <= activeSpan.StartTime.UtcDateTime)
103-
&& activitySpanId is not null
104-
&& activityTraceId is not null)
105-
{
106-
// TraceId (always 32 chars long even when using 64-bit ids)
107-
w3cActivity.TraceId = activeSpan.Context.RawTraceId;
108-
activityTraceId = w3cActivity.TraceId;
121+
// TraceId (always 32 chars long even when using 64-bit ids)
122+
activity3.TraceId = activeSpan.Context.RawTraceId;
123+
activityTraceId = activity3.TraceId;
109124

110-
// SpanId (always 16 chars long)
111-
w3cActivity.ParentSpanId = activeSpan.Context.RawSpanId;
125+
// SpanId (always 16 chars long)
126+
activity3.RawParentSpanId = activeSpan.Context.RawSpanId;
112127

113-
// We clear internals Id and ParentId values to force recalculation.
114-
w3cActivity.RawId = null;
115-
w3cActivity.RawParentId = null;
128+
// We clear internal IDs to force recalculation.
129+
activity3.RawId = null;
130+
activity3.RawParentId = null;
116131

117-
// Avoid recalculation of the traceId.
118-
traceId = activeSpan.TraceId128;
119-
}
132+
// Avoid recalculation of the traceId.
133+
traceId = activeSpan.TraceId128;
120134
}
121135

122136
// if there's an existing Activity we try to use its TraceId and SpanId,
123137
// but if Activity.IdFormat is not ActivityIdFormat.W3C, they may be null or unparsable
124-
if (activityTraceId != null! && activitySpanId != null!)
138+
if (activityTraceId != null && activitySpanId != null)
125139
{
126140
if (traceId == TraceId.Zero)
127141
{
@@ -132,11 +146,16 @@ public static void ActivityStarted<T>(string sourceName, T activity, OpenTelemet
132146

133147
rawTraceId = activityTraceId;
134148
rawSpanId = activitySpanId;
149+
activityKey = new(traceId: activityTraceId, spanId: activitySpanId);
135150
}
136-
137-
if (activityTraceId != null! && activitySpanId != null!)
151+
}
152+
else
153+
{
154+
// non-IW3CActivity, i.e. we're in .NET Core 2.x territory. Only have hierarchical IDs to worry about here
155+
var parentId = activity.ParentId;
156+
if (!StringUtil.IsNullOrEmpty(parentId) && ActivityMappingById.TryGetValue(new ActivityKey(parentId), out ActivityMapping mapping))
138157
{
139-
activityKey = new(traceId: activityTraceId, spanId: activitySpanId);
158+
parent = mapping.Scope.Span.Context;
140159
}
141160
}
142161

@@ -158,8 +177,9 @@ public static void ActivityStarted<T>(string sourceName, T activity, OpenTelemet
158177
}
159178

160179
// We check if we have to ignore the activity by the operation name value
161-
if (IgnoreActivityHandler.IgnoreByOperationName(activity, activeSpan))
180+
if (IgnoreActivityHandler.ShouldIgnoreByOperationName(activity.OperationName))
162181
{
182+
IgnoreActivityHandler.IgnoreActivity(activity, Tracer.Instance.ActiveScope?.Span as Span);
163183
activityMapping = default;
164184
return;
165185
}
@@ -216,7 +236,7 @@ public static void ActivityStopped<T>(string sourceName, T activity)
216236
{
217237
if (activity.Instance is not null)
218238
{
219-
if (IgnoreActivityHandler.ShouldIgnoreByOperationName(activity))
239+
if (IgnoreActivityHandler.ShouldIgnoreByOperationName(activity.OperationName))
220240
{
221241
return;
222242
}
@@ -255,6 +275,16 @@ public static void ActivityStopped<T>(string sourceName, T activity)
255275
}
256276
}
257277

278+
StopActivitySlow(sourceName, activity);
279+
}
280+
catch (Exception ex)
281+
{
282+
Log.Error(ex, "Error processing the DefaultActivityHandler.ActivityStopped callback");
283+
}
284+
285+
static void StopActivitySlow<TInner>(string sourceName, TInner activity)
286+
where TInner : IActivity
287+
{
258288
// The listener didn't send us the Activity or the scope instance was not found
259289
// In this case we are going go through the dictionary to check if we have an activity that
260290
// has been closed and then close the associated scope.
@@ -319,10 +349,6 @@ public static void ActivityStopped<T>(string sourceName, T activity)
319349
}
320350
}
321351
}
322-
catch (Exception ex)
323-
{
324-
Log.Error(ex, "Error processing the DefaultActivityHandler.ActivityStopped callback");
325-
}
326352

327353
static void CloseActivityScope<TInner>(string sourceName, TInner activity, Scope scope)
328354
where TInner : IActivity

tracer/src/Datadog.Trace/Activity/Handlers/IgnoreActivityHandler.cs

Lines changed: 29 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
#nullable enable
77

8+
using System;
89
using Datadog.Trace.Activity.DuckTypes;
910
using Datadog.Trace.Util;
1011

@@ -30,36 +31,43 @@ internal sealed class IgnoreActivityHandler : IActivityHandler
3031
"Experimental.System.Net.Sockets",
3132
};
3233

33-
private static readonly string[] IgnoreOperationNamesStartingWith =
34+
public static bool ShouldIgnoreByOperationName(string? operationName)
3435
{
35-
"System.Net.Http.",
36-
"Microsoft.AspNetCore.",
37-
};
36+
// We only have two ignored operation names for now, if we get more, we can be more
37+
// generalized, but this is called twice in hot path creation
38+
return operationName is not null
39+
&& (operationName.StartsWith("System.Net.Http.", StringComparison.Ordinal)
40+
|| operationName.StartsWith("Microsoft.AspNetCore.", StringComparison.Ordinal));
41+
}
3842

39-
public static bool ShouldIgnoreByOperationName<T>(T activity)
43+
public static void IgnoreActivity<T>(T activity, Span? span)
4044
where T : IActivity
4145
{
42-
foreach (var ignoreSourceName in IgnoreOperationNamesStartingWith)
46+
if (span is not null && activity is IW3CActivity w3cActivity)
4347
{
44-
if (activity.OperationName?.StartsWith(ignoreSourceName) == true)
48+
#pragma warning disable DDDUCK001 // Checking IDuckType for null
49+
if ((activity.Parent is null || activity.Parent.StartTimeUtc < span.StartTime.UtcDateTime)
50+
&& w3cActivity.SpanId is not null
51+
&& w3cActivity.TraceId is not null)
4552
{
46-
return true;
47-
}
48-
}
53+
// If we ignore the activity and there's an existing active span
54+
// We modify the activity spanId with the one in the span
55+
// The reason for that is in case this ignored activity is used
56+
// for propagation then the current active span will appear as parentId
57+
// in the context propagation, and we will keep the entire trace.
4958

50-
return false;
51-
}
59+
// TraceId (always 32 chars long even when using 64-bit ids)
60+
w3cActivity.TraceId = span.Context.RawTraceId;
5261

53-
public static bool IgnoreByOperationName<T>(T activity, Span? span)
54-
where T : IActivity
55-
{
56-
if (ShouldIgnoreByOperationName(activity))
57-
{
58-
IgnoreActivity(activity, span);
59-
return true;
60-
}
62+
// SpanId (always 16 chars long)
63+
w3cActivity.RawParentSpanId = span.Context.RawSpanId;
6164

62-
return false;
65+
// We clear internal Ids to force recalculation.
66+
w3cActivity.RawId = null;
67+
w3cActivity.RawParentId = null;
68+
}
69+
#pragma warning restore DDDUCK001 // Checking IDuckType for null
70+
}
6371
}
6472

6573
public bool ShouldListenTo(string sourceName, string? version)
@@ -87,35 +95,5 @@ public void ActivityStopped<T>(string sourceName, T activity)
8795
{
8896
// Do nothing
8997
}
90-
91-
private static void IgnoreActivity<T>(T activity, Span? span)
92-
where T : IActivity
93-
{
94-
if (span is not null && activity is IW3CActivity w3cActivity)
95-
{
96-
#pragma warning disable DDDUCK001 // Checking IDuckType for null
97-
if ((activity.Parent is null || activity.Parent.StartTimeUtc < span.StartTime.UtcDateTime)
98-
&& w3cActivity.SpanId is not null
99-
&& w3cActivity.TraceId is not null)
100-
{
101-
// If we ignore the activity and there's an existing active span
102-
// We modify the activity spanId with the one in the span
103-
// The reason for that is in case this ignored activity is used
104-
// for propagation then the current active span will appear as parentId
105-
// in the context propagation, and we will keep the entire trace.
106-
107-
// TraceId (always 32 chars long even when using 64-bit ids)
108-
w3cActivity.TraceId = span.Context.RawTraceId;
109-
110-
// SpanId (always 16 chars long)
111-
w3cActivity.ParentSpanId = span.Context.RawSpanId;
112-
113-
// We clear internals Id and ParentId values to force recalculation.
114-
w3cActivity.RawId = null;
115-
w3cActivity.RawParentId = null;
116-
}
117-
#pragma warning restore DDDUCK001 // Checking IDuckType for null
118-
}
119-
}
12098
}
12199
}

tracer/test/Datadog.Trace.Tests/ActivityTests.cs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
// </copyright>
55

66
using System;
7+
using System.Threading.Tasks;
78
using Datadog.Trace.Configuration;
89
using Datadog.Trace.Propagators;
910
using Datadog.Trace.TestHelpers;
@@ -32,14 +33,14 @@ public ActivityTests(ActivityFixture fixture)
3233
}
3334

3435
[SkippableFact]
35-
public void SimpleActivitiesAndSpansTest()
36+
public async Task SimpleActivitiesAndSpansTest()
3637
{
3738
// macos 12 is crazy flaky around timings
3839
// We should unskip this once we have resolved the issues around Hierarchical IDs
3940
SkipOn.Platform(SkipOn.PlatformValue.MacOs);
4041

4142
var settings = new TracerSettings();
42-
var tracer = TracerHelper.CreateWithFakeAgent(settings);
43+
await using var tracer = TracerHelper.CreateWithFakeAgent(settings);
4344
Tracer.UnsafeSetTracerInstance(tracer);
4445

4546
Tracer.Instance.ActiveScope.Should().BeNull();
@@ -136,10 +137,10 @@ public void SimpleActivitiesAndSpansTest()
136137
}
137138

138139
[Fact]
139-
public void SimpleSpansAndActivitiesTest()
140+
public async Task SimpleSpansAndActivitiesTest()
140141
{
141142
var settings = new TracerSettings();
142-
var tracer = TracerHelper.CreateWithFakeAgent(settings);
143+
await using var tracer = TracerHelper.CreateWithFakeAgent(settings);
143144
Tracer.UnsafeSetTracerInstance(tracer);
144145

145146
Tracer.Instance.ActiveScope.Should().BeNull();

0 commit comments

Comments
 (0)