diff --git a/tracer/src/Datadog.Trace/Activity/DuckTypes/IActivity.cs b/tracer/src/Datadog.Trace/Activity/DuckTypes/IActivity.cs index 3e3852b8762d..876b6264d0d1 100644 --- a/tracer/src/Datadog.Trace/Activity/DuckTypes/IActivity.cs +++ b/tracer/src/Datadog.Trace/Activity/DuckTypes/IActivity.cs @@ -22,6 +22,11 @@ internal interface IActivity : IDuckType /// unless you know that you're not using W3C activities (because is null string Id { get; } + /// + /// Gets the ParentId, as a string calculated from or, if an , + /// from the . + /// + /// NOTE: this property allocates, and so should generally not be called unless the specific returned value is required string? ParentId { get; } // The corresponding property on the Activity object is nullable, diff --git a/tracer/src/Datadog.Trace/Activity/DuckTypes/IW3CActivity.cs b/tracer/src/Datadog.Trace/Activity/DuckTypes/IW3CActivity.cs index 2e365d488d7f..39ac263043d4 100644 --- a/tracer/src/Datadog.Trace/Activity/DuckTypes/IW3CActivity.cs +++ b/tracer/src/Datadog.Trace/Activity/DuckTypes/IW3CActivity.cs @@ -21,7 +21,7 @@ internal interface IW3CActivity : IActivity string? SpanId { get; set; } [DuckField(Name = "_parentSpanId")] - string ParentSpanId { get; set; } + string? RawParentSpanId { get; set; } [DuckField(Name = "_id")] string? RawId { get; set; } diff --git a/tracer/src/Datadog.Trace/Activity/Handlers/ActivityHandlerCommon.cs b/tracer/src/Datadog.Trace/Activity/Handlers/ActivityHandlerCommon.cs index daf20e844192..98f548dc4358 100644 --- a/tracer/src/Datadog.Trace/Activity/Handlers/ActivityHandlerCommon.cs +++ b/tracer/src/Datadog.Trace/Activity/Handlers/ActivityHandlerCommon.cs @@ -39,7 +39,6 @@ public static void ActivityStarted(string sourceName, T activity, OpenTelemet where T : IActivity { Tracer.Instance.TracerManager.Telemetry.IntegrationRunning(IntegrationId); - var activeSpan = Tracer.Instance.ActiveScope?.Span as Span; // Propagate Trace and Parent Span ids SpanContext? parent = null; @@ -48,25 +47,28 @@ public static void ActivityStarted(string sourceName, T activity, OpenTelemet string? rawTraceId = null; string? rawSpanId = null; - // for non-IW3CActivity interfaces we'll use Activity.Id and string.Empty as the key as they don't have a guaranteed TraceId+SpanId - // for IW3CActivity interfaces we'll use the Activity.TraceId + Activity.SpanId as the key - // have to also validate that the TraceId and SpanId actually exist and aren't null - as they can be in some cases + // for non-W3C activities using Hierarchical IDs (both IW3CActivity and IActivity) we use Activity.Id and string.Empty + // for W3C ID activities (always IW3CActivity) we'll use the Activity.TraceId + Activity.SpanId as the key ActivityKey? activityKey = null; - if (activity is IW3CActivity w3cActivity) + if (activity is IW3CActivity activity3) { - var activityTraceId = w3cActivity.TraceId; - var activitySpanId = w3cActivity.SpanId; + var activityTraceId = activity3.TraceId; + var activitySpanId = activity3.SpanId; - // If the user has specified a parent context, get the parent Datadog SpanContext - if (w3cActivity is { ParentSpanId: { } parentSpanId, ParentId: { } parentId }) + if (!StringUtil.IsNullOrEmpty(activityTraceId)) { - // We know that we have a parent context, but we use TraceId+ParentSpanId for the mapping. - // This is a result of an issue with OTel v1.0.1 (unsure if OTel or us tbh) where the - // ".ParentId" matched for the Trace+Span IDs but not for the flags portion. - // Doing a lookup on just the TraceId+ParentSpanId seems to be more resilient. - if (activityTraceId != null!) + // W3C ID + if (activity3 is { RawParentSpanId: { } parentSpanId }) { + // This will be true for activities using W3C IDs which have a "remote" parent span ID + // We explicitly don't check the case where we _do_ have a Parent object (i.e. in-process activity) + // as in that scenario we may need to remap the parent instead (see below). + // + // We know that we have a parent context, but we use TraceId+ParentSpanId for the mapping. + // This is a result of an issue with OTel v1.0.1 (unsure if OTel or us tbh) where the + // ".ParentId" matched for the Trace+Span IDs but not for the flags portion. + // Doing a lookup on just the TraceId+ParentSpanId seems to be more resilient. if (ActivityMappingById.TryGetValue(new ActivityKey(activityTraceId, parentSpanId), out ActivityMapping mapping)) { parent = mapping.Scope.Span.Context; @@ -85,43 +87,55 @@ public static void ActivityStarted(string sourceName, T activity, OpenTelemet rawSpanId: parentSpanId); } } + } + else + { + // No traceID, so must be Hierarchical ID + if (activity3 is { RawParentSpanId: { } parentSpanId }) + { + // This is a weird scenario - we're in a hierarchical ID, we don't have a trace ID, but we _do_ have a _parentSpanID?! + // should never hit this path unless we've gone wrong somewhere + Log.Error("Activity with ID {ActivityId} had parent span ID {ParentSpanId} but TraceID was missing", activity.Id, parentSpanId); + } else { - // we have a ParentSpanId/ParentId, but no TraceId/SpanId, so default to use the ParentId for lookup - if (ActivityMappingById.TryGetValue(new ActivityKey(parentId), out ActivityMapping mapping)) + // Since _parentSpanID is null, this either grabs _parentId, or Parent.Id, depending on what was set + var parentId = activity3.ParentId; + if (!StringUtil.IsNullOrEmpty(parentId) && ActivityMappingById.TryGetValue(new ActivityKey(parentId), out ActivityMapping mapping)) { parent = mapping.Scope.Span.Context; } } } - if (parent is null && activeSpan is not null) + // If we don't have a remote context, then we may need to remap the current activity to + // reparent it with a datadog span + if (parent is null + && activitySpanId is not null + && activityTraceId is not null + && Tracer.Instance.ActiveScope?.Span is Span activeSpan + && (activity.Parent is null || activity.Parent.StartTimeUtc <= activeSpan.StartTime.UtcDateTime)) { // We ensure the activity follows the same TraceId as the span // And marks the ParentId the current spanId - if ((activity.Parent is null || activity.Parent.StartTimeUtc <= activeSpan.StartTime.UtcDateTime) - && activitySpanId is not null - && activityTraceId is not null) - { - // TraceId (always 32 chars long even when using 64-bit ids) - w3cActivity.TraceId = activeSpan.Context.RawTraceId; - activityTraceId = w3cActivity.TraceId; + // TraceId (always 32 chars long even when using 64-bit ids) + activity3.TraceId = activeSpan.Context.RawTraceId; + activityTraceId = activity3.TraceId; - // SpanId (always 16 chars long) - w3cActivity.ParentSpanId = activeSpan.Context.RawSpanId; + // SpanId (always 16 chars long) + activity3.RawParentSpanId = activeSpan.Context.RawSpanId; - // We clear internals Id and ParentId values to force recalculation. - w3cActivity.RawId = null; - w3cActivity.RawParentId = null; + // We clear internal IDs to force recalculation. + activity3.RawId = null; + activity3.RawParentId = null; - // Avoid recalculation of the traceId. - traceId = activeSpan.TraceId128; - } + // Avoid recalculation of the traceId. + traceId = activeSpan.TraceId128; } // if there's an existing Activity we try to use its TraceId and SpanId, // but if Activity.IdFormat is not ActivityIdFormat.W3C, they may be null or unparsable - if (activityTraceId != null! && activitySpanId != null!) + if (activityTraceId != null && activitySpanId != null) { if (traceId == TraceId.Zero) { @@ -132,11 +146,16 @@ public static void ActivityStarted(string sourceName, T activity, OpenTelemet rawTraceId = activityTraceId; rawSpanId = activitySpanId; + activityKey = new(traceId: activityTraceId, spanId: activitySpanId); } - - if (activityTraceId != null! && activitySpanId != null!) + } + else + { + // non-IW3CActivity, i.e. we're in .NET Core 2.x territory. Only have hierarchical IDs to worry about here + var parentId = activity.ParentId; + if (!StringUtil.IsNullOrEmpty(parentId) && ActivityMappingById.TryGetValue(new ActivityKey(parentId), out ActivityMapping mapping)) { - activityKey = new(traceId: activityTraceId, spanId: activitySpanId); + parent = mapping.Scope.Span.Context; } } @@ -158,8 +177,9 @@ public static void ActivityStarted(string sourceName, T activity, OpenTelemet } // We check if we have to ignore the activity by the operation name value - if (IgnoreActivityHandler.IgnoreByOperationName(activity, activeSpan)) + if (IgnoreActivityHandler.ShouldIgnoreByOperationName(activity.OperationName)) { + IgnoreActivityHandler.IgnoreActivity(activity, Tracer.Instance.ActiveScope?.Span as Span); activityMapping = default; return; } @@ -216,7 +236,7 @@ public static void ActivityStopped(string sourceName, T activity) { if (activity.Instance is not null) { - if (IgnoreActivityHandler.ShouldIgnoreByOperationName(activity)) + if (IgnoreActivityHandler.ShouldIgnoreByOperationName(activity.OperationName)) { return; } @@ -255,6 +275,16 @@ public static void ActivityStopped(string sourceName, T activity) } } + StopActivitySlow(sourceName, activity); + } + catch (Exception ex) + { + Log.Error(ex, "Error processing the DefaultActivityHandler.ActivityStopped callback"); + } + + static void StopActivitySlow(string sourceName, TInner activity) + where TInner : IActivity + { // The listener didn't send us the Activity or the scope instance was not found // In this case we are going go through the dictionary to check if we have an activity that // has been closed and then close the associated scope. @@ -319,10 +349,6 @@ public static void ActivityStopped(string sourceName, T activity) } } } - catch (Exception ex) - { - Log.Error(ex, "Error processing the DefaultActivityHandler.ActivityStopped callback"); - } static void CloseActivityScope(string sourceName, TInner activity, Scope scope) where TInner : IActivity diff --git a/tracer/src/Datadog.Trace/Activity/Handlers/IgnoreActivityHandler.cs b/tracer/src/Datadog.Trace/Activity/Handlers/IgnoreActivityHandler.cs index 04055adf1ade..d055f24f6d95 100644 --- a/tracer/src/Datadog.Trace/Activity/Handlers/IgnoreActivityHandler.cs +++ b/tracer/src/Datadog.Trace/Activity/Handlers/IgnoreActivityHandler.cs @@ -5,6 +5,7 @@ #nullable enable +using System; using Datadog.Trace.Activity.DuckTypes; using Datadog.Trace.Util; @@ -30,36 +31,43 @@ internal sealed class IgnoreActivityHandler : IActivityHandler "Experimental.System.Net.Sockets", }; - private static readonly string[] IgnoreOperationNamesStartingWith = + public static bool ShouldIgnoreByOperationName(string? operationName) { - "System.Net.Http.", - "Microsoft.AspNetCore.", - }; + // We only have two ignored operation names for now, if we get more, we can be more + // generalized, but this is called twice in hot path creation + return operationName is not null + && (operationName.StartsWith("System.Net.Http.", StringComparison.Ordinal) + || operationName.StartsWith("Microsoft.AspNetCore.", StringComparison.Ordinal)); + } - public static bool ShouldIgnoreByOperationName(T activity) + public static void IgnoreActivity(T activity, Span? span) where T : IActivity { - foreach (var ignoreSourceName in IgnoreOperationNamesStartingWith) + if (span is not null && activity is IW3CActivity w3cActivity) { - if (activity.OperationName?.StartsWith(ignoreSourceName) == true) +#pragma warning disable DDDUCK001 // Checking IDuckType for null + if ((activity.Parent is null || activity.Parent.StartTimeUtc < span.StartTime.UtcDateTime) + && w3cActivity.SpanId is not null + && w3cActivity.TraceId is not null) { - return true; - } - } + // If we ignore the activity and there's an existing active span + // We modify the activity spanId with the one in the span + // The reason for that is in case this ignored activity is used + // for propagation then the current active span will appear as parentId + // in the context propagation, and we will keep the entire trace. - return false; - } + // TraceId (always 32 chars long even when using 64-bit ids) + w3cActivity.TraceId = span.Context.RawTraceId; - public static bool IgnoreByOperationName(T activity, Span? span) - where T : IActivity - { - if (ShouldIgnoreByOperationName(activity)) - { - IgnoreActivity(activity, span); - return true; - } + // SpanId (always 16 chars long) + w3cActivity.RawParentSpanId = span.Context.RawSpanId; - return false; + // We clear internal Ids to force recalculation. + w3cActivity.RawId = null; + w3cActivity.RawParentId = null; + } +#pragma warning restore DDDUCK001 // Checking IDuckType for null + } } public bool ShouldListenTo(string sourceName, string? version) @@ -87,35 +95,5 @@ public void ActivityStopped(string sourceName, T activity) { // Do nothing } - - private static void IgnoreActivity(T activity, Span? span) - where T : IActivity - { - if (span is not null && activity is IW3CActivity w3cActivity) - { -#pragma warning disable DDDUCK001 // Checking IDuckType for null - if ((activity.Parent is null || activity.Parent.StartTimeUtc < span.StartTime.UtcDateTime) - && w3cActivity.SpanId is not null - && w3cActivity.TraceId is not null) - { - // If we ignore the activity and there's an existing active span - // We modify the activity spanId with the one in the span - // The reason for that is in case this ignored activity is used - // for propagation then the current active span will appear as parentId - // in the context propagation, and we will keep the entire trace. - - // TraceId (always 32 chars long even when using 64-bit ids) - w3cActivity.TraceId = span.Context.RawTraceId; - - // SpanId (always 16 chars long) - w3cActivity.ParentSpanId = span.Context.RawSpanId; - - // We clear internals Id and ParentId values to force recalculation. - w3cActivity.RawId = null; - w3cActivity.RawParentId = null; - } -#pragma warning restore DDDUCK001 // Checking IDuckType for null - } - } } } diff --git a/tracer/test/Datadog.Trace.Tests/ActivityTests.cs b/tracer/test/Datadog.Trace.Tests/ActivityTests.cs index 63b796c6ac1e..1c3ddc485fd2 100644 --- a/tracer/test/Datadog.Trace.Tests/ActivityTests.cs +++ b/tracer/test/Datadog.Trace.Tests/ActivityTests.cs @@ -4,6 +4,7 @@ // using System; +using System.Threading.Tasks; using Datadog.Trace.Configuration; using Datadog.Trace.Propagators; using Datadog.Trace.TestHelpers; @@ -32,14 +33,14 @@ public ActivityTests(ActivityFixture fixture) } [SkippableFact] - public void SimpleActivitiesAndSpansTest() + public async Task SimpleActivitiesAndSpansTest() { // macos 12 is crazy flaky around timings // We should unskip this once we have resolved the issues around Hierarchical IDs SkipOn.Platform(SkipOn.PlatformValue.MacOs); var settings = new TracerSettings(); - var tracer = TracerHelper.CreateWithFakeAgent(settings); + await using var tracer = TracerHelper.CreateWithFakeAgent(settings); Tracer.UnsafeSetTracerInstance(tracer); Tracer.Instance.ActiveScope.Should().BeNull(); @@ -136,10 +137,10 @@ public void SimpleActivitiesAndSpansTest() } [Fact] - public void SimpleSpansAndActivitiesTest() + public async Task SimpleSpansAndActivitiesTest() { var settings = new TracerSettings(); - var tracer = TracerHelper.CreateWithFakeAgent(settings); + await using var tracer = TracerHelper.CreateWithFakeAgent(settings); Tracer.UnsafeSetTracerInstance(tracer); Tracer.Instance.ActiveScope.Should().BeNull();