diff --git a/tracer/src/Datadog.Trace.Trimming/build/Datadog.Trace.Trimming.xml b/tracer/src/Datadog.Trace.Trimming/build/Datadog.Trace.Trimming.xml index c3f7aa3b23de..e729b88100d6 100644 --- a/tracer/src/Datadog.Trace.Trimming/build/Datadog.Trace.Trimming.xml +++ b/tracer/src/Datadog.Trace.Trimming/build/Datadog.Trace.Trimming.xml @@ -614,6 +614,7 @@ + diff --git a/tracer/src/Datadog.Trace/Configuration/TracerSettings.cs b/tracer/src/Datadog.Trace/Configuration/TracerSettings.cs index 1c378c6c69a2..9a1d42c2ddec 100644 --- a/tracer/src/Datadog.Trace/Configuration/TracerSettings.cs +++ b/tracer/src/Datadog.Trace/Configuration/TracerSettings.cs @@ -179,6 +179,16 @@ not null when string.Equals(value, "otlp", StringComparison.OrdinalIgnoreCase) = RuntimeMetricsEnabled = runtimeMetricsEnabledResult.WithDefault(false); + RuntimeMetricsDiagnosticsMetricsApiEnabled = config.WithKeys(ConfigurationKeys.RuntimeMetricsDiagnosticsMetricsApiEnabled).AsBool(false); + +#if !NET6_0_OR_GREATER + if (RuntimeMetricsEnabled && RuntimeMetricsDiagnosticsMetricsApiEnabled) + { + Log.Warning( + $"{ConfigurationKeys.RuntimeMetricsDiagnosticsMetricsApiEnabled} was enabled, but System.Diagnostics.Metrics is only available on .NET 6+. Using standard runtime metrics collector."); + telemetry.Record(ConfigurationKeys.RuntimeMetricsDiagnosticsMetricsApiEnabled, false, ConfigurationOrigins.Calculated); + } +#endif OtelMetricExportIntervalMs = config .WithKeys(ConfigurationKeys.OpenTelemetry.MetricExportIntervalMs) .AsInt32(defaultValue: 10_000); @@ -1053,6 +1063,15 @@ not null when string.Equals(value, "otlp", StringComparison.OrdinalIgnoreCase) = /// internal bool RuntimeMetricsEnabled { get; } + /// + /// Gets a value indicating whether the experimental runtime metrics collector, which uses the + /// System.Diagnostics.Metrics API, is enabled. + /// This collector can only be enabled when using .NET 6+, and will only include ASP.NET Core metrics + /// when using .NET 8+. + /// + /// + internal bool RuntimeMetricsDiagnosticsMetricsApiEnabled { get; } + /// /// Gets a value indicating whether libdatadog data pipeline /// is enabled. diff --git a/tracer/src/Datadog.Trace/Configuration/supported-configurations-docs.yaml b/tracer/src/Datadog.Trace/Configuration/supported-configurations-docs.yaml index dd981888f8d0..fed47608e486 100644 --- a/tracer/src/Datadog.Trace/Configuration/supported-configurations-docs.yaml +++ b/tracer/src/Datadog.Trace/Configuration/supported-configurations-docs.yaml @@ -555,6 +555,13 @@ DD_RUNTIME_METRICS_ENABLED: | Configuration key for enabling or disabling runtime metrics sent to DogStatsD. Default value is false (disabled). +DD_RUNTIME_METRICS_DIAGNOSTICS_METRICS_API_ENABLED: | + Enables an experimental runtime metrics collector which uses the + System.Diagnostics.Metrics API. + This collector can only be enabled when using .NET 6+, and will only include ASP.NET Core metrics + when using .NET 8+. + Default value is false (disabled). + DD_SERVICE: | Configuration key for the application's default service name. 
Used as the service name for top-level spans, diff --git a/tracer/src/Datadog.Trace/Configuration/supported-configurations.json b/tracer/src/Datadog.Trace/Configuration/supported-configurations.json index 514b15af1b95..bb98a7992ce4 100644 --- a/tracer/src/Datadog.Trace/Configuration/supported-configurations.json +++ b/tracer/src/Datadog.Trace/Configuration/supported-configurations.json @@ -868,6 +868,11 @@ "A" ] }, + "DD_RUNTIME_METRICS_DIAGNOSTICS_METRICS_API_ENABLED": { + "version": [ + "A" + ] + }, "DD_SERVICE": { "version": [ "A" diff --git a/tracer/src/Datadog.Trace/Generated/net461/Datadog.Trace.SourceGenerators/ConfigurationKeysGenerator/ConfigurationKeys.g.cs b/tracer/src/Datadog.Trace/Generated/net461/Datadog.Trace.SourceGenerators/ConfigurationKeysGenerator/ConfigurationKeys.g.cs index 6d551b30f309..76802c7cf644 100644 --- a/tracer/src/Datadog.Trace/Generated/net461/Datadog.Trace.SourceGenerators/ConfigurationKeysGenerator/ConfigurationKeys.g.cs +++ b/tracer/src/Datadog.Trace/Generated/net461/Datadog.Trace.SourceGenerators/ConfigurationKeysGenerator/ConfigurationKeys.g.cs @@ -215,6 +215,15 @@ internal static partial class ConfigurationKeys [System.Obsolete("This parameter is obsolete and should be replaced by `DD_TRACE_RATE_LIMIT`")] public const string MaxTracesSubmittedPerSecond = "DD_MAX_TRACES_PER_SECOND"; + /// + /// Enables an experimental runtime metrics collector which uses the + /// System.Diagnostics.Metrics API. + /// This collector can only be enabled when using .NET 6+, and will only include ASP.NET Core metrics + /// when using .NET 8+. + /// Default value is false (disabled). + /// + public const string RuntimeMetricsDiagnosticsMetricsApiEnabled = "DD_RUNTIME_METRICS_DIAGNOSTICS_METRICS_API_ENABLED"; + /// /// Configuration key for enabling or disabling runtime metrics sent to DogStatsD. /// Default value is false (disabled). diff --git a/tracer/src/Datadog.Trace/Generated/net6.0/Datadog.Trace.SourceGenerators/ConfigurationKeysGenerator/ConfigurationKeys.g.cs b/tracer/src/Datadog.Trace/Generated/net6.0/Datadog.Trace.SourceGenerators/ConfigurationKeysGenerator/ConfigurationKeys.g.cs index 6d551b30f309..76802c7cf644 100644 --- a/tracer/src/Datadog.Trace/Generated/net6.0/Datadog.Trace.SourceGenerators/ConfigurationKeysGenerator/ConfigurationKeys.g.cs +++ b/tracer/src/Datadog.Trace/Generated/net6.0/Datadog.Trace.SourceGenerators/ConfigurationKeysGenerator/ConfigurationKeys.g.cs @@ -215,6 +215,15 @@ internal static partial class ConfigurationKeys [System.Obsolete("This parameter is obsolete and should be replaced by `DD_TRACE_RATE_LIMIT`")] public const string MaxTracesSubmittedPerSecond = "DD_MAX_TRACES_PER_SECOND"; + /// + /// Enables an experimental runtime metrics collector which uses the + /// System.Diagnostics.Metrics API. + /// This collector can only be enabled when using .NET 6+, and will only include ASP.NET Core metrics + /// when using .NET 8+. + /// Default value is false (disabled). + /// + public const string RuntimeMetricsDiagnosticsMetricsApiEnabled = "DD_RUNTIME_METRICS_DIAGNOSTICS_METRICS_API_ENABLED"; + /// /// Configuration key for enabling or disabling runtime metrics sent to DogStatsD. /// Default value is false (disabled). 
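A minimal sketch (illustrative only, not part of this diff) of how the new key is expected to be switched on alongside the existing runtime metrics flag. The variable names come from the changes above; the values and the in-process approach are assumptions for illustration — in practice both variables would normally be set in the host environment before the process starts. The new flag has no effect unless runtime metrics themselves are enabled, since TracerManagerFactory only creates the RuntimeMetricsWriter when RuntimeMetricsEnabled is true, and on runtimes older than .NET 6 the standard collector is used regardless.

    // Illustrative sketch only: opt in to the experimental System.Diagnostics.Metrics-based collector.
    using System;

    Environment.SetEnvironmentVariable("DD_RUNTIME_METRICS_ENABLED", "true");
    Environment.SetEnvironmentVariable("DD_RUNTIME_METRICS_DIAGNOSTICS_METRICS_API_ENABLED", "true");
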
diff --git a/tracer/src/Datadog.Trace/Generated/netcoreapp3.1/Datadog.Trace.SourceGenerators/ConfigurationKeysGenerator/ConfigurationKeys.g.cs b/tracer/src/Datadog.Trace/Generated/netcoreapp3.1/Datadog.Trace.SourceGenerators/ConfigurationKeysGenerator/ConfigurationKeys.g.cs index 6d551b30f309..76802c7cf644 100644 --- a/tracer/src/Datadog.Trace/Generated/netcoreapp3.1/Datadog.Trace.SourceGenerators/ConfigurationKeysGenerator/ConfigurationKeys.g.cs +++ b/tracer/src/Datadog.Trace/Generated/netcoreapp3.1/Datadog.Trace.SourceGenerators/ConfigurationKeysGenerator/ConfigurationKeys.g.cs @@ -215,6 +215,15 @@ internal static partial class ConfigurationKeys [System.Obsolete("This parameter is obsolete and should be replaced by `DD_TRACE_RATE_LIMIT`")] public const string MaxTracesSubmittedPerSecond = "DD_MAX_TRACES_PER_SECOND"; + /// + /// Enables an experimental runtime metrics collector which uses the + /// System.Diagnostics.Metrics API. + /// This collector can only be enabled when using .NET 6+, and will only include ASP.NET Core metrics + /// when using .NET 8+. + /// Default value is false (disabled). + /// + public const string RuntimeMetricsDiagnosticsMetricsApiEnabled = "DD_RUNTIME_METRICS_DIAGNOSTICS_METRICS_API_ENABLED"; + /// /// Configuration key for enabling or disabling runtime metrics sent to DogStatsD. /// Default value is false (disabled). diff --git a/tracer/src/Datadog.Trace/Generated/netstandard2.0/Datadog.Trace.SourceGenerators/ConfigurationKeysGenerator/ConfigurationKeys.g.cs b/tracer/src/Datadog.Trace/Generated/netstandard2.0/Datadog.Trace.SourceGenerators/ConfigurationKeysGenerator/ConfigurationKeys.g.cs index 6d551b30f309..76802c7cf644 100644 --- a/tracer/src/Datadog.Trace/Generated/netstandard2.0/Datadog.Trace.SourceGenerators/ConfigurationKeysGenerator/ConfigurationKeys.g.cs +++ b/tracer/src/Datadog.Trace/Generated/netstandard2.0/Datadog.Trace.SourceGenerators/ConfigurationKeysGenerator/ConfigurationKeys.g.cs @@ -215,6 +215,15 @@ internal static partial class ConfigurationKeys [System.Obsolete("This parameter is obsolete and should be replaced by `DD_TRACE_RATE_LIMIT`")] public const string MaxTracesSubmittedPerSecond = "DD_MAX_TRACES_PER_SECOND"; + /// + /// Enables an experimental runtime metrics collector which uses the + /// System.Diagnostics.Metrics API. + /// This collector can only be enabled when using .NET 6+, and will only include ASP.NET Core metrics + /// when using .NET 8+. + /// Default value is false (disabled). + /// + public const string RuntimeMetricsDiagnosticsMetricsApiEnabled = "DD_RUNTIME_METRICS_DIAGNOSTICS_METRICS_API_ENABLED"; + /// /// Configuration key for enabling or disabling runtime metrics sent to DogStatsD. /// Default value is false (disabled). diff --git a/tracer/src/Datadog.Trace/RuntimeMetrics/DiagnosticsMetricsRuntimeMetricsListener.cs b/tracer/src/Datadog.Trace/RuntimeMetrics/DiagnosticsMetricsRuntimeMetricsListener.cs new file mode 100644 index 000000000000..c83ec4852b24 --- /dev/null +++ b/tracer/src/Datadog.Trace/RuntimeMetrics/DiagnosticsMetricsRuntimeMetricsListener.cs @@ -0,0 +1,340 @@ +// +// Unless explicitly stated otherwise all files in this repository are licensed under the Apache 2 License. +// This product includes software developed at Datadog (https://www.datadoghq.com/). Copyright 2017 Datadog, Inc. 
+// +#if NET6_0_OR_GREATER + +#nullable enable + +using System; +using System.Collections.Generic; +using System.Diagnostics.Metrics; +using System.Reflection; +using System.Threading; +using Datadog.Trace.DogStatsd; +using Datadog.Trace.Logging; +using Datadog.Trace.Vendors.Serilog.Events; + +namespace Datadog.Trace.RuntimeMetrics; + +internal sealed class DiagnosticsMetricsRuntimeMetricsListener : IRuntimeMetricsListener +{ + private static readonly IDatadogLogger Log = DatadogLogging.GetLoggerFor(); + + // Note that we don't currently record Pinned Object Heap sizes + private static readonly string[] GcGenSizeMetricNames = [MetricsNames.Gen0HeapSize, MetricsNames.Gen1HeapSize, MetricsNames.Gen2HeapSize, MetricsNames.LohSize]; + private static readonly string[] GcGenCountMetricNames = [MetricsNames.Gen0CollectionsCount, MetricsNames.Gen1CollectionsCount, MetricsNames.Gen2CollectionsCount]; + + private static readonly int MaxGcSizeGenerations = Math.Min(GC.GetGCMemoryInfo().GenerationInfo.Length, GcGenSizeMetricNames.Length); + private static readonly int MaxGcCountGeneration = Math.Min(GC.MaxGeneration, GcGenCountMetricNames.Length - 1); + + private readonly IStatsdManager _statsd; + private readonly MeterListener _listener; + private readonly bool _aspnetcoreMetricsAvailable; + private readonly long?[] _previousGenCounts = [null, null, null]; + private readonly Func _getGcPauseTimeFunc; + + private double _gcPauseTimeSeconds; + + private long _activeRequests; + private long _failedRequests; + private long _successRequests; + private long _queuedRequests; + private long _activeConnections; + private long _queuedConnections; + private long _totalClosedConnections; + + private double? _previousGcPauseTime; + + private long? _previousContentionCount; + + public DiagnosticsMetricsRuntimeMetricsListener(IStatsdManager statsd) + { + _statsd = statsd; + _listener = new() + { + InstrumentPublished = OnInstrumentPublished, + }; + + // ASP.NET Core metrics are only available on .NET 8+ + _aspnetcoreMetricsAvailable = Environment.Version.Major >= 8; + + if (Environment.Version.Major >= 9) + { + // System.Runtime metrics are only available on .NET 9+, but the only one we need it for is GC pause time + _getGcPauseTimeFunc = GetGcPauseTime_RuntimeMetrics; + } + else if (Environment.Version.Major > 6 + || Environment.Version is { Major: 6, Build: >= 21 }) + { + // .NET 6.0.21 introduced the GC.GetTotalPauseDuration() method https://github.com/dotnet/runtime/pull/87143 + // Which is what OTel uses where required: https://github.com/open-telemetry/opentelemetry-dotnet-contrib/blob/5aa6d868/src/OpenTelemetry.Instrumentation.Runtime/RuntimeMetrics.cs#L105C40-L107 + // We could use ducktyping instead of reflection, but this is such a simple case that it's kind of easier + // to just go with the delegate approach + var methodInfo = typeof(GC).GetMethod("GetTotalPauseDuration", BindingFlags.Public | BindingFlags.Static); + if (methodInfo is null) + { + // strange, but we failed to get the delegate + _getGcPauseTimeFunc = GetGcPauseTime_Noop; + } + else + { + var getTotalPauseDuration = methodInfo.CreateDelegate>(); + _getGcPauseTimeFunc = _ => getTotalPauseDuration().TotalMilliseconds; + } + } + else + { + // can't get pause time + _getGcPauseTimeFunc = GetGcPauseTime_Noop; + } + + // The .NET runtime instruments we listen to only produce long or double values + // so that's all we listen for here + _listener.SetMeasurementEventCallback(OnMeasurementRecordedLong); + 
_listener.SetMeasurementEventCallback(OnMeasurementRecordedDouble); + + _listener.Start(); + } + + public void Dispose() + { + _listener.Dispose(); + } + + public void Refresh() + { + // This triggers the observable metrics to go and read the values, then calls the OnMeasurement callbacks to send them to us + _listener.RecordObservableInstruments(); + + // Now we calculate and send the values to statsd. + using var lease = _statsd.TryGetClientLease(); + var statsd = lease.Client ?? NoOpStatsd.Instance; + + // There are many stats that we can grab directly, without needing to use the metrics APIs (which just wrap these calls anyway) + statsd.Gauge(MetricsNames.ThreadPoolWorkersCount, ThreadPool.ThreadCount); + Log.Debug($"Sent the following metrics to the DD agent: {MetricsNames.ThreadPoolWorkersCount}"); + + var contentionCount = Monitor.LockContentionCount; + if (_previousContentionCount.HasValue) + { + statsd.Counter(MetricsNames.ContentionCount, contentionCount - _previousContentionCount.Value); + Log.Debug($"Sent the following metrics to the DD agent: {MetricsNames.ContentionCount}"); + } + + _previousContentionCount = contentionCount; + + // GC Heap Size based on "dotnet.gc.last_collection.heap.size" metric + // from https://github.com/dotnet/runtime/blob/v10.0.1/src/libraries/System.Diagnostics.DiagnosticSource/src/System/Diagnostics/Metrics/RuntimeMetrics.cs#L185 + // If we call this API before _any_ GCs happen, we'll get 0s for the heap size, so check for that and skip emitting if so + var gcInfo = GC.GetGCMemoryInfo(); + if (gcInfo.Index != 0) + { + // Heap sizes + for (var i = 0; i < MaxGcSizeGenerations; ++i) + { + statsd.Gauge(GcGenSizeMetricNames[i], gcInfo.GenerationInfo[i].SizeAfterBytes); + } + + // memory load + // This is attempting to emulate the GcGlobalHeapHistory.MemoryLoad event details + // That value is calculated using + // - `current_gc_data_global->mem_pressure` (src/coreclr/gc/gc.cpp#L3288) + // - which fetches the value set via `history->mem_pressure = entry_memory_load` (src/coreclr/gc/gc.cpp#L7912) + // - which is set by calling `gc_heap::get_memory_info()` (src/coreclr/gc/gc.cpp#L29438) + // - which then calls GCToOSInterface::GetMemoryStatus(...)
which has platform-specific implementations + // - On Linux, memory_load is calculated differently depending on whether there's a restriction (src/coreclr/gc/unix/gcenv.unix.cpp#L1191) + // - Physical Memory Used / Limit + // - (g_totalPhysicalMemSize - GetAvailablePhysicalMemory()) / total + // - On Windows, memory_load is calculated differently depending on whether there's a restriction (src/coreclr/gc/unix/gcenv.windows.cpp#L1000) + // - Working Set Size / Limit + // - GlobalMemoryStatusEx -> (ullTotalVirtual - ullAvailVirtual) * 100.0 / (float)ms.ullTotalVirtual + // + // We try to roughly emulate that using the info in gcInfo: + var availableBytes = gcInfo.TotalAvailableMemoryBytes; + + if (availableBytes > 0) + { + statsd.Gauge(MetricsNames.GcMemoryLoad, (double)gcInfo.MemoryLoadBytes * 100.0 / availableBytes); + } + } + else + { + Log.Debug("No GC collections yet, skipping heap size metrics"); + } + + var gcPauseTimeMilliSeconds = _getGcPauseTimeFunc(this); + // We don't record 0-length pauses, so that we match RuntimeEventListener behaviour + // We don't worry about the floating point comparison, as reporting close to zero is fine + if (gcPauseTimeMilliSeconds.HasValue && _previousGcPauseTime.HasValue + && gcPauseTimeMilliSeconds.Value != _previousGcPauseTime.Value) + { + statsd.Timer(MetricsNames.GcPauseTime, gcPauseTimeMilliSeconds.Value - _previousGcPauseTime.Value); + } + + _previousGcPauseTime = gcPauseTimeMilliSeconds; + + // GC Collection counts based on "dotnet.gc.collections" metric + // from https://github.com/dotnet/runtime/blob/v10.0.1/src/libraries/System.Diagnostics.DiagnosticSource/src/System/Diagnostics/Metrics/RuntimeMetrics.cs#L159 + long collectionsFromHigherGeneration = 0; + + for (var gen = MaxGcCountGeneration; gen >= 0; --gen) + { + long collectionsFromThisGeneration = GC.CollectionCount(gen); + var thisCount = collectionsFromThisGeneration - collectionsFromHigherGeneration; + collectionsFromHigherGeneration = collectionsFromThisGeneration; + + var previous = _previousGenCounts[gen]; + _previousGenCounts[gen] = thisCount; + + if (previous.HasValue) + { + var increment = (int)Math.Min(thisCount - previous.Value, int.MaxValue); + // don't need to report zero increments + if (increment != 0) + { + statsd.Increment(GcGenCountMetricNames[gen], increment); + } + } + } + + // This isn't strictly true, due to the "previous counts" behavior, but it's good enough, and what we do in other listeners + Log.Debug($"Sent the following metrics to the DD agent: {MetricsNames.Gen0HeapSize}, {MetricsNames.Gen1HeapSize}, {MetricsNames.Gen2HeapSize}, {MetricsNames.LohSize}, {MetricsNames.ContentionCount}, {MetricsNames.Gen0CollectionsCount}, {MetricsNames.Gen1CollectionsCount}, {MetricsNames.Gen2CollectionsCount}, {MetricsNames.GcPauseTime}, {MetricsNames.GcMemoryLoad}"); + + // aspnetcore metrics + if (_aspnetcoreMetricsAvailable) + { + var activeRequests = Interlocked.Read(ref _activeRequests); + var failedRequests = Interlocked.Read(ref _failedRequests); + var successRequests = Interlocked.Read(ref _successRequests); + var queuedRequests = Interlocked.Read(ref _queuedRequests); + var currentConnections = Interlocked.Read(ref _activeConnections); + var queuedConnections = Interlocked.Read(ref _queuedConnections); + var totalClosedConnections = Interlocked.Read(ref _totalClosedConnections); + + statsd.Gauge(MetricsNames.AspNetCoreCurrentRequests, activeRequests); + // Recording these as never-reset gauges seems a bit strange to me as it could easily overflow + // but it's what the event listener 
already does, so I guess it's required (changing it would be problematic I think) + statsd.Gauge(MetricsNames.AspNetCoreFailedRequests, failedRequests); + statsd.Gauge(MetricsNames.AspNetCoreTotalRequests, failedRequests + successRequests); + statsd.Gauge(MetricsNames.AspNetCoreRequestQueueLength, queuedRequests); + + statsd.Gauge(MetricsNames.AspNetCoreCurrentConnections, currentConnections); + statsd.Gauge(MetricsNames.AspNetCoreConnectionQueueLength, queuedConnections); + + // Same here, seems risky to have this as a gauge, but I think that ship has sailed + // Note also that as _totalClosedConnections doesn't include _current_ connections, we add that in + statsd.Gauge(MetricsNames.AspNetCoreTotalConnections, totalClosedConnections + currentConnections); + Log.Debug($"Sent the following metrics to the DD agent: {MetricsNames.AspNetCoreCurrentRequests}, {MetricsNames.AspNetCoreFailedRequests}, {MetricsNames.AspNetCoreTotalRequests}, {MetricsNames.AspNetCoreRequestQueueLength}, {MetricsNames.AspNetCoreCurrentConnections}, {MetricsNames.AspNetCoreConnectionQueueLength}, {MetricsNames.AspNetCoreTotalConnections}"); + } + } + + private static void OnMeasurementRecordedDouble(Instrument instrument, double measurement, ReadOnlySpan> tags, object? state) + { + var handler = (DiagnosticsMetricsRuntimeMetricsListener)state!; + switch (instrument.Name) + { + case "dotnet.gc.pause.time": + Interlocked.Exchange(ref handler._gcPauseTimeSeconds, measurement); + break; + case "kestrel.connection.duration": + Interlocked.Increment(ref handler._totalClosedConnections); + break; + case "http.server.request.duration": + foreach (var tagPair in tags) + { + if (tagPair is { Key: "http.response.status_code" }) + { + if (tagPair.Value is >= 500) + { + Interlocked.Increment(ref handler._failedRequests); + } + else + { + Interlocked.Increment(ref handler._successRequests); + } + + return; + } + } + + break; + } + } + + private static void OnMeasurementRecordedLong(Instrument instrument, long measurement, ReadOnlySpan> tags, object? state) + { + var handler = (DiagnosticsMetricsRuntimeMetricsListener)state!; + switch (instrument.Name) + { + // Ignore tags for these up-down counters, we only care about totals + case "http.server.active_requests": + Interlocked.Add(ref handler._activeRequests, measurement); + break; + + case "kestrel.active_connections": + Interlocked.Add(ref handler._activeConnections, measurement); + break; + + case "kestrel.queued_connections": + Interlocked.Add(ref handler._queuedConnections, measurement); + break; + + case "kestrel.queued_requests": + Interlocked.Add(ref handler._queuedRequests, measurement); + break; + } + } + + private static double? GetGcPauseTime_RuntimeMetrics(DiagnosticsMetricsRuntimeMetricsListener listener) + { + var gcPauseTimeSeconds = Interlocked.Exchange(ref listener._gcPauseTimeSeconds, 0); + return gcPauseTimeSeconds * 1_000; + } + + private static double? 
GetGcPauseTime_Noop(DiagnosticsMetricsRuntimeMetricsListener listener) => null; + + private void OnInstrumentPublished(Instrument instrument, MeterListener listener) + { + // We want the following Meter/instruments: + // + // System.Runtime + // - dotnet.gc.pause.time: MetricsNames.GcPauseTime (where possible) + // - [dotnet.gc.collections (tagged by gc.heap.generation=gen0)] - we get these via built-in APIs which are functionally identical + // - [dotnet.gc.last_collection.heap.size (gc.heap.generation=gen0/gen1/gen2/loh/poh)] - we get these via built-in APIs which are functionally identical + // + // Microsoft.AspNetCore.Hosting + // - http.server.active_requests: MetricsNames.AspNetCoreCurrentRequests + // - http.server.request.duration: MetricsNames.AspNetCoreTotalRequests, MetricsNames.AspNetCoreFailedRequests + // + // Microsoft.AspNetCore.Server.Kestrel + // - kestrel.active_connections: MetricsNames.AspNetCoreCurrentConnections, + // - kestrel.queued_connections: MetricsNames.AspNetCoreConnectionQueueLength, + // - kestrel.connection.duration: MetricsNames.AspNetCoreTotalConnections, + // - kestrel.queued_requests: MetricsNames.AspNetCoreRequestQueueLength + // + // We have no way to get these: + // - MetricsNames.ContentionTime. Only available using EventListener + var meterName = instrument.Meter.Name; + var instrumentName = instrument.Name; + if ((string.Equals(meterName, "System.Runtime", StringComparison.Ordinal) && instrumentName is "dotnet.gc.pause.time") + || (string.Equals(meterName, "Microsoft.AspNetCore.Hosting", StringComparison.Ordinal) && instrumentName is + "http.server.active_requests" or + "http.server.request.duration") + || (string.Equals(meterName, "Microsoft.AspNetCore.Server.Kestrel", StringComparison.Ordinal) && instrumentName is + "kestrel.active_connections" or + "kestrel.queued_connections" or + "kestrel.connection.duration" or + "kestrel.queued_requests")) + { + if (Log.IsEnabled(LogEventLevel.Debug)) + { + Log.Debug("Enabled measurement events for instrument: {MeterName}/{InstrumentName} ", meterName, instrumentName); + } + + listener.EnableMeasurementEvents(instrument, state: this); + } + } +} + +#endif diff --git a/tracer/src/Datadog.Trace/RuntimeMetrics/RuntimeMetricsWriter.cs b/tracer/src/Datadog.Trace/RuntimeMetrics/RuntimeMetricsWriter.cs index add3b4caf66e..47415d61729d 100644 --- a/tracer/src/Datadog.Trace/RuntimeMetrics/RuntimeMetricsWriter.cs +++ b/tracer/src/Datadog.Trace/RuntimeMetrics/RuntimeMetricsWriter.cs @@ -12,6 +12,7 @@ using System.Threading.Tasks; using Datadog.Trace.DogStatsd; using Datadog.Trace.Logging; +using Datadog.Trace.SourceGenerators; using Datadog.Trace.Vendors.StatsdClient; namespace Datadog.Trace.RuntimeMetrics @@ -30,7 +31,7 @@ internal sealed class RuntimeMetricsWriter : IDisposable private static readonly Version Windows81Version = new(6, 3, 9600); private static readonly IDatadogLogger Log = DatadogLogging.GetLoggerFor(); - private static readonly Func InitializeListenerFunc = InitializeListener; + private static readonly Func InitializeListenerFunc = InitializeListener; [ThreadStatic] private static bool _inspectingFirstChanceException; @@ -66,12 +67,13 @@ internal sealed class RuntimeMetricsWriter : IDisposable private TimeSpan _previousSystemCpu; private int _disposed; - public RuntimeMetricsWriter(IStatsdManager statsd, TimeSpan delay, bool inAzureAppServiceContext) - : this(statsd, delay, inAzureAppServiceContext, InitializeListenerFunc) + public RuntimeMetricsWriter(IStatsdManager statsd, TimeSpan delay, bool 
inAzureAppServiceContext, bool useDiagnosticsApiListener) + : this(statsd, delay, inAzureAppServiceContext, useDiagnosticsApiListener, InitializeListenerFunc) { } - internal RuntimeMetricsWriter(IStatsdManager statsd, TimeSpan delay, bool inAzureAppServiceContext, Func initializeListener) + [TestingAndPrivateOnly] + internal RuntimeMetricsWriter(IStatsdManager statsd, TimeSpan delay, bool inAzureAppServiceContext, bool useDiagnosticsApiListener, Func initializeListener) { _delay = delay; _statsd = statsd; @@ -119,7 +121,7 @@ internal RuntimeMetricsWriter(IStatsdManager statsd, TimeSpan delay, bool inAzur try { - _listener = initializeListener(statsd, delay, inAzureAppServiceContext); + _listener = initializeListener(statsd, delay, inAzureAppServiceContext, useDiagnosticsApiListener); } catch (Exception ex) { @@ -323,9 +325,13 @@ internal bool PushEvents() return true; } - private static IRuntimeMetricsListener InitializeListener(IStatsdManager statsd, TimeSpan delay, bool inAzureAppServiceContext) + private static IRuntimeMetricsListener InitializeListener(IStatsdManager statsd, TimeSpan delay, bool inAzureAppServiceContext, bool useDiagnosticsApiListener) { -#if NETCOREAPP +#if NET6_0_OR_GREATER + return useDiagnosticsApiListener + ? new DiagnosticsMetricsRuntimeMetricsListener(statsd) + : new RuntimeEventListener(statsd, delay); +#elif NETCOREAPP return new RuntimeEventListener(statsd, delay); #elif NETFRAMEWORK try diff --git a/tracer/src/Datadog.Trace/TracerManagerFactory.cs b/tracer/src/Datadog.Trace/TracerManagerFactory.cs index 67046b1638d6..fa1adb240b5b 100644 --- a/tracer/src/Datadog.Trace/TracerManagerFactory.cs +++ b/tracer/src/Datadog.Trace/TracerManagerFactory.cs @@ -126,7 +126,7 @@ internal TracerManager CreateTracerManager( statsd ??= new StatsdManager(settings); runtimeMetrics ??= settings.RuntimeMetricsEnabled && !DistributedTracer.Instance.IsChildTracer - ? new RuntimeMetricsWriter(statsd, TimeSpan.FromSeconds(10), settings.IsRunningInAzureAppService) + ? new RuntimeMetricsWriter(statsd, TimeSpan.FromSeconds(10), settings.IsRunningInAzureAppService, settings.RuntimeMetricsDiagnosticsMetricsApiEnabled) : null; sampler ??= GetSampler(settings); diff --git a/tracer/test/Datadog.Trace.ClrProfiler.IntegrationTests/RuntimeMetricsTests.cs b/tracer/test/Datadog.Trace.ClrProfiler.IntegrationTests/RuntimeMetricsTests.cs index 2d01f7ef0993..b7f897b05d3a 100644 --- a/tracer/test/Datadog.Trace.ClrProfiler.IntegrationTests/RuntimeMetricsTests.cs +++ b/tracer/test/Datadog.Trace.ClrProfiler.IntegrationTests/RuntimeMetricsTests.cs @@ -7,6 +7,7 @@ using System.Linq; using System.Text.RegularExpressions; using System.Threading.Tasks; +using Datadog.Trace.Configuration; using Datadog.Trace.RuntimeMetrics; using Datadog.Trace.TestHelpers; using FluentAssertions; @@ -43,6 +44,19 @@ public async Task MetricsDisabled() Assert.True(requests.Count == 0, "Received metrics despite being disabled. 
Metrics received: " + string.Join("\n", requests)); } +#if NET6_0_OR_GREATER + [SkippableFact] + [Trait("Category", "EndToEnd")] + [Trait("RunOnWindows", "True")] + [Trait("SupportsInstrumentationVerification", "True")] + public async Task DiagnosticsMetricsApiSubmitsMetrics() + { + SetEnvironmentVariable(ConfigurationKeys.RuntimeMetricsDiagnosticsMetricsApiEnabled, "1"); + EnvironmentHelper.EnableDefaultTransport(); + await RunTest(); + } +#endif + [SkippableFact] [Trait("Category", "EndToEnd")] [Trait("RunOnWindows", "True")] diff --git a/tracer/test/Datadog.Trace.Tests/RuntimeMetrics/DiagnosticMetricsRuntimeMetricsListenerTests.cs b/tracer/test/Datadog.Trace.Tests/RuntimeMetrics/DiagnosticMetricsRuntimeMetricsListenerTests.cs new file mode 100644 index 000000000000..14730d87947e --- /dev/null +++ b/tracer/test/Datadog.Trace.Tests/RuntimeMetrics/DiagnosticMetricsRuntimeMetricsListenerTests.cs @@ -0,0 +1,229 @@ +// +// Unless explicitly stated otherwise all files in this repository are licensed under the Apache 2 License. +// This product includes software developed at Datadog (https://www.datadoghq.com/). Copyright 2017 Datadog, Inc. +// + +#if NET6_0_OR_GREATER + +using System; +using System.Collections.Generic; +using System.Collections.ObjectModel; +using System.Diagnostics; +using System.Diagnostics.Metrics; +using System.Diagnostics.Tracing; +using System.Threading; +using Datadog.Trace.ClrProfiler.AutoInstrumentation.ManualInstrumentation; +using Datadog.Trace.Configuration; +using Datadog.Trace.Configuration.ConfigurationSources; +using Datadog.Trace.Configuration.Telemetry; +using Datadog.Trace.DogStatsd; +using Datadog.Trace.RuntimeMetrics; +using Datadog.Trace.TestHelpers.Stats; +using Datadog.Trace.Vendors.StatsdClient; +using Moq; +using Xunit; +using Range = Moq.Range; + +namespace Datadog.Trace.Tests.RuntimeMetrics; + +[CollectionDefinition(nameof(RuntimeEventListenerTests), DisableParallelization = true)] +[Collection(nameof(RuntimeEventListenerTests))] +public class DiagnosticMetricsRuntimeMetricsListenerTests +{ + [Fact] + public void PushEvents() + { + var statsd = new Mock(); + + using var listener = new DiagnosticsMetricsRuntimeMetricsListener(new TestStatsdManager(statsd.Object)); + + listener.Refresh(); + + statsd.Verify(s => s.Gauge(MetricsNames.ThreadPoolWorkersCount, It.IsAny(), 1, null), Times.Once); + + // some metrics are only recorded the _second_ time this is called, to avoid skewing the results at the start, so we just check for a couple + statsd.Verify(s => s.Gauge(MetricsNames.Gen0HeapSize, It.IsAny(), 1, null), Times.Once); + statsd.Verify(s => s.Gauge(MetricsNames.GcMemoryLoad, It.IsInRange(0d, 100, Range.Inclusive), It.IsAny(), null), Times.AtLeastOnce); + } + + [Fact] + public void MonitorGarbageCollections() + { + var statsd = new Mock(); + using var listener = new DiagnosticsMetricsRuntimeMetricsListener(new TestStatsdManager(statsd.Object)); + + listener.Refresh(); + statsd.Invocations.Clear(); + + GC.Collect(2, GCCollectionMode.Forced, blocking: true, compacting: true); + + listener.Refresh(); + + statsd.Verify(s => s.Gauge(MetricsNames.Gen0HeapSize, It.IsAny(), It.IsAny(), null), Times.AtLeastOnce); + statsd.Verify(s => s.Gauge(MetricsNames.Gen1HeapSize, It.IsAny(), It.IsAny(), null), Times.AtLeastOnce); + statsd.Verify(s => s.Gauge(MetricsNames.Gen2HeapSize, It.IsInRange(0d, long.MaxValue, Range.Exclusive), It.IsAny(), null), Times.AtLeastOnce); + statsd.Verify(s => s.Gauge(MetricsNames.LohSize, It.IsAny(), It.IsAny(), null), Times.AtLeastOnce); 
+ statsd.Verify(s => s.Gauge(MetricsNames.GcMemoryLoad, It.IsInRange(0d, 100, Range.Inclusive), It.IsAny(), null), Times.AtLeastOnce); + statsd.Verify(s => s.Timer(MetricsNames.GcPauseTime, It.IsAny(), It.IsAny(), null), Times.AtLeastOnce); + statsd.Verify(s => s.Increment(MetricsNames.Gen2CollectionsCount, 1, It.IsAny(), null), Times.AtLeastOnce); + } + +#if NET8_0_OR_GREATER + [Fact] + public void AspNetCore_Counters_ActiveRequests() + { + // based on https://github.com/dotnet/aspnetcore/blob/v10.0.1/src/Hosting/Hosting/src/Internal/HostingMetrics.cs + using var meter = new Meter("Microsoft.AspNetCore.Hosting"); + + // Pretending we're aspnetcore + var instrument = meter.CreateUpDownCounter( + "http.server.active_requests", + unit: "{request}", + description: "Number of active HTTP server requests."); + + var statsd = new Mock(); + using var listener = new DiagnosticsMetricsRuntimeMetricsListener(new TestStatsdManager(statsd.Object)); + + listener.Refresh(); + statsd.Invocations.Clear(); + + // First interval + instrument.Add(1); + instrument.Add(1); + instrument.Add(1); + listener.Refresh(); + statsd.Verify(s => s.Gauge(MetricsNames.AspNetCoreCurrentRequests, 3.0, 1, null), Times.Once); + statsd.Invocations.Clear(); + + // Second interval + instrument.Add(-1); + listener.Refresh(); + statsd.Verify(s => s.Gauge(MetricsNames.AspNetCoreCurrentRequests, 2.0, 1, null), Times.Once); + } + + [Fact] + public void AspNetCore_Counters_RequestCounts() + { + // based on https://github.com/dotnet/aspnetcore/blob/v10.0.1/src/Hosting/Hosting/src/Internal/HostingMetrics.cs + using var meter = new Meter("Microsoft.AspNetCore.Hosting"); + + // Pretending we're aspnetcore + var instrument = meter.CreateHistogram( + "http.server.request.duration", + unit: "s", + description: "Duration of HTTP server requests."); + + var statsd = new Mock(); + using var listener = new DiagnosticsMetricsRuntimeMetricsListener(new TestStatsdManager(statsd.Object)); + + listener.Refresh(); + statsd.Invocations.Clear(); + + // success requests + instrument.Record( + 123, + new TagList + { + { "url.scheme", "http" }, + { "http.request.method", "GET" }, + { "network.protocol.version", "1.1" }, + { "http.response.status_code", (object)200 }, + { "http.route", "/" } + }); + + instrument.Record( + 456, + new TagList + { + { "url.scheme", "http" }, + { "http.request.method", "GET" }, + { "network.protocol.version", "1.1" }, + { "http.response.status_code", (object)201 }, + { "http.route", "/" } + }); + + // failed + instrument.Record( + 789, + new TagList + { + { "url.scheme", "https" }, + { "http.request.method", "POST" }, + { "network.protocol.version", "2.0" }, + { "http.response.status_code", (object)500 }, + { "http.route", "/" }, + { "error.type", typeof(ArgumentException).FullName } + }); + + listener.Refresh(); + + statsd.Verify(s => s.Gauge(MetricsNames.AspNetCoreFailedRequests, 1.0, 1, null), Times.Once); + statsd.Verify(s => s.Gauge(MetricsNames.AspNetCoreTotalRequests, 3.0, 1, null), Times.Once); + statsd.Invocations.Clear(); + + // success requests + instrument.Record( + 1, + new TagList + { + { "url.scheme", "http" }, + { "http.request.method", "GET" }, + { "network.protocol.version", "1.1" }, + { "http.response.status_code", (object)200 }, + { "http.route", "/" } + }); + + listener.Refresh(); + + statsd.Verify(s => s.Gauge(MetricsNames.AspNetCoreFailedRequests, 1.0, 1, null), Times.Once); + statsd.Verify(s => s.Gauge(MetricsNames.AspNetCoreTotalRequests, 4.0, 1, null), Times.Once); + } + + [Fact] + public void 
UpdateStatsdOnReinitialization() + { + // based on https://github.com/dotnet/aspnetcore/blob/v10.0.1/src/Hosting/Hosting/src/Internal/HostingMetrics.cs + using var meter = new Meter("Microsoft.AspNetCore.Hosting"); + + // Pretending we're aspnetcore + var instrument = meter.CreateUpDownCounter( + "http.server.active_requests", + unit: "{request}", + description: "Number of active HTTP server requests."); + + var originalStatsd = new Mock(); + var newStatsd = new Mock(); + + var settings = TracerSettings.Create(new() { { ConfigurationKeys.ServiceName, "original" } }); + var statsdManager = new StatsdManager( + settings, + (m, e) => new(m.ServiceName == "original" ? originalStatsd.Object : newStatsd.Object)); + statsdManager.SetRequired(StatsdConsumer.RuntimeMetricsWriter, true); + using var listener = new DiagnosticsMetricsRuntimeMetricsListener(statsdManager); + + listener.Refresh(); + originalStatsd.Invocations.Clear(); + + // First interval + instrument.Add(1); + instrument.Add(1); + instrument.Add(1); + listener.Refresh(); + originalStatsd.Verify(s => s.Gauge(MetricsNames.AspNetCoreCurrentRequests, 3.0, 1, null), Times.Once); + originalStatsd.Invocations.Clear(); + + // Updating the service name should trigger a new statsd client to be created + settings.Manager.UpdateManualConfigurationSettings( + new ManualInstrumentationConfigurationSource( + new ReadOnlyDictionary(new Dictionary { { TracerSettingKeyConstants.ServiceNameKey, "updated" } }), + useDefaultSources: true), + NullConfigurationTelemetry.Instance); + + // Second interval + instrument.Add(-1); + listener.Refresh(); + newStatsd.Verify(s => s.Gauge(MetricsNames.AspNetCoreCurrentRequests, 2.0, 1, null), Times.Once); + } +#endif +} +#endif diff --git a/tracer/test/Datadog.Trace.Tests/RuntimeMetrics/RuntimeEventListenerTests.cs b/tracer/test/Datadog.Trace.Tests/RuntimeMetrics/RuntimeEventListenerTests.cs index 2701dc39d087..f6ac86c3a935 100644 --- a/tracer/test/Datadog.Trace.Tests/RuntimeMetrics/RuntimeEventListenerTests.cs +++ b/tracer/test/Datadog.Trace.Tests/RuntimeMetrics/RuntimeEventListenerTests.cs @@ -154,7 +154,7 @@ public void UpdateStatsdOnReinitialization() (m, e) => new(m.ServiceName == "original" ? 
originalStatsd.Object : newStatsd.Object)); using var listener = new RuntimeEventListener(statsdManager, TimeSpan.FromSeconds(1)); - using var writer = new RuntimeMetricsWriter(statsdManager, TimeSpan.FromSeconds(1), false); + using var writer = new RuntimeMetricsWriter(statsdManager, TimeSpan.FromSeconds(1), false, false); mutex.Wait(); diff --git a/tracer/test/Datadog.Trace.Tests/RuntimeMetrics/RuntimeMetricsWriterTests.cs b/tracer/test/Datadog.Trace.Tests/RuntimeMetrics/RuntimeMetricsWriterTests.cs index 4e783833d09c..a48fdfb2fcb1 100644 --- a/tracer/test/Datadog.Trace.Tests/RuntimeMetrics/RuntimeMetricsWriterTests.cs +++ b/tracer/test/Datadog.Trace.Tests/RuntimeMetrics/RuntimeMetricsWriterTests.cs @@ -30,7 +30,7 @@ public void PushEvents() listener.Setup(l => l.Refresh()) .Callback(() => mutex.Set()); - using (new RuntimeMetricsWriter(new TestStatsdManager(Mock.Of()), TimeSpan.FromMilliseconds(10), false, (statsd, timeSpan, inAppContext) => listener.Object)) + using (new RuntimeMetricsWriter(new TestStatsdManager(Mock.Of()), TimeSpan.FromMilliseconds(10), false, false, (statsd, timeSpan, inAppContext, _) => listener.Object)) { Assert.True(mutex.Wait(10000), "Method Refresh() wasn't called on the listener"); } @@ -39,7 +39,7 @@ public void PushEvents() [Fact] public void ShouldSwallowFactoryExceptions() { - var writer = new RuntimeMetricsWriter(new TestStatsdManager(Mock.Of()), TimeSpan.FromMilliseconds(10), false, (statsd, timeSpan, inAppContext) => throw new InvalidOperationException("This exception should be caught")); + var writer = new RuntimeMetricsWriter(new TestStatsdManager(Mock.Of()), TimeSpan.FromMilliseconds(10), false, false, (statsd, timeSpan, inAppContext, _) => throw new InvalidOperationException("This exception should be caught")); writer.Dispose(); } @@ -49,7 +49,7 @@ public void ShouldCaptureFirstChanceExceptions() var statsd = new Mock(); var listener = new Mock(); - using (var writer = new RuntimeMetricsWriter(new TestStatsdManager(statsd.Object), TimeSpan.FromMilliseconds(Timeout.Infinite), false, (statsd, timeSpan, inAppContext) => listener.Object)) + using (var writer = new RuntimeMetricsWriter(new TestStatsdManager(statsd.Object), TimeSpan.FromMilliseconds(Timeout.Infinite), false, false, (statsd, timeSpan, inAppContext, _) => listener.Object)) { for (int i = 0; i < 10; i++) { @@ -114,7 +114,7 @@ public async Task ShouldCaptureProcessMetrics() var statsd = new Mock(); var listener = new Mock(); - using (new RuntimeMetricsWriter(new TestStatsdManager(statsd.Object), TimeSpan.FromSeconds(1), false, (_, _, _) => listener.Object)) + using (new RuntimeMetricsWriter(new TestStatsdManager(statsd.Object), TimeSpan.FromSeconds(1), false, false, (_, _, _, _) => listener.Object)) { var expectedNumberOfThreads = Process.GetCurrentProcess().Threads.Count; @@ -178,7 +178,7 @@ public void CleanupResources() var statsd = new Mock(); var listener = new Mock(); - var writer = new RuntimeMetricsWriter(new TestStatsdManager(statsd.Object), TimeSpan.FromMilliseconds(Timeout.Infinite), false, (statsd, timeSpan, inAppContext) => listener.Object); + var writer = new RuntimeMetricsWriter(new TestStatsdManager(statsd.Object), TimeSpan.FromMilliseconds(Timeout.Infinite), false, false, (statsd, timeSpan, inAppContext, _) => listener.Object); writer.Dispose(); #if NETFRAMEWORK diff --git a/tracer/test/Datadog.Trace.Tests/Telemetry/config_norm_rules.json b/tracer/test/Datadog.Trace.Tests/Telemetry/config_norm_rules.json index 84161c1b2129..f0609a8182e4 100644 --- 
a/tracer/test/Datadog.Trace.Tests/Telemetry/config_norm_rules.json +++ b/tracer/test/Datadog.Trace.Tests/Telemetry/config_norm_rules.json @@ -339,6 +339,7 @@ "DD_APM_ENABLE_RARE_SAMPLER": "trace_rare_sampler_enabled", "DD_TRACE_METRICS_ENABLED": "trace_metrics_enabled", "DD_RUNTIME_METRICS_ENABLED": "runtime_metrics_enabled", + "DD_RUNTIME_METRICS_DIAGNOSTICS_METRICS_API_ENABLED": "runtime_metrics_diagnostics_metrics_api_enabled", "DD_TRACE_AGENT_PATH": "agent_trace_agent_excecutable_path", "DD_TRACE_AGENT_ARGS": "agent_trace_agent_excecutable_args", "DD_DOGSTATSD_PATH": "agent_dogstatsd_executable_path", diff --git a/tracer/test/Datadog.Trace.Tests/TracerManagerFactoryTests.cs b/tracer/test/Datadog.Trace.Tests/TracerManagerFactoryTests.cs index 408933104353..9427a97eacb5 100644 --- a/tracer/test/Datadog.Trace.Tests/TracerManagerFactoryTests.cs +++ b/tracer/test/Datadog.Trace.Tests/TracerManagerFactoryTests.cs @@ -158,7 +158,7 @@ static DirectLogSubmissionManager BuildLogSubmissionManager() gitMetadataTagsProvider: Mock.Of()); static RuntimeMetricsWriter BuildRuntimeMetrics() - => new(new TestStatsdManager(Mock.Of()), TimeSpan.FromMinutes(1), inAzureAppServiceContext: false, (_, _, _) => Mock.Of()); + => new(new TestStatsdManager(Mock.Of()), TimeSpan.FromMinutes(1), inAzureAppServiceContext: false, useDiagnosticsApiListener: false, initializeListener: (_, _, _, _) => Mock.Of()); } private static IConfigurationSource CreateConfigurationSource(params (string Key, string Value)[] values)
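A small, self-contained sketch (illustrative only, not part of this diff) of the System.Diagnostics.Metrics MeterListener pattern that DiagnosticsMetricsRuntimeMetricsListener builds on: publish-time filtering via InstrumentPublished, per-type measurement callbacks, and explicit polling of observable instruments. The meter and instrument names below are made up; only the MeterListener APIs themselves are real. The new listener follows the same three steps: OnInstrumentPublished filters to the System.Runtime, Microsoft.AspNetCore.Hosting and Microsoft.AspNetCore.Server.Kestrel meters, the long/double callbacks accumulate measurements into fields, and Refresh() polls observable instruments before flushing to statsd.

    #nullable enable
    using System;
    using System.Collections.Generic;
    using System.Diagnostics.Metrics;

    internal static class MeterListenerSketch
    {
        public static void Main()
        {
            // A fake meter standing in for System.Runtime / Microsoft.AspNetCore.Hosting etc.
            using var meter = new Meter("Demo.Meter");
            var requests = meter.CreateCounter<long>("demo.requests");

            using var listener = new MeterListener
            {
                // Called once per published instrument; opt in only to the instruments we care about,
                // the same way OnInstrumentPublished filters on meter/instrument names.
                InstrumentPublished = static (instrument, l) =>
                {
                    if (instrument.Meter.Name == "Demo.Meter")
                    {
                        l.EnableMeasurementEvents(instrument);
                    }
                }
            };

            // The runtime and ASP.NET Core instruments only emit long or double measurements,
            // so one callback per numeric type is enough.
            listener.SetMeasurementEventCallback<long>(OnMeasurement);
            listener.Start();

            requests.Add(1);

            // Observable (polled) instruments only produce values when explicitly recorded,
            // which is what Refresh() does via RecordObservableInstruments().
            listener.RecordObservableInstruments();
        }

        private static void OnMeasurement(Instrument instrument, long measurement, ReadOnlySpan<KeyValuePair<string, object?>> tags, object? state)
        {
            Console.WriteLine($"{instrument.Name} = {measurement}");
        }
    }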