feat(runtime_metrics): add runtime id and gauge metric support (#12555)

mabdinur · wantsui · web-flow · commit 46959ab53773 · 2025-03-17T17:49:17.000-04:00
- Adds support for sending runtime metrics as gauge metrics (instead of distributions). This feature is disabled by default and can be enabled by setting ``DD_TRACE_EXPERIMENTAL_FEATURES_ENABLED=DD_RUNTIME_METRICS_ENABLED``. - Adds support for tagging runtime metrics with the current runtime ID. This feature is disabled by default and can be enabled by setting ``DD_TRACE_EXPERIMENTAL_RUNTIME_ID_ENABLED=True``. Note: Sending runtime metrics as gauges after sending the same metric names as distributions can cause the old distribution metrics to overshadow the new gauge metrics. Since this feature is still in beta, we are okay with the breaking change. cc: @wantsui ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance 
policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --------- Co-authored-by: wantsui <wan.tsui@datadoghq.com>
diff --git a/ddtrace/internal/constants.py b/ddtrace/internal/constants.py
@@ -110,3 +110,9 @@ class SamplingMechanism(object):
 }
 _KEEP_PRIORITY_INDEX = 0
 _REJECT_PRIORITY_INDEX = 1
+
+
+# List of supported values for DD_TRACE_EXPERIMENTAL_FEATURES_ENABLED
+class EXPERIMENTAL_FEATURES:
+    # Enables submitting runtime metrics as gauges (instead of distributions)
+    RUNTIME_METRICS = "DD_RUNTIME_METRICS_ENABLED"
diff --git a/ddtrace/internal/runtime/runtime_metrics.py b/ddtrace/internal/runtime/runtime_metrics.py
@@ -7,6 +7,7 @@
 import ddtrace
 from ddtrace.internal import atexit
 from ddtrace.internal import forksafe
+from ddtrace.internal.constants import EXPERIMENTAL_FEATURES
 from ddtrace.vendor.dogstatsd import DogStatsd
 
 from .. import periodic
@@ -16,6 +17,7 @@
 from .metric_collectors import GCRuntimeMetricCollector
 from .metric_collectors import PSUtilRuntimeMetricCollector
 from .tag_collectors import PlatformTagCollector
+from .tag_collectors import PlatformTagCollectorV2
 from .tag_collectors import TracerTagCollector
 
 
@@ -45,6 +47,12 @@ class PlatformTags(RuntimeCollectorsIterable):
     COLLECTORS = [PlatformTagCollector]
 
 
+class PlatformTagsV2(RuntimeCollectorsIterable):
+    # DEV: `None` means to allow all tags generated by PlatformTagCollectorV2
+    ENABLED = None
+    COLLECTORS = [PlatformTagCollectorV2]
+
+
 class TracerTags(RuntimeCollectorsIterable):
     # DEV: `None` means to allow all tags generated by PlatformTagCollector and TracerTagCollector
     ENABLED = None
@@ -80,7 +88,17 @@ def __init__(self, interval=_get_interval_or_default(), tracer=None, dogstatsd_u
         )
         self.tracer: ddtrace.trace.Tracer = tracer or ddtrace.tracer
         self._runtime_metrics: RuntimeMetrics = RuntimeMetrics()
-        self._platform_tags: List[str] = self._format_tags(PlatformTags())
+        if EXPERIMENTAL_FEATURES.RUNTIME_METRICS in ddtrace.config._experimental_features_enabled:
+            # Enables sending runtime metrics as gauges (instead of distributions with a new metric name)
+            self.send_metric = self._dogstatsd_client.gauge
+        else:
+            self.send_metric = self._dogstatsd_client.distribution
+
+        if ddtrace.config._runtime_metrics_runtim_id_enabled:
+            # Enables tagging runtime metrics with runtime-id (as well as all the v1 tags)
+            self._platform_tags = self._format_tags(PlatformTagsV2())
+        else:
+            self._platform_tags = self._format_tags(PlatformTags())
 
     @classmethod
     def disable(cls):
@@ -130,13 +148,13 @@ def flush(self):
         # type: () -> None
         # Ensure runtime metrics have up-to-date tags (ex: service, env, version)
         rumtime_tags = self._format_tags(TracerTags()) + self._platform_tags
-        log.debug("Updating constant tags %s", rumtime_tags)
+        log.debug("Sending runtime metrics with the following tags: %s", rumtime_tags)
         self._dogstatsd_client.constant_tags = rumtime_tags
 
         with self._dogstatsd_client:
             for key, value in self._runtime_metrics:
-                log.debug("Writing metric %s:%s", key, value)
-                self._dogstatsd_client.distribution(key, value)
+                log.debug("Sending ddtrace runtime metric %s:%s", key, value)
+                self.send_metric(key, value)
 
     def _stop_service(self):
         # type: (...) -> None
diff --git a/ddtrace/internal/runtime/tag_collectors.py b/ddtrace/internal/runtime/tag_collectors.py
@@ -1,6 +1,8 @@
 from typing import List  # noqa:F401
 from typing import Tuple  # noqa:F401
 
+from ddtrace.internal.runtime import get_runtime_id
+
 from ...constants import ENV_KEY
 from ...constants import VERSION_KEY
 from ..constants import DEFAULT_SERVICE_NAME
@@ -59,7 +61,6 @@ class PlatformTagCollector(RuntimeTagCollector):
     - `lang_version``,  eg ``2.7.10``
     - ``lang`` e.g. ``Python``
     - ``tracer_version`` e.g. ``0.29.0``
-
     """
 
     required_modules = ["platform", "ddtrace"]
@@ -74,3 +75,25 @@ def collect_fn(self, keys):
             (TRACER_VERSION, ddtrace.__version__),
         ]
         return tags
+
+
+class PlatformTagCollectorV2(PlatformTagCollector):
+    """Tag collector for the Python interpreter implementation and the runtime ID.
+
+    Tags collected:
+    - ``lang_interpreter``:
+
+      * For CPython this is ``CPython``.
+      * For Pypy this is ``PyPy``
+      * For Jython this is ``Jython``
+
+    - ``lang_version``,  eg ``2.7.10``
+    - ``lang`` e.g. ``Python``
+    - ``tracer_version`` e.g. ``0.29.0``
+    - ``runtime-id`` e.g. ``e4724609efa84cf58424a8b1ef44b17d``
+    """
+
+    def collect_fn(self, keys):
+        tags = super(PlatformTagCollectorV2, self).collect_fn(keys)
+        tags.append(("runtime-id", get_runtime_id()))
+        return tags
diff --git a/ddtrace/settings/_config.py b/ddtrace/settings/_config.py
@@ -554,6 +554,10 @@ def __init__(self):
         self._runtime_metrics_enabled = _get_config(
             "DD_RUNTIME_METRICS_ENABLED", False, asbool, "OTEL_METRICS_EXPORTER"
         )
+        self._runtime_metrics_runtim_id_enabled = _get_config("DD_TRACE_EXPERIMENTAL_RUNTIME_ID_ENABLED", False, asbool)
+        self._experimental_features_enabled = _get_config(
+            "DD_TRACE_EXPERIMENTAL_FEATURES_ENABLED", set(), lambda x: set(x.strip().upper().split(","))
+        )
 
         self._128_bit_trace_id_enabled = _get_config("DD_TRACE_128_BIT_TRACEID_GENERATION_ENABLED", True, asbool)
 
diff --git a/docs/configuration.rst b/docs/configuration.rst
@@ -855,6 +855,25 @@ Other
          These metrics track the memory management and concurrency of the python runtime. 
          Refer to the following `docs <https://docs.datadoghq.com/tracing/metrics/runtime_metrics/python/>`_ for more information.
 
+   DD_TRACE_EXPERIMENTAL_RUNTIME_ID_ENABLED:
+     type: Boolean
+     default: False
+     version_added:
+       v3.2.0: Adds initial support
+
+     description: |
+         Adds support for tagging runtime metrics with the current runtime ID. This is useful for tracking runtime metrics across multiple processes.
+         Refer to the following `docs <https://docs.datadoghq.com/tracing/metrics/runtime_metrics/python/>`_ for more information.
+
+   DD_TRACE_EXPERIMENTAL_FEATURES_ENABLED:
+     type: String
+     version_added:
+       v3.2.0: Adds initial support and support for enabling experimental runtime metrics. 
+     default: ""
+
+     description: |
+         Enables support for experimental ddtrace configurations. The supported configurations are: ``DD_RUNTIME_METRICS_ENABLED``.
+
    DD_SUBPROCESS_SENSITIVE_WILDCARDS:
      type: String
      
diff --git a/releasenotes/notes/use-gauge-metrics-in-rm-61ae620e83b95740.yaml b/releasenotes/notes/use-gauge-metrics-in-rm-61ae620e83b95740.yaml
@@ -0,0 +1,6 @@
+---
+features:
+  - |
+    runtime_metrics: Adds support for sending runtime metrics as gauge metrics (instead of distributions). To enable this feature set ``DD_TRACE_EXPERIMENTAL_FEATURES_ENABLED=DD_RUNTIME_METRICS_ENABLED``. 
+  - |
+    runtime_metrics: Adds support for tagging runtime metrics with the current runtime ID. To enable tagging, set ``DD_TRACE_EXPERIMENTAL_RUNTIME_ID_ENABLED=True``.
diff --git a/tests/runtime/test_runtime_metrics_api.py b/tests/runtime/test_runtime_metrics_api.py
@@ -196,3 +196,57 @@ def test_runtime_metrics_enable_environ(monkeypatch, environ):
         )
     finally:
         RuntimeMetrics.disable()
+
+
+@pytest.mark.subprocess(parametrize={"DD_TRACE_EXPERIMENTAL_RUNTIME_ID_ENABLED": ["true", "false"]})
+def test_runtime_metrics_experimental_runtime_tag():
+    """
+    When runtime metrics is enabled and DD_TRACE_EXPERIMENTAL_RUNTIME_ID_ENABLED is set to true or false
+        Runtime metrics worker starts and its platform tags include the runtime-id tag only when it is true
+    """
+    import os
+
+    from ddtrace.internal.runtime import get_runtime_id
+    from ddtrace.internal.runtime.runtime_metrics import RuntimeWorker
+    from ddtrace.internal.service import ServiceStatus
+
+    RuntimeWorker.enable()
+    assert RuntimeWorker._instance is not None
+
+    worker_instance = RuntimeWorker._instance
+    assert worker_instance.status == ServiceStatus.RUNNING
+
+    runtime_id_tag = f"runtime-id:{get_runtime_id()}"
+    if os.environ["DD_TRACE_EXPERIMENTAL_RUNTIME_ID_ENABLED"] == "true":
+        assert runtime_id_tag in worker_instance._platform_tags, worker_instance._platform_tags
+    elif os.environ["DD_TRACE_EXPERIMENTAL_RUNTIME_ID_ENABLED"] == "false":
+        assert runtime_id_tag not in worker_instance._platform_tags, worker_instance._platform_tags
+    else:
+        raise pytest.fail("Invalid value for DD_TRACE_EXPERIMENTAL_RUNTIME_ID_ENABLED")
+
+
+@pytest.mark.subprocess(
+    parametrize={"DD_TRACE_EXPERIMENTAL_FEATURES_ENABLED": ["DD_RUNTIME_METRICS_ENABLED,someotherfeature", ""]},
+    err=None,
+)
+def test_runtime_metrics_experimental_metric_type():
+    """
+    When runtime metrics is enabled and DD_TRACE_EXPERIMENTAL_FEATURES_ENABLED contains DD_RUNTIME_METRICS_ENABLED
+        Runtime metrics worker starts and submits gauge metrics; otherwise it submits distribution metrics
+    """
+    import os
+
+    from ddtrace.internal.runtime.runtime_metrics import RuntimeWorker
+    from ddtrace.internal.service import ServiceStatus
+
+    RuntimeWorker.enable()
+    assert RuntimeWorker._instance is not None
+
+    worker_instance = RuntimeWorker._instance
+    assert worker_instance.status == ServiceStatus.RUNNING
+    if "DD_RUNTIME_METRICS_ENABLED" in os.environ["DD_TRACE_EXPERIMENTAL_FEATURES_ENABLED"]:
+        assert worker_instance.send_metric == worker_instance._dogstatsd_client.gauge, worker_instance.send_metric
+    else:
+        assert (
+            worker_instance.send_metric == worker_instance._dogstatsd_client.distribution
+        ), worker_instance.send_metric
diff --git a/tests/telemetry/test_writer.py b/tests/telemetry/test_writer.py
@@ -465,6 +465,8 @@ def test_app_started_event_configuration_override(test_agent_session, run_python
         {"name": "DD_TRACE_COMPUTE_STATS", "origin": "env_var", "value": True},
         {"name": "DD_TRACE_DEBUG", "origin": "env_var", "value": True},
         {"name": "DD_TRACE_ENABLED", "origin": "env_var", "value": False},
+        {"name": "DD_TRACE_EXPERIMENTAL_FEATURES_ENABLED", "origin": "default", "value": "set()"},
+        {"name": "DD_TRACE_EXPERIMENTAL_RUNTIME_ID_ENABLED", "origin": "default", "value": False},
         {"name": "DD_TRACE_HEADER_TAGS", "origin": "default", "value": ""},
         {"name": "DD_TRACE_HEALTH_METRICS_ENABLED", "origin": "env_var", "value": True},
         {"name": "DD_TRACE_HTTP_CLIENT_TAG_QUERY_STRING", "origin": "default", "value": "true"},
diff --git a/tests/tracer/runtime/test_metric_collectors.py b/tests/tracer/runtime/test_metric_collectors.py
@@ -28,8 +28,9 @@ def collect_fn(self, keys):
 class TestPSUtilRuntimeMetricCollector(BaseTestCase):
     def test_metrics(self):
         collector = PSUtilRuntimeMetricCollector()
-        for _, value in collector.collect(PSUTIL_RUNTIME_METRICS):
+        for metric_name, value in collector.collect(PSUTIL_RUNTIME_METRICS):
             self.assertIsNotNone(value)
+            self.assertRegex(metric_name, r"^runtime.python\..*")
 
     def test_static_metrics(self):
         import os
@@ -127,8 +128,9 @@ def thread_stopper(stop_event):
 class TestGCRuntimeMetricCollector(BaseTestCase):
     def test_metrics(self):
         collector = GCRuntimeMetricCollector()
-        for _, value in collector.collect(GC_RUNTIME_METRICS):
+        for metric_name, value in collector.collect(GC_RUNTIME_METRICS):
             self.assertIsNotNone(value)
+            self.assertRegex(metric_name, r"^runtime.python\..*")
 
     def test_gen1_changes(self):
         # disable gc
diff --git a/tests/tracer/runtime/test_runtime_metrics.py b/tests/tracer/runtime/test_runtime_metrics.py
@@ -81,6 +81,18 @@ def test_runtime_tags_empty():
     assert set(tags.keys()) == set(["lang", "lang_interpreter", "lang_version", "tracer_version"])
 
 
+@pytest.mark.subprocess()
+def test_runtime_platformv2_tags():
+    from ddtrace.internal.runtime.runtime_metrics import PlatformTagsV2
+
+    tags = list(PlatformTagsV2())
+    assert len(tags) == 5
+
+    tags = dict(tags)
+    # Ensure runtime-id is present along with the four v1 platform tags (five tags in total)
+    assert set(tags.keys()) == set(["lang", "lang_interpreter", "lang_version", "tracer_version", "runtime-id"])
+
+
 @pytest.mark.subprocess(env={"DD_SERVICE": "my-service", "DD_ENV": "test-env", "DD_VERSION": "1.2.3"})
 def test_runtime_tags_usm():
     from ddtrace.internal.runtime.runtime_metrics import TracerTags

Original file line number	Diff line number	Diff line change
`@@ -554,6 +554,10 @@ def __init__(self):`
`554`	`554`	`self._runtime_metrics_enabled = _get_config(`
`555`	`555`	`"DD_RUNTIME_METRICS_ENABLED", False, asbool, "OTEL_METRICS_EXPORTER"`
`556`	`556`	`)`
	`557`	`+ self._runtime_metrics_runtim_id_enabled = _get_config("DD_TRACE_EXPERIMENTAL_RUNTIME_ID_ENABLED", False, asbool)`
	`558`	`+ self._experimental_features_enabled = _get_config(`
	`559`	`+ "DD_TRACE_EXPERIMENTAL_FEATURES_ENABLED", set(), lambda x: set(x.strip().upper().split(","))`
	`560`	`+ )`
`557`	`561`
`558`	`562`	`self._128_bit_trace_id_enabled = _get_config("DD_TRACE_128_BIT_TRACEID_GENERATION_ENABLED", True, asbool)`
`559`	`563`