Commit b4f7013

Store span ids in profiling events (#1043)
* feat(tracer): add an on_start_span hook. This allows registering a function to be called when a new span is started by a Tracer.
* chore(black): blackify ddtrace/settings
* feat(profiling/stack): collect span ids for running threads and stack frames
1 parent 7c74f25 commit b4f7013
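
The new hook is the glue between the tracer and the profiler: a callback registered with `Tracer.on_start_span` is invoked with every span as soon as it is started, and the stack collector uses exactly this to remember which spans run on which thread. A minimal usage sketch, assuming the global `ddtrace.tracer` instance (the callback name is illustrative):

import ddtrace

def on_span_start(span):
    # Invoked by the tracer for every span right after it is started.
    print("started trace_id=%d span_id=%d" % (span.trace_id, span.span_id))

ddtrace.tracer.on_start_span(on_span_start)  # can also be used as a decorator

with ddtrace.tracer.trace("example.operation"):
    pass  # on_span_start was called with the new span

ddtrace.tracer.deregister_on_start_span(on_span_start)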

11 files changed: +290 -38 lines


ddtrace/profiling/collector/stack.pyx

Lines changed: 89 additions & 2 deletions
@@ -1,8 +1,11 @@
 """CPU profiling collector."""
 from __future__ import absolute_import

+import collections
+import logging
 import sys
 import threading
+import weakref

 from ddtrace import compat
 from ddtrace.profiling import _attr
@@ -12,6 +15,23 @@ from ddtrace.profiling import event
 from ddtrace.profiling.collector import _traceback
 from ddtrace.utils import formats
 from ddtrace.vendor import attr
+from ddtrace.vendor import six
+
+
+_LOG = logging.getLogger(__name__)
+
+
+if "gevent" in sys.modules:
+    try:
+        import gevent.monkey
+    except ImportError:
+        _LOG.error("gevent loaded but unable to import gevent.monkey")
+        from ddtrace.vendor.six.moves._thread import get_ident as _thread_get_ident
+    else:
+        _thread_get_ident = gevent.monkey.get_original("thread" if six.PY2 else "_thread", "get_ident")
+else:
+    from ddtrace.vendor.six.moves._thread import get_ident as _thread_get_ident
+

 # NOTE: Do not use LOG here. This code runs under a real OS thread and is unable to acquire any lock of the `logging`
 # module without having gevent crashing our dedicated thread.
@@ -112,6 +132,7 @@ class StackBasedEvent(event.SampleEvent):
     thread_name = attr.ib(default=None)
     frames = attr.ib(default=None)
     nframes = attr.ib(default=None)
+    trace_ids = attr.ib(default=None)


 @event.event_class
@@ -198,7 +219,7 @@ cdef get_thread_name(thread_id):
     return "Anonymous Thread %d" % thread_id


-cdef stack_collect(ignore_profiler, thread_time, max_nframes, interval, wall_time):
+cdef stack_collect(ignore_profiler, thread_time, max_nframes, interval, wall_time, thread_span_links):
     current_exceptions = []

     IF PY_MAJOR_VERSION >= 3 and PY_MINOR_VERSION >= 7:
@@ -241,6 +262,8 @@ cdef stack_collect(ignore_profiler, thread_time, max_nframes, interval, wall_tim

     running_thread_ids = {t[0] for t in running_threads}

+    thread_span_links.clear_threads(running_thread_ids)
+
     if ignore_profiler:
         running_thread_ids -= _periodic.PERIODIC_THREAD_IDS

@@ -250,11 +273,13 @@ cdef stack_collect(ignore_profiler, thread_time, max_nframes, interval, wall_tim
     for tid, frame in running_threads:
         if ignore_profiler and tid in _periodic.PERIODIC_THREAD_IDS:
             continue
+        spans = thread_span_links.get_active_leaf_spans_from_thread_id(tid)
         frames, nframes = _traceback.pyframe_to_frames(frame, max_nframes)
         stack_events.append(
             StackSampleEvent(
                 thread_id=tid,
                 thread_name=get_thread_name(tid),
+                trace_ids=set(span.trace_id for span in spans),
                 nframes=nframes, frames=frames,
                 wall_time_ns=wall_time,
                 cpu_time_ns=cpu_time[tid],
@@ -281,6 +306,59 @@ cdef stack_collect(ignore_profiler, thread_time, max_nframes, interval, wall_tim
     return stack_events, exc_events


+@attr.s(slots=True, eq=False)
+class _ThreadSpanLinks(object):
+
+    _thread_id_to_spans = attr.ib(factory=lambda: collections.defaultdict(weakref.WeakSet), repr=False, init=False)
+
+    # We do not use a lock because:
+    # - When adding new items, it's not possible for a same thread to call `link_span` at different time
+    #   Since the WeakSet are per-thread, there's no chance of creating 2 WeakSet for the same thread
+    # - When reading the span WeakSet for a thread, the set is copied which means that if it was ever mutated during
+    #   our reading, we wouldn't care.
+    #   In practice, here, it won't even mutate during the reading since this only used by the stack collector which
+    #   already owns the GIL.
+
+    def link_span(self, span):
+        """Link a span to its running environment.
+
+        Track threads, tasks, etc.
+        """
+        self._thread_id_to_spans[_thread_get_ident()].add(span)
+
+    def clear_threads(self, existing_thread_ids):
+        """Clear the stored list of threads based on the list of existing thread ids.
+
+        If any thread that is part of this list was stored, its data will be deleted.
+
+        :param existing_thread_ids: A set of thread ids to keep.
+        """
+        # Iterate over a copy of the list of keys in case it's mutated during our iteration.
+        for thread_id in list(self._thread_id_to_spans.keys()):
+            if thread_id not in existing_thread_ids:
+                del self._thread_id_to_spans[thread_id]
+
+    def get_active_leaf_spans_from_thread_id(self, thread_id):
+        """Return the latest active spans for a thread.
+
+        In theory this should return a single span, though if multiple children span are active without being finished,
+        there can be several spans returned.
+
+        :param thread_id: The thread id.
+        :return: A set with the active spans.
+        """
+        spans = set(self._thread_id_to_spans.get(thread_id, ()))
+        # Iterate over a copy so we can modify the original
+        for span in set(spans):
+            if not span.finished:
+                try:
+                    spans.remove(span._parent)
+                except KeyError:
+                    pass
+
+        return {span for span in spans if not span.finished}
+
+
 @attr.s(slots=True)
 class StackCollector(collector.PeriodicCollector):
     """Execution stacks collector."""
@@ -295,8 +373,10 @@ class StackCollector(collector.PeriodicCollector):
     max_time_usage_pct = attr.ib(factory=_attr.from_env("DD_PROFILING_MAX_TIME_USAGE_PCT", 2, float))
     nframes = attr.ib(factory=_attr.from_env("DD_PROFILING_MAX_FRAMES", 64, int))
     ignore_profiler = attr.ib(factory=_attr.from_env("DD_PROFILING_IGNORE_PROFILER", True, formats.asbool))
+    tracer = attr.ib(default=None)
     _thread_time = attr.ib(init=False, repr=False)
     _last_wall_time = attr.ib(init=False, repr=False)
+    _thread_span_links = attr.ib(factory=_ThreadSpanLinks, init=False, repr=False)

     @max_time_usage_pct.validator
     def _check_max_time_usage(self, attribute, value):
@@ -306,8 +386,15 @@ class StackCollector(collector.PeriodicCollector):
     def start(self):
         self._thread_time = ThreadTime()
         self._last_wall_time = compat.monotonic_ns()
+        if self.tracer is not None:
+            self.tracer.on_start_span(self._thread_span_links.link_span)
         super(StackCollector, self).start()

+    def stop(self):
+        super(StackCollector, self).stop()
+        if self.tracer is not None:
+            self.tracer.deregister_on_start_span(self._thread_span_links.link_span)
+
     def _compute_new_interval(self, used_wall_time_ns):
         interval = (used_wall_time_ns / (self.max_time_usage_pct / 100.0)) - used_wall_time_ns
         return max(interval / 1e9, self.MIN_INTERVAL_TIME)
@@ -319,7 +406,7 @@ class StackCollector(collector.PeriodicCollector):
         self._last_wall_time = now

         all_events = stack_collect(
-            self.ignore_profiler, self._thread_time, self.nframes, self.interval, wall_time,
+            self.ignore_profiler, self._thread_time, self.nframes, self.interval, wall_time, self._thread_span_links,
         )

         used_wall_time_ns = compat.monotonic_ns() - now
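
To make the pruning rule in `get_active_leaf_spans_from_thread_id` concrete: parents of unfinished spans are discarded so that only the deepest still-running spans are reported. The sketch below reproduces that rule with a hypothetical `FakeSpan` stand-in (only the attributes the collector actually reads), not the real `ddtrace.Span`:

class FakeSpan(object):
    # Hypothetical stand-in exposing only trace_id, finished and _parent.
    def __init__(self, trace_id, parent=None):
        self.trace_id = trace_id
        self.finished = False
        self._parent = parent


def active_leaf_spans(spans):
    # Same idea as _ThreadSpanLinks.get_active_leaf_spans_from_thread_id:
    # drop the parent of every unfinished span, keep the unfinished rest.
    spans = set(spans)
    for span in set(spans):
        if not span.finished:
            spans.discard(span._parent)
    return {span for span in spans if not span.finished}


root = FakeSpan(trace_id=1)
child = FakeSpan(trace_id=1, parent=root)
assert active_leaf_spans({root, child}) == {child}  # only the leaf remains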

ddtrace/profiling/profiler.py

Lines changed: 2 additions & 1 deletion
@@ -2,6 +2,7 @@
 import logging
 import os

+import ddtrace
 from ddtrace.profiling import recorder
 from ddtrace.profiling import scheduler
 from ddtrace.vendor import attr
@@ -34,7 +35,7 @@ def _build_default_exporters():
 def _build_default_collectors():
     r = recorder.Recorder()
     return [
-        stack.StackCollector(r),
+        stack.StackCollector(r, tracer=ddtrace.tracer),
         memory.MemoryCollector(r),
         exceptions.UncaughtExceptionCollector(r),
         threading.LockCollector(r),
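
Since `tracer` is now just an attribute of `StackCollector` (defaulting to `None`), a collector can be wired to a specific tracer the same way `_build_default_collectors` does. A minimal sketch, assuming the profiling extension is built and importable:

import ddtrace
from ddtrace.profiling import recorder
from ddtrace.profiling.collector import stack

r = recorder.Recorder()
# Passing the tracer lets start() register link_span as an on_start_span hook;
# with tracer=None the collector simply never links spans.
collector = stack.StackCollector(r, tracer=ddtrace.tracer)
collector.start()
# ... spans started from now on are associated with their running thread ...
collector.stop()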

ddtrace/settings/__init__.py

Lines changed: 6 additions & 6 deletions
@@ -8,10 +8,10 @@
 config = Config()

 __all__ = [
-    'config',
-    'Config',
-    'ConfigException',
-    'HttpConfig',
-    'Hooks',
-    'IntegrationConfig',
+    "config",
+    "Config",
+    "ConfigException",
+    "HttpConfig",
+    "Hooks",
+    "IntegrationConfig",
 ]

ddtrace/settings/config.py

Lines changed: 8 additions & 13 deletions
@@ -38,18 +38,17 @@ class Config(object):
     this instance to register their defaults, so that they're public
     available and can be updated by users.
     """
+
     def __init__(self):
         # use a dict as underlying storing mechanism
         self._config = {}
         self._http = HttpConfig()
         # Master switch for turning on and off trace search by default
         # this weird invocation of get_env is meant to read the DD_ANALYTICS_ENABLED
         # legacy environment variable. It should be removed in the future
-        legacy_config_value = get_env('analytics', 'enabled', default=False)
+        legacy_config_value = get_env("analytics", "enabled", default=False)

-        self.analytics_enabled = asbool(
-            get_env('trace', 'analytics_enabled', default=legacy_config_value)
-        )
+        self.analytics_enabled = asbool(get_env("trace", "analytics_enabled", default=legacy_config_value))

         # DEV: we don't use `self._get_service()` here because {DD,DATADOG}_SERVICE and
         # {DD,DATADOG}_SERVICE_NAME (deprecated) are distinct functionalities.
@@ -59,13 +58,9 @@ def __init__(self):
         self.version = get_env("version")
         self.logs_injection = asbool(get_env("logs", "injection", default=False))

-        self.report_hostname = asbool(
-            get_env('trace', 'report_hostname', default=False)
-        )
+        self.report_hostname = asbool(get_env("trace", "report_hostname", default=False))

-        self.health_metrics_enabled = asbool(
-            get_env('trace', 'health_metrics_enabled', default=False)
-        )
+        self.health_metrics_enabled = asbool(get_env("trace", "health_metrics_enabled", default=False))

     def __getattr__(self, name):
         if name not in self._config:
@@ -81,7 +76,7 @@ def get_from(self, obj):
         """
         pin = Pin.get_from(obj)
         if pin is None:
-            log.debug('No configuration found for %s', obj)
+            log.debug("No configuration found for %s", obj)
             return {}

         return pin._config
@@ -155,5 +150,5 @@ def _get_service(self, default=None):

     def __repr__(self):
         cls = self.__class__
-        integrations = ', '.join(self._config.keys())
-        return '{}.{}({})'.format(cls.__module__, cls.__name__, integrations)
+        integrations = ", ".join(self._config.keys())
+        return "{}.{}({})".format(cls.__module__, cls.__name__, integrations)

ddtrace/settings/exceptions.py

Lines changed: 1 addition & 0 deletions
@@ -2,4 +2,5 @@ class ConfigException(Exception):
     """Configuration exception when an integration that is not available
     is called in the `Config` object.
     """
+
     pass

ddtrace/settings/http.py

Lines changed: 4 additions & 3 deletions
@@ -46,9 +46,10 @@ def header_is_traced(self, header_name):
         :rtype: bool
         """
         normalized_header_name = normalize_header_name(header_name)
-        log.debug('Checking header \'%s\' tracing in whitelist %s', normalized_header_name, self._whitelist_headers)
+        log.debug("Checking header '%s' tracing in whitelist %s", normalized_header_name, self._whitelist_headers)
         return normalized_header_name in self._whitelist_headers

     def __repr__(self):
-        return '<{} traced_headers={} trace_query_string={}>'.format(
-            self.__class__.__name__, self._whitelist_headers, self.trace_query_string)
+        return "<{} traced_headers={} trace_query_string={}>".format(
+            self.__class__.__name__, self._whitelist_headers, self.trace_query_string
+        )

ddtrace/settings/integration.py

Lines changed: 11 additions & 10 deletions
@@ -21,6 +21,7 @@ class IntegrationConfig(AttrDict):
         config.flask['service_name'] = 'my-service-name'
         config.flask.service_name = 'my-service-name'
     """
+
     def __init__(self, global_config, name, *args, **kwargs):
         """
         :param global_config:
@@ -32,19 +33,19 @@ def __init__(self, global_config, name, *args, **kwargs):

         # Set internal properties for this `IntegrationConfig`
         # DEV: By-pass the `__setattr__` overrides from `AttrDict` to set real properties
-        object.__setattr__(self, 'global_config', global_config)
-        object.__setattr__(self, 'integration_name', name)
-        object.__setattr__(self, 'hooks', Hooks())
-        object.__setattr__(self, 'http', HttpConfig())
+        object.__setattr__(self, "global_config", global_config)
+        object.__setattr__(self, "integration_name", name)
+        object.__setattr__(self, "hooks", Hooks())
+        object.__setattr__(self, "http", HttpConfig())

         # Set default analytics configuration, default is disabled
         # DEV: Default to `None` which means do not set this key
         # Inject environment variables for integration
-        analytics_enabled_env = get_env(name, 'analytics_enabled')
+        analytics_enabled_env = get_env(name, "analytics_enabled")
         if analytics_enabled_env is not None:
             analytics_enabled_env = asbool(analytics_enabled_env)
-        self.setdefault('analytics_enabled', analytics_enabled_env)
-        self.setdefault('analytics_sample_rate', float(get_env(name, 'analytics_sample_rate', default=1.0)))
+        self.setdefault("analytics_enabled", analytics_enabled_env)
+        self.setdefault("analytics_sample_rate", float(get_env(name, "analytics_sample_rate", default=1.0)))

     def __deepcopy__(self, memodict=None):
         new = IntegrationConfig(self.global_config, self.integration_name, deepcopy(dict(self), memodict))
@@ -92,7 +93,7 @@ def get_analytics_sample_rate(self, use_global_config=False):
         configuration
         """
         if self._is_analytics_enabled(use_global_config):
-            analytics_sample_rate = getattr(self, 'analytics_sample_rate', None)
+            analytics_sample_rate = getattr(self, "analytics_sample_rate", None)
             # return True if attribute is None or attribute not found
             if analytics_sample_rate is None:
                 return True
@@ -105,5 +106,5 @@ def get_analytics_sample_rate(self, use_global_config=False):

     def __repr__(self):
         cls = self.__class__
-        keys = ', '.join(self.keys())
-        return '{}.{}({})'.format(cls.__module__, cls.__name__, keys)
+        keys = ", ".join(self.keys())
+        return "{}.{}({})".format(cls.__module__, cls.__name__, keys)

ddtrace/tracer.py

Lines changed: 27 additions & 0 deletions
@@ -19,6 +19,7 @@
 from .utils.deprecation import deprecated, RemovedInDDTrace10Warning
 from .vendor.dogstatsd import DogStatsd
 from . import compat
+from . import _hooks


 log = get_logger(__name__)
@@ -141,6 +142,30 @@ def __init__(self, url=None, dogstatsd_url=DEFAULT_DOGSTATSD_URL):
             writer=writer,
         )

+        self._hooks = _hooks.Hooks()
+
+    def on_start_span(self, func):
+        """Register a function to execute when a span start.
+
+        Can be used as a decorator.
+
+        :param func: The function to call when starting a span.
+                     The started span will be passed as argument.
+        """
+        self._hooks.register(self.__class__.start_span, func)
+        return func
+
+    def deregister_on_start_span(self, func):
+        """Unregister a function registered to execute when a span starts.
+
+        Can be used as a decorator.
+
+        :param func: The function to stop calling when starting a span.
+        """
+
+        self._hooks.deregister(self.__class__.start_span, func)
+        return func
+
     @property
     def debug_logging(self):
         return self.log.isEnabledFor(logging.DEBUG)
@@ -456,6 +481,8 @@ def start_span(self, name, child_of=None, service=None, resource=None, span_type
         # service(s) that may have been added.
         self._update_dogstatsd_constant_tags()

+        self._hooks.emit(self.__class__.start_span, span)
+
         return span

     def _update_dogstatsd_constant_tags(self):
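
The vendored `_hooks.Hooks` registry is not part of this diff; for readers following the calls above, a hypothetical stand-in with the same `register`/`deregister`/`emit` surface might look like the sketch below (an illustration, not the actual `ddtrace._hooks` implementation):

import collections


class Hooks(object):
    """Hypothetical hook registry: callbacks keyed by an arbitrary hook object."""

    def __init__(self):
        self._hooks = collections.defaultdict(set)

    def register(self, hook, func):
        self._hooks[hook].add(func)

    def deregister(self, hook, func):
        self._hooks[hook].discard(func)

    def emit(self, hook, *args, **kwargs):
        # The tracer emits with hook=Tracer.start_span and the new span as argument.
        for func in self._hooks[hook]:
            func(*args, **kwargs)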

pyproject.toml

Lines changed: 0 additions & 1 deletion
@@ -86,7 +86,6 @@ exclude = '''
     | opentracer/
     | profiling/exporter/pprof_pb2.py$
     | propagation/
-    | settings/
     | vendor/
   )
   | tests/
